backend running for mvp1

2025-04-15 01:04:38 +02:00 · 2025-04-15 01:04:38 +02:00 · c75a3b67ce
commit c75a3b67ce
parent b0c45fb798
10 changed files with 2529 additions and 615 deletions
--- a/gwserver/modules/BAKcoder.py
+++ b/gwserver/modules/BAKcoder.py
--- a/gwserver/modules/BAKwebcrawler.py
+++ b/gwserver/modules/BAKwebcrawler.py
@ -0,0 +1,613 @@
 """
 WebCrawler-Agent für die Recherche und Beschaffung von Informationen aus dem Web.
 Angepasst für das refaktorisierte Core-Modul.
 """
 import json
 import logging
 import random
 import time
 import traceback
 from typing import List, Dict, Any, Optional, Union
 import re
 import uuid
 from datetime import datetime
 from urllib.parse import quote_plus, unquote
 from bs4 import BeautifulSoup
 import requests
 from modules.agentservice_base import BaseAgent
 from connectors.connector_aichat_openai import ChatService
 from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
 from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
 logger = logging.getLogger(__name__)
 class WebcrawlerAgent(BaseAgent):
    """Agent für Web-Recherche und Informationsbeschaffung"""
    def __init__(self):
        """Set up the web-crawler agent: identity, collaborators and crawl limits."""
        super().__init__()

        # Identity and advertised abilities of this agent
        self.id = "webcrawler"
        self.name = "Webscraper"
        self.type = "scraper"
        self.description = "Recherchiert Informationen im Web"
        self.capabilities = ",".join((
            "web_search",
            "information_retrieval",
            "data_collection",
            "source_verification",
            "content_integration",
        ))
        self.result_format = "SearchResults"

        # Document support; the handler is injected later via set_document_handler()
        self.supports_documents = True
        self.document_capabilities = ["read", "create"]
        self.required_context = ["workflow_id"]
        self.document_handler = None

        # Collaborators: messaging protocol, chat service and message helpers
        self.protocol = AgentCommunicationProtocol()
        self.chat_service = ChatService()
        self.message_utils = MessageUtils()

        # Web-crawling configuration
        self.max_url = 3      # max. URLs requested from the model in run_web_query()
        self.max_key = 3      # max. search phrases requested from the model
        self.max_result = 3   # max. hits kept per search in search_web()
        self.timeout = 10     # reported in get_agent_info()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }
        # NOTE(review): the two limits below are not referenced by the methods
        # visible in this class - confirm whether they are still needed.
        self.max_urls = 10
        self.max_content_length = 100000
    def get_agent_info(self) -> Dict[str, Any]:
        """Return the base agent info extended with the crawl limits under "metadata"."""
        agent_info = super().get_agent_info()
        crawler_metadata = {
            "max_url": self.max_url,
            "max_result": self.max_result,
            "timeout": self.timeout,
        }
        agent_info.update({"metadata": crawler_metadata})
        return agent_info
    def set_document_handler(self, document_handler):
        """
        Attach the handler this agent uses for file operations.

        Args:
            document_handler: Handler object to store on the agent.
        """
        self.document_handler = document_handler
    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message by running a web research for its content.

        Args:
            message: The message to process; its "content" is the research task.
            context: Optional extra context. "workflow_id" and "log_func" are
                read from it when present.

        Returns:
            A response dict whose "content" holds the research report, or a
            Markdown error report (including the traceback) if the research failed.
        """
        context = context or {}
        # Prefer the context's workflow id, then the message's, then a placeholder.
        # A context that lacks "workflow_id" must not yield None.
        workflow_id = context.get("workflow_id") or message.get("workflow_id") or "unknown"
        log_func = context.get("log_func")
        logging_utils = LoggingUtils(workflow_id, log_func)

        def report_status(description: str, status: str, progress: float) -> None:
            """Emit a protocol status update through log_func, if one was supplied."""
            if not log_func:
                return
            status_message = self.protocol.create_status_update_message(
                status_description=description,
                sender_id=self.id,
                status=status,
                progress=progress,
                context_id=workflow_id
            )
            log_func(workflow_id, status_message.content, "info", self.id, self.name)

        report_status("Starte Web-Recherche", "in_progress", 0.0)

        # Response skeleton; only "content" is filled in below
        response = {
            "role": "assistant",
            "content": "",
            "agent_id": self.id,
            "agent_type": self.type,
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id
        }
        try:
            # The prompt is fetched here only for logging and status texts;
            # get_web_query() extracts it again from the message.
            prompt = await self.get_prompt(message)
            logging_utils.info(f"Web-Recherche für: {prompt[:50]}...", "agents")
            report_status(f"Recherchiere: {prompt[:30]}...", "in_progress", 0.3)

            web_query_result = await self.get_web_query(message)

            report_status("Web-Recherche abgeschlossen", "completed", 1.0)
            response["content"] = web_query_result
            return response
        except Exception as e:
            # Top-level boundary: turn any failure into an error report for the caller
            error_msg = f"Fehler bei der Web-Recherche: {str(e)}"
            error_trace = traceback.format_exc()
            logging_utils.error(error_msg, "error")
            # NOTE(review): the protocol error message is built but not sent or
            # returned anywhere in this method - confirm whether it should be.
            error_message = self.protocol.create_error_message(
                error_description=error_msg,
                sender_id=self.id,
                error_type="web_search",
                error_details={"traceback": error_trace},
                context_id=workflow_id
            )
            response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{error_trace}\n```"
            return response
    def send_document_request(self, document_description: str, sender_id: str, receiver_id: str, filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
        """
        Build a document request message through the communication protocol.

        Args:
            document_description: Description of the requested document.
            sender_id: Id of the requesting agent.
            receiver_id: Id of the agent that should answer.
            filters: Optional filter criteria, passed through unchanged.
            context_id: Optional context id, passed through unchanged.

        Returns:
            The message created by the protocol.
        """
        request_fields = {
            "document_description": document_description,
            "sender_id": sender_id,
            "receiver_id": receiver_id,
            "filters": filters,
            "context_id": context_id,
        }
        return self.protocol.create_document_request_message(**request_fields)
    def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str, 
                        output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
        """
        Build a result message through the communication protocol.

        The result format is always "SearchResults".

        Args:
            result_content: Text of the result.
            sender_id: Id of the sending agent.
            receiver_id: Id of the receiving agent.
            task_id: Id of the task the result belongs to.
            output_data: Optional structured output, passed through unchanged.
            context_id: Optional context id, passed through unchanged.

        Returns:
            The message created by the protocol.
        """
        result_fields = {
            "result_content": result_content,
            "sender_id": sender_id,
            "receiver_id": receiver_id,
            "task_id": task_id,
            "output_data": output_data,
            "result_format": "SearchResults",
            "context_id": context_id,
        }
        return self.protocol.create_result_message(**result_fields)
    async def get_prompt(self, message_context: Dict[str, Any]) -> str:
        """
        Return the research task contained in a message.

        Args:
            message_context: Message dict; the task is read from its "content" key.

        Returns:
            The content with surrounding whitespace removed. A missing or None
            content yields "". Non-string content is converted with str().
        """
        task = message_context.get("content")
        if task is None:
            # An explicit None content must not crash on .strip()
            return ""
        if not isinstance(task, str):
            task = str(task)
        return task.strip()
    async def get_web_query(self, message_context: Dict[str, Any]) -> str:
        """
        Run the web research for a message and render it as a Markdown report.

        Each result is summarised by the chat service (cut to 2000 characters).
        The per-result summaries are then condensed into one overall summary.
        Processing stops once the running token estimate would exceed 60000.

        Args:
            message_context: Message dict carrying the research task.

        Returns:
            Markdown text with a summary section and the detailed results.
        """
        prompt = await self.get_prompt(message_context)
        result_json = await self.run_web_query(prompt)
        logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.")

        detail_blocks = []
        digests = []
        if not isinstance(result_json, list):
            result_data = "no data received"
        else:
            task_text = prompt.replace("'","")
            used_tokens = 0
            for position, hit in enumerate(result_json, 1):
                # Allow roughly 15000 tokens of page content per result
                page_excerpt = self.limit_text_for_api(hit['data'], max_tokens=15000)
                web_answer_instructions = f"""
                Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{task_text}'
                Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen.
                Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen.
                Dies ist das Resultat:
                {page_excerpt}
                """
                # Stop before the accumulated estimate exceeds the budget
                prompt_tokens = self.count_tokens(web_answer_instructions)
                if used_tokens + prompt_tokens > 60000:
                    logger.warning(f"Skipping result {position} to avoid exceeding token limit")
                    break
                used_tokens += prompt_tokens

                system_turn = {
                    "role": "system",
                    "content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."
                }
                user_turn = {
                    "role": "user",
                    "content": web_answer_instructions
                }
                answer = await self.chat_service.call_api(messages=[system_turn, user_turn])

                # Keep at most ~2000 characters of the answer
                digest = answer[:2000]
                detail_blocks.append(
                    f"\n\n[{position}] {hit['title']}\nURL: {hit['url']}\nSnippet: {hit['snippet']}\nContent: {digest}"
                )
                digests.append(f"\n{digest}")
                # 100 extra tokens as a buffer for the surrounding formatting
                used_tokens += self.count_tokens(digest) + 100
            result_data = "".join(detail_blocks)
        summary_src = "".join(digests)
        logger.info(f"Web analysis result sent {len(result_data)}B")

        # Overall summary across all per-result digests
        summary = ""
        if len(summary_src) > 1:
            summary_src_limited = self.limit_text_for_api(summary_src, max_tokens=10000)
            summary = await self.chat_service.call_api(
                messages=[
                    {
                        "role": "system",
                        "content": "Du erstellst prägnante Zusammenfassungen von Rechercheergbnissen."
                    },
                    {
                        "role": "user",
                        "content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src_limited}\n"
                    }
                ]
            )

        return f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
    async def run_web_query(self, prompt: str) -> List[Dict]:
        """
        Ask the chat service for a research strategy and scrape according to it.

        The model is asked for a JSON object with the keys 'url' (direct URLs)
        and 'skey' (search phrases). A Markdown code fence around the reply is
        removed before parsing.

        Args:
            prompt: The research task. An empty string short-circuits to [].

        Returns:
            Whatever scrape_json() returns for the parsed strategy, or [] when
            the reply is not a string or is not valid JSON.
        """
        if prompt=="":
            return []
        ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
        'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
        'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
        Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
        """
        content_text = await self.chat_service.call_api(
            messages=[
                {
                    "role": "system",
                    "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
                },
                {
                    "role": "user",
                    "content": ptext
                }
            ]
        )
        if not isinstance(content_text, str):
            logger.error(f"Unexpected research strategy reply of type {type(content_text).__name__}")
            return []

        # Strip an optional Markdown fence: leading whitespace, "```" with or
        # without a language tag, and a closing "```" if one is present.
        cleaned = content_text.strip()
        if cleaned.startswith("```"):
            cleaned = re.sub(r'^```[A-Za-z0-9_-]*\s*', '', cleaned)
            end_index = cleaned.rfind("```")
            if end_index != -1:
                cleaned = cleaned[:end_index]
            cleaned = cleaned.strip()

        try:
            pjson = json.loads(cleaned)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse JSON: {e}")
            logger.error(f"Cleaned content: {cleaned[:100]}...")
            return []

        # Log only after parsing has actually succeeded
        logger.info(f"Valid json received: {cleaned}")
        return await self.scrape_json(pjson)
    async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
        """
        Scrape web content based on a research strategy.

        Args:
            research_strategy: A dictionary containing:
                - 'skey': List of search phrases (missing or None means none)
                - 'url': List of direct URLs to scrape (missing or None means none)

        Returns:
            List of result dicts with the keys 'title', 'url', 'snippet' and
            'data': first the search hits, then the direct URLs. For a
            malformed strategy an {"error": ...} dict is returned instead;
            callers already treat a non-list as "no data".
        """
        logger.info("Starting JSON-based web scraping")
        results = []
        # Validate input structure
        if not isinstance(research_strategy, dict):
            logger.error("Invalid research_strategy format: not a dictionary")
            return {"error": "Invalid research_strategy format: not a dictionary"}
        # The model may send null for a key it leaves open; treat that as empty
        keys = research_strategy.get("skey") or []
        direct_urls = research_strategy.get("url") or []
        if not isinstance(keys, list) or not isinstance(direct_urls, list):
            logger.error("Invalid research_strategy format: keys, or url is not a list")
            return {"error": "Invalid research_strategy format: keys, or url is not a list"}
        # Process search keywords through the search engine
        for keyword in keys:
            logger.info(f"Processing keyword: {keyword}")
            found_results = self.search_web(keyword)  # list of dicts: title, url, snippet, data
            logger.info(f"... {len(found_results)} results found")
            results.extend(found_results)
        # Process direct URLs. Compare against the URLs already scraped,
        # not against the result dicts themselves.
        scraped_urls = {entry.get('url') for entry in results if isinstance(entry, dict)}
        logger.info(f"Processing {len(direct_urls)} direct URLs")
        for url in direct_urls:
            if not isinstance(url, str) or not url.strip():
                logger.warning(f"Skipping invalid URL entry: {url!r}")
                continue
            if url in scraped_urls:
                logger.info(f"Skipping already scraped URL: {url}")
                continue
            soup = self.read_url(url)
            if isinstance(soup, BeautifulSoup):
                # Use the <title>, falling back to the first <h1>
                title_tag = soup.find('title')
                title = (title_tag.text.strip() if title_tag else "") or "No title"
                if title == "No title":
                    h1_tag = soup.find('h1')
                    if h1_tag and h1_tag.text.strip():
                        title = h1_tag.text.strip()
            else:
                # read_url handed back something other than a parsed page
                title = "Error fetching page"
            results.append(self.parse_result(soup, title, url))
            scraped_urls.add(url)
        logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
        return results
    def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str:
        """
        Return the visible main text of an HTML page, cut to max_chars.

        The first match of main, article, #content, .content, #main, .main is
        used as the content root; otherwise the body (or the whole document).
        Boilerplate elements are removed from that root, which modifies the
        passed soup.

        Args:
            soup: Parsed page. Any other object is stringified and truncated.
            max_chars: Maximum number of characters to return.

        Returns:
            Extracted text, at most max_chars characters long.
        """
        if not isinstance(soup, BeautifulSoup):
            return str(soup)[:max_chars]

        # Candidate containers in order of priority; evaluated lazily so the
        # search stops at the first hit
        candidate_selectors = ('main', 'article', '#content', '.content', '#main', '.main')
        matches = (soup.select_one(selector) for selector in candidate_selectors)
        root = next((match for match in matches if match), None)
        if not root:
            root = soup.find('body') or soup

        # Drop elements that do not contribute to the main content
        noise_selector = 'script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'
        for noise in root.select(noise_selector):
            noise.extract()

        return root.get_text(separator=' ', strip=True)[:max_chars]
    def tokenize_for_counting(self, text: str) -> List[str]:
        """
        Split text into rough tokens for estimating token usage.

        This is only an approximation; real tokenization depends on the model.

        Args:
            text: Input text

        Returns:
            Word-character runs and single punctuation characters, in order of
            appearance. Whitespace is dropped.
        """
        token_pattern = re.compile(r'\w+|[^\w\s]')
        return token_pattern.findall(text)
    def count_tokens(self, text: str) -> int:
        """
        Estimate how many tokens a text contains.

        Args:
            text: Input text

        Returns:
            Number of tokens produced by tokenize_for_counting()
        """
        return len(self.tokenize_for_counting(text))
    def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str:
        """
        Cut a text down to at most max_tokens estimated tokens.

        Args:
            text: Input text
            max_tokens: Maximum number of tokens allowed

        Returns:
            "" for empty input. The text unchanged if it fits. Otherwise the
            first max_tokens tokens joined by single spaces, followed by a
            truncation notice.
        """
        if not text:
            return ""
        token_list = self.tokenize_for_counting(text)
        if len(token_list) > max_tokens:
            kept = token_list[:max_tokens]
            return " ".join(kept) + "... [content truncated due to length]"
        # Within budget: hand the text back untouched
        return text
    def search_web(self, query: str) -> List[Dict]:
        """
        Search DuckDuckGo's HTML endpoint and fetch the content of each hit.

        Args:
            query: Search phrase.

        Returns:
            Up to self.max_result dicts with the keys 'title', 'url', 'snippet'
            (taken from the results page) and 'data' (main text of the target
            page, at most 30000 characters). Hits without a usable link are
            skipped. An empty list is returned when no results are found.
        """
        def resolve_href(href) -> str:
            """Turn a result link into an absolute target URL ('' if unusable)."""
            href = (href or '').strip()
            if not href:
                return ''
            target = href
            if href.startswith('/d.js?q='):
                # Redirect of the form /d.js?q=<encoded target>&...
                start = href.find('?q=') + 3
                end = href.find('&', start)
                target = unquote(href[start:end if end != -1 else None])
            elif 'uddg=' in href and (href.startswith('/l/') or 'duckduckgo.com/l/' in href):
                # Redirect of the form //duckduckgo.com/l/?uddg=<encoded target>&...
                start = href.find('uddg=') + len('uddg=')
                end = href.find('&', start)
                target = unquote(href[start:end if end != -1 else None])
            # Make sure the URL has a protocol prefix
            if target.startswith(('http://', 'https://')):
                return target
            if target.startswith('//'):
                return 'https:' + target
            if target.startswith('/'):
                # Site-relative link with nothing to resolve it against
                return ''
            return 'https://' + target

        formatted_query = quote_plus(query)
        search_url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
        search_results_soup = self.read_url(search_url)
        # Select the result containers once and reuse them
        result_elements = (search_results_soup.select('.result') or []) if search_results_soup else []
        if not result_elements:
            logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
            return []

        results = []
        for result in result_elements:
            title_element = result.select_one('.result__a')
            extracted_url = resolve_href(title_element.get('href')) if title_element else ''
            if not extracted_url:
                # Without a link there is no page to fetch
                continue
            title = title_element.text.strip() or 'No title'
            # The snippet comes straight from the search results page
            snippet_element = result.select_one('.result__snippet')
            snippet = snippet_element.text.strip() if snippet_element else 'No description'
            # Fetch the target page and keep only its size-limited main text
            target_page_soup = self.read_url(extracted_url)
            content = self.extract_main_content(target_page_soup, max_chars=30000)
            results.append({
                'title': title,
                'url': extracted_url,
                'snippet': snippet,
                'data': content
            })
            if len(results) >= self.max_result:
                break
        return results
    def read_url(self, url: str) -> BeautifulSoup:
        """
        Fetch a URL and return a BeautifulSoup parser for its content.

        A 202 response is polled again up to four times with increasing
        pauses. If the status is still 202 afterwards, the last response body
        is parsed as-is.

        Args:
            url: The URL to read

        Returns:
            Parsed page. On any failure (network error, HTTP error status,
            invalid URL) the failure is logged and an empty document is
            returned instead of raising.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml',
            'Accept-Language': 'en-US,en;q=0.9',
        }
        # Honour the configured timeout; fall back to the previous fixed value
        timeout = getattr(self, "timeout", 10)
        try:
            # Initial request
            response = requests.get(url, headers=headers, timeout=timeout)
            # Poll while the server answers 202
            if response.status_code == 202:
                for wait_time in (0.5, 1.0, 2.0, 5.0):
                    time.sleep(wait_time)
                    response = requests.get(url, headers=headers, timeout=timeout)
                    if response.status_code != 202:
                        break
            # Raise for error statuses so they end up in the handler below
            response.raise_for_status()
            return BeautifulSoup(response.text, 'html.parser')
        except Exception as e:
            # Best effort by design: callers always get a soup, but the failure
            # is no longer silent.
            logger.warning(f"Failed to read URL {url}: {e}")
            return BeautifulSoup("<html><body></body></html>", 'html.parser')
    def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
        """
        Build a result dict for a directly fetched page.

        The 'data' field holds the size-limited main text from
        extract_main_content(), the same form search_web() produces, rather
        than the raw prettified HTML of the whole page.

        Args:
            data: Parsed page. Any other object (e.g. an error string) is
                accepted and stringified by extract_main_content().
            title: Title to store for the page.
            url: URL the page was fetched from.

        Returns:
            Dict with the keys 'title', 'url', 'snippet' and 'data'.
        """
        snippet = 'No description'
        if isinstance(data, BeautifulSoup):
            snippet_element = data.select_one('.result__snippet')
            if snippet_element:
                snippet = snippet_element.text.strip()
        return {
            'title': title,
            'url': url,
            'snippet': snippet,
            'data': self.extract_main_content(data)
        }
 # Module-wide singleton instance, created on first request
 _webcrawler_agent = None
 def get_webcrawler_agent():
    """Return the shared WebcrawlerAgent, creating it on the first call."""
    global _webcrawler_agent
    if _webcrawler_agent is not None:
        return _webcrawler_agent
    _webcrawler_agent = WebcrawlerAgent()
    return _webcrawler_agent
--- a/gwserver/modules/agentservice_agent_analyst.py
+++ b/gwserver/modules/agentservice_agent_analyst.py
@ -39,10 +39,14 @@ class AnalystAgent(BaseAgent):
        self.capabilities = "data_analysis,pattern_recognition,statistics,visualization,data_interpretation"
        self.result_format = "AnalysisReport"
        # Initialize AI service
        self.ai_service = None
        # Document capabilities
        self.supports_documents = True
        self.document_capabilities = ["read", "analyze", "extract"]
        self.required_context = ["data_source", "analysis_objectives"]
        self.document_handler = None
        # Initialize protocol
        self.protocol = AgentCommunicationProtocol()
@ -69,6 +73,10 @@ class AnalystAgent(BaseAgent):
        })
        return info
    def set_document_handler(self, document_handler):
        """
        Attach the handler this agent uses for file operations.

        Args:
            document_handler: Handler object to store on the agent.
        """
        self.document_handler = document_handler
    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message and perform data analysis.
@ -277,7 +285,7 @@ class AnalystAgent(BaseAgent):
        for document in message.get("documents", []):
            source = document.get("source", {})
            filename = source.get("name", "")
-            file_id = source.get("id", "")
+            file_id = source.get("id", 0)
            content_type = source.get("content_type", "")
            # Skip if not a recognizable data file
@ -288,7 +296,7 @@ class AnalystAgent(BaseAgent):
                # Try to get file content through document handler first
                file_content = None
                if self.document_handler:
-                    file_content = await self.document_handler.get_file_content(file_id)
+                    file_content = self.document_handler.get_file_content_from_message(message, file_id=file_id)
                # Process based on file type
                if filename.lower().endswith('.csv'):
@ -323,32 +331,41 @@ class AnalystAgent(BaseAgent):
        return False
-    def _process_csv(self, file_content: bytes, filename: str) -> Optional[pd.DataFrame]:
+    def _process_csv(self, file_content: Union[bytes, str], filename: str) -> Optional[pd.DataFrame]:
        """Process CSV file content into a pandas DataFrame"""
        if file_content is None:
            return None
        try:
-            # Try various encodings
+            # Handle the case where file_content is already a string
-            for encoding in ['utf-8', 'latin1', 'cp1252']:
+            if isinstance(file_content, str):
-                try:
+                text_content = file_content
-                    # Use StringIO to create a file-like object
+                df = pd.read_csv(io.StringIO(text_content))
-                    text_content = file_content.decode(encoding)
+                df = self._preprocess_dataframe(df)
-                    df = pd.read_csv(io.StringIO(text_content))
+                return df
-                    # Basic preprocessing
+            # Handle the case where file_content is bytes
-                    df = self._preprocess_dataframe(df)
+            else:
-                    return df
+                # Try various encodings
-                except UnicodeDecodeError:
+                for encoding in ['utf-8', 'latin1', 'cp1252']:
-                    continue
+                    try:
-                except Exception as e:
+                        # Use StringIO to create a file-like object
-                    logger.error(f"Error processing CSV with {encoding} encoding: {str(e)}")
+                        text_content = file_content.decode(encoding)
                        df = pd.read_csv(io.StringIO(text_content))
-            # If all encodings fail, try one more time with errors='replace'
+                        # Basic preprocessing
-            text_content = file_content.decode('utf-8', errors='replace')
+                        df = self._preprocess_dataframe(df)
-            df = pd.read_csv(io.StringIO(text_content))
+                        return df
-            df = self._preprocess_dataframe(df)
+                    except UnicodeDecodeError:
-            return df
+                        continue
                    except Exception as e:
                        logger.error(f"Error processing CSV with {encoding} encoding: {str(e)}")
                # If all encodings fail, try one more time with errors='replace'
                text_content = file_content.decode('utf-8', errors='replace')
                df = pd.read_csv(io.StringIO(text_content))
                df = self._preprocess_dataframe(df)
                return df
        except Exception as e:
            logger.error(f"Failed to process CSV file {filename}: {str(e)}")
@ -1482,6 +1499,7 @@ class AnalystAgent(BaseAgent):
            Generated analysis
        """
        if not self.ai_service:
            logging.warning("AI service not available for analysis generation")
            return f"## Data Analysis ({analysis_type})\n\nUnable to generate analysis: AI service not available."
        # Create specialized prompt based on analysis type
--- a/gwserver/modules/agentservice_agent_coder.py
+++ b/gwserver/modules/agentservice_agent_coder.py
@ -1,6 +1,7 @@
 """
 CoderAgent - A unified agent for developing and executing Python code.
 Includes code execution capabilities previously in separate modules.
 Enhanced with auto-correction loop for handling execution errors.
 """
 import logging
@ -13,6 +14,7 @@ import subprocess
 import tempfile
 import shutil
 import sys
 import pandas as pd
 from datetime import datetime
 from typing import List, Dict, Any, Optional, Tuple
@ -23,7 +25,9 @@ from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtoc
 logger = logging.getLogger(__name__)
 # Existing SimpleCodeExecutor class remains unchanged
 class SimpleCodeExecutor:
    # ... existing code ...
    """
    A simplified executor that runs Python code in isolated virtual environments.
    """
@ -36,7 +40,8 @@ class SimpleCodeExecutor:
                 timeout: int = 30,
                 max_memory_mb: int = 512,
                 requirements: List[str] = None,
-                 blocked_packages: List[str] = None):
+                 blocked_packages: List[str] = None,
                 ai_service = None):
        """
        Initialize the SimpleCodeExecutor.
@ -57,6 +62,7 @@ class SimpleCodeExecutor:
            "tensorflow", "pytorch", "scikit-learn"  # Resource intensive
        ]
        self.is_persistent = workflow_id is not None
        self.ai_service = ai_service
    @classmethod
    def get_workflow_environment(cls, workflow_id: str) -> Optional[str]:
@ -176,50 +182,16 @@ class SimpleCodeExecutor:
            return False
    def _extract_required_packages(self, code: str) -> List[str]:
-        """Extract required packages from import statements and requirements comments in the code."""
+        #  Extract required packages from requirements comments in the 1st code line
        import re
        packages = set()
-        
+        # Check for special REQUIREMENTS comment - specific format we're looking for
-        # Check for special REQUIREMENTS comment
+        first_lines = code.split('\n')[:5]  # Only check first few lines
-        requirements_match = re.search(r'# REQUIREMENTS:\s*([^\n]+)', code)
+        for line in first_lines:
-        if requirements_match:
+            if line.strip().startswith("# REQUIREMENTS:"):
-            req_str = requirements_match.group(1).strip()
+                req_str = line.replace("# REQUIREMENTS:", "").strip()
-            for pkg in req_str.split(','):
+                for pkg in req_str.split(','):
-                if pkg.strip():
+                    if pkg.strip():
-                    packages.add(pkg.strip())
+                        packages.add(pkg.strip())
        # Add common base packages
        base_packages = [
            "requests", "urllib3", "pydantic", 
            "pandas", "numpy", "matplotlib"
        ]
        for pkg in base_packages:
            packages.add(pkg)
        # Detect pip install comments
        pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code)
        for comment in pip_comments:
            for pkg in comment.split():
                if pkg and not pkg.startswith('-'):
                    packages.add(pkg.strip())
        # Analyze import statements
        import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE)
        # Standard modules that don't need installation
        std_modules = {
            'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json',
            'collections', 'itertools', 'functools', 'pathlib', 'shutil',
            'tempfile', 'uuid', 'subprocess', 'threading', 'logging',
            'traceback', 'io', 'copy', 'typing', 'asyncio'
        }
        # Process all imports
        for module in import_lines:
            if module not in std_modules:
                packages.add(module)
        return list(packages)
@ -247,8 +219,8 @@ class SimpleCodeExecutor:
        all_requirements = []
        # Add explicitly provided requirements
-        if self.requirements:
+        # if self.requirements:
-            all_requirements.extend(self.requirements)
+        #    all_requirements.extend(self.requirements)
        # Extract requirements from code
        extracted_requirements = self._extract_required_packages(code)
@ -340,12 +312,13 @@ class SimpleCodeExecutor:
            # Run the code from root dir
            working_dir = os.path.dirname(code_file)   # This should be the project root
            logger.info(f"DEBUG PATH Root: {os.getcwd()} Code: {code_file} Working Dir: {working_dir}")
            logger.debug(f"|{code}|")
            process = subprocess.run(
                [python_executable, code_file],
                timeout=self.timeout,
                capture_output=True,
                text=True,
-                cwd=self.temp_dir
+                cwd=working_dir
            )
            # Process the output
@ -393,7 +366,7 @@ class SimpleCodeExecutor:
            execution_result = {
                "success": False,
                "output": "",
-                "error": f"Execution error: {str(e)}",
+                "error": f"Execution error: {str(e)} for code {code}",
                "result": None,
                "exit_code": -1
            }
@ -426,7 +399,7 @@ class SimpleCodeExecutor:
        """Clean up during garbage collection."""
        self.cleanup()
-
+# Unchanged error recommendation function
 def get_error_recommendation(error_message: str) -> str:
    """Generate recommendations based on error message."""
    if "ImportError" in error_message or "ModuleNotFoundError" in error_message:
@ -460,7 +433,7 @@ To fix the error:
 class CoderAgent(BaseAgent):
-    """Agent for developing and executing Python code"""
+    """Agent for developing and executing Python code with auto-correction capabilities"""
    def __init__(self):
        """Initialize the coder agent with proper type and capabilities"""
@ -474,10 +447,14 @@ class CoderAgent(BaseAgent):
        self.capabilities = "code_development,data_processing,file_processing,automation"
        self.result_format = "python_code"
        # Initialize AI service
        self.ai_service = None
        # Add document capabilities
        self.supports_documents = True
        self.document_capabilities = ["read", "reference", "create"]
        self.required_context = ["workflow_id"]
        self.document_handler = None
        # Initialize protocol
        self.protocol = AgentCommunicationProtocol()
@ -491,23 +468,33 @@ class CoderAgent(BaseAgent):
        self.executor_memory_limit = 512  # MB
        # AI service settings
-        self.ai_temperature = 0.2  # Lower temperature for more deterministic code generation
+        self.ai_temperature = 0.1  # Lower temperature for more deterministic code generation
        self.ai_max_tokens = 2000  # Enough tokens for complex code
        # Auto-correction settings (new)
        self.max_correction_attempts = 3  # Maximum number of correction attempts
        self.correction_temperature = 0.1  # Even lower temperature for corrections
    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information for agent registry"""
        info = super().get_agent_info()
        info.update({
            "metadata": {
                "timeout": self.executor_timeout,
-                "memory_limit": self.executor_memory_limit
+                "memory_limit": self.executor_memory_limit,
                "max_correction_attempts": self.max_correction_attempts
            }
        })
        return info
    def set_document_handler(self, document_handler):
        """
        Attach the handler this agent uses for file operations.

        Args:
            document_handler: Object stored as ``self.document_handler``.
        """
        self.document_handler = document_handler
    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
-        Process a message to develop and execute Python code.
+        Process a message to develop and execute Python code with auto-correction.
        Args:
            message: The message to process
@ -539,34 +526,23 @@ class CoderAgent(BaseAgent):
            content = message.get("content", "")
            documents = message.get("documents", [])
            # Extract code from message content
            code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
            code_to_execute = None
            requirements = []
-            if code_blocks:
+            # Generate code based on the message content using AI
-                # Use the first code block found
+            logging_utils.info("Generating new code with AI", "agents")
                code_to_execute = code_blocks[0]
                # Clean the code to remove any markdown formatting
                code_to_execute = self._clean_code(code_to_execute)
                logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents")
            else:
                # Generate code based on the message content using AI
                logging_utils.info("No code found in message, generating new code with AI", "agents")
-                # Generate code using AI
+            # Generate code using AI
-                code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
+            code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
-                if not code_to_execute:
+            if not code_to_execute:
-                    logging_utils.warning("AI could not generate code", "agents")
+                logging_utils.warning("AI could not generate code", "agents")
-                    response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
+                response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
-                    self.message_utils.finalize_message(response)
+                self.message_utils.finalize_message(response)
-                    return response
+                return response
-                logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")            
+            logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")            
-            # Execute the code
+            # Execute the code with auto-correction loop
            if code_to_execute:
                logging_utils.info("Executing code", "execution")
                # Prepare execution context
                execution_context = {
                    "workflow_id": workflow_id,
@ -575,21 +551,16 @@ class CoderAgent(BaseAgent):
                    "log_func": log_func
                }
-                # Send a status update
+                # Enhanced execution with auto-correction
-                if log_func:
+                result, attempts_info = await self._execute_with_auto_correction(
-                    status_message = self.protocol.create_status_update_message(
+                    code_to_execute, 
-                        status_description="Processing code execution request",
+                    requirements, 
-                        sender_id=self.id,
+                    execution_context,
-                        status="in_progress",
+                    content,  # Original prompt/message
-                        progress=0.5,
+                    logging_utils
-                        context_id=workflow_id
+                )
                    )
                    log_func(workflow_id, status_message.content, "info", self.id, self.name)
-                # Execute code
+                # Prepare response based on the final result (success or failure)
                result = await self._execute_code(code_to_execute, requirements, execution_context)
                # Prepare response
                if result.get("success", False):
                    # Code execution successful
                    output = result.get("output", "")
@ -597,20 +568,39 @@ class CoderAgent(BaseAgent):
                    logging_utils.info("Code executed successfully", "execution")
                    # Format response content
-                    response_content = f"## Code executed successfully\n\n"
+                    response_content = f"## Code executed successfully"
                    # Add correction attempts info if any corrections were made
                    if attempts_info and len(attempts_info) > 1:
                        response_content += f" (after {len(attempts_info)-1} correction attempts)"
                    response_content += "\n\n"
                    # Include the executed code
-                    response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
+                    response_content += f"### Final Executed Code\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
                    # Include the output if available
                    if output:
                        response_content += f"### Output\n\n```\n{output}\n```\n\n"
                        # Create document with results
                        data_document = self._create_document_from_result(execution_result)
                        if data_document:
                            response["documents"].append(data_document)
                    # Include the execution result if available
                    if execution_result:
                        result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
                        response_content += f"### Result\n\n```\n{result_str}\n```\n\n"
                    # Include correction history if any corrections were made
                    if attempts_info and len(attempts_info) > 1:
                        response_content += f"### Code Correction History\n\n"
                        for i, attempt in enumerate(attempts_info[:-1], 1):
                            response_content += f"**Attempt {i}:**\n\n"
                            response_content += f"```python\n{attempt['code']}\n```\n\n"
                            response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
                    response["content"] = response_content
                    # Process any files created by the code
@ -628,17 +618,38 @@ class CoderAgent(BaseAgent):
                                }
                                response["documents"].append(doc)
                else:
-                    # Code execution failed
+                    # Code execution failed after all attempts
                    error = result.get("error", "Unknown error")
-                    logging_utils.error(f"Error during code execution: {error}", "execution")
+                    logging_utils.error(f"Error during code execution after all correction attempts: {error}", "execution")
                    # Format error response
                    response_content = f"## Error during code execution\n\n"
                    response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
                    response_content += f"### Error\n\n```\n{error}\n```\n\n"
-                    # Add recommendation based on error
+                    # Include correction attempts information
-                    response_content += get_error_recommendation(error)
+                    if attempts_info:
                        response_content += f"I made {len(attempts_info)} attempts to correct the code, but couldn't resolve all issues.\n\n"
                        # Add the final attempt
                        response_content += f"### Final Code Attempt\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
                        response_content += f"### Final Error\n\n```\n{attempts_info[-1]['error']}\n```\n\n"
                        # Add recommendation based on error
                        response_content += get_error_recommendation(error)
                        # Add correction history
                        if len(attempts_info) > 1:
                            response_content += f"\n### Code Correction History\n\n"
                            for i, attempt in enumerate(attempts_info[:-1], 1):
                                response_content += f"**Attempt {i}:**\n\n"
                                response_content += f"```python\n{attempt['code']}\n```\n\n"
                                response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
                    else:
                        # Just show the code and error
                        response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
                        response_content += f"### Error\n\n```\n{error}\n```\n\n"
                        # Add recommendation based on error
                        response_content += get_error_recommendation(error)
                    response["content"] = response_content
            else:
@ -663,6 +674,252 @@ class CoderAgent(BaseAgent):
            return response
    def _create_document_from_result(self, execution_result, output_format="json"):
        """
        Create a document object from execution results
        Args:
            execution_result: The data returned from code execution
            output_format: Desired format (json, csv, etc.)
        Returns:
            Document object for passing to other agents
        """
        if not execution_result:
            return None
        doc_id = f"data_{uuid.uuid4()}"
        # Determine filename and content type based on the data
        if isinstance(execution_result, pd.DataFrame):
            # Handle DataFrame result
            filename = "processed_data.csv"
            content_type = "text/csv"
            content = execution_result.to_csv(index=False)
        elif isinstance(execution_result, dict) or isinstance(execution_result, list):
            # Handle dictionary or list result
            filename = "processed_data.json"
            content_type = "application/json"
            content = json.dumps(execution_result)
        elif isinstance(execution_result, str):
            # Try to determine if string is JSON, CSV, or plain text
            if execution_result.strip().startswith('{') or execution_result.strip().startswith('['):
                filename = "processed_data.json"
                content_type = "application/json"
            elif ',' in execution_result and '\n' in execution_result:
                filename = "processed_data.csv" 
                content_type = "text/csv"
            else:
                filename = "processed_data.txt"
                content_type = "text/plain"
            content = str(execution_result)
        else:
            # Default case for other types
            filename = "processed_data.txt"
            content_type = "text/plain"
            content = str(execution_result)
        # Create document object
        document = {
            "id": doc_id,
            "source": {
                "type": "generated",
                "id": doc_id,
                "name": filename,
                "content_type": content_type,
            },
            "contents": [{
                "type": "text",
                "text": content,
                "is_extracted": True
            }]
        }
        return document
    async def _execute_with_auto_correction(
        self,
        initial_code: str,
        requirements: List[str],
        context: Dict[str, Any],
        original_prompt: str,
        logging_utils: LoggingUtils = None
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Execute code and, after a failure, request a corrected version and retry.

        Each round runs the current code through ``self._execute_code``. On
        failure the error is handed to ``self._generate_code_correction`` and
        the returned code is used for the next round. The loop ends on the
        first success, when the attempt limit is reached, or when no
        correction could be generated.

        Args:
            initial_code: The initial Python code to execute
            requirements: List of required packages (not modified; a copy is used)
            context: Additional context for execution
            original_prompt: The original user request/prompt
            logging_utils: Optional logging utility

        Returns:
            Tuple of (final execution result, list of attempt info dictionaries).
            Each attempt dictionary has the keys "attempt", "code", "error"
            and "success".
        """
        # The limit counts the first execution too, so at least one run is
        # required; otherwise there would be no result to return.
        max_attempts = max(1, self.max_correction_attempts)

        current_code = initial_code
        current_requirements = list(requirements) if requirements else []
        attempts_info: List[Dict[str, Any]] = []
        result: Dict[str, Any] = {}

        for attempt in range(1, max_attempts + 1):
            if logging_utils:
                label = "Executing code" if attempt == 1 else "Executing corrected code"
                logging_utils.info(f"{label} (attempt {attempt}/{max_attempts})", "execution")

            # Execute the current code version and record the outcome
            result = await self._execute_code(current_code, current_requirements, context)
            attempts_info.append({
                "attempt": attempt,
                "code": current_code,
                "error": result.get("error", ""),
                "success": result.get("success", False)
            })

            if result.get("success", False):
                return result, attempts_info

            if attempt >= max_attempts:
                if logging_utils:
                    logging_utils.warning(f"Maximum correction attempts ({max_attempts}) reached, giving up", "execution")
                break

            # "error" may be present but None/empty; fall back so the slice
            # below and the correction prompt always get a string.
            error_message = result.get("error") or "Unknown error"
            if logging_utils:
                logging_utils.info(f"Attempting to fix code error: {error_message[:200]}...", "execution")

            corrected_code, new_requirements = await self._generate_code_correction(
                current_code,
                error_message,
                original_prompt,
                current_requirements
            )

            if not corrected_code:
                # Without a correction another round would only repeat the failure
                if logging_utils:
                    logging_utils.warning("Could not generate code correction, giving up", "execution")
                break

            current_code = corrected_code
            # Merge requirements announced by the corrected code, keeping order
            for req in new_requirements or []:
                if req not in current_requirements:
                    current_requirements.append(req)
                    if logging_utils:
                        logging_utils.info(f"Added new requirement: {req}", "execution")

        # All attempts failed - return the last result and the attempt history
        return result, attempts_info
    async def _generate_code_correction(
        self, 
        code: str, 
        error_message: str, 
        original_prompt: str,
        current_requirements: Optional[List[str]] = None
    ) -> Tuple[Optional[str], List[str]]:
        """
        Ask the AI service for a corrected version of code that failed.

        Builds a prompt containing the original task, the failing code, the
        error message and the current requirements, sends it through
        ``self.ai_service.call_api`` using ``self.correction_temperature`` and
        ``self.ai_max_tokens``, and passes the reply through ``self._clean_code``.

        Args:
            code: The code that produced errors
            error_message: The error message to fix
            original_prompt: The original task/requirements
            current_requirements: List of currently required packages
                (only rendered into the prompt; "None" is shown when empty)
        Returns:
            Tuple of (corrected code, requirements parsed from the corrected code).
            The requirements list holds only the packages named in the first
            "# REQUIREMENTS:" line of the corrected code; it is not merged with
            ``current_requirements``. On any exception the error is logged and
            (None, []) is returned.
        """
        try:
            # Create a detailed prompt for code correction.
            # The prompt text is sent to the model verbatim, so it must not be reformatted.
            correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:
 ORIGINAL TASK:
 {original_prompt}
 CURRENT CODE:
 ```python
 {code}
 ```
 ERROR MESSAGE:
 ```
 {error_message}
 ```
 CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}
 Your task is to analyze the error and provide a corrected version of the code.
 Focus specifically on fixing the error while preserving the original functionality.
 Common fixes might include:
 - Fixing syntax errors (missing parentheses, indentation, etc.)
 - Resolving import errors by adding appropriate requirements
 - Correcting file paths or handling file not found errors
 - Adding error handling for specific edge cases
 - Fixing logical errors in the code
 FORMAT INSTRUCTIONS:
 1. Provide ONLY the complete fixed Python code without ANY explanation
 2. DO NOT include code block markers like ```python or ```
 3. DO NOT explain what the code does before or after it
 4. DO NOT include any text that is not valid Python code
 5. Start your response directly with the valid Python code
 6. End your response with valid Python code
 If you need to add new required packages, place them in a specially formatted comment at the top of your code like this:
 # REQUIREMENTS: package1,package2,package3
 Your entire response must be valid Python that can be executed without modification.
 """
            # Create messages for the API (system role sets the "code only" rule,
            # user role carries the correction prompt built above)
            messages = [
                {"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, fixed Python code without any explanations, markdown formatting, or non-code text. Your response should be nothing but valid, fixed Python code that can be executed directly."},
                {"role": "user", "content": correction_prompt}
            ]
            # Call the API with very low temperature for deterministic fixes.
            # NOTE(review): self.ai_service is initialised to None in __init__; if it
            # is still None here the AttributeError is caught by the except below.
            generated_content = await self.ai_service.call_api(
                messages, 
                temperature=self.correction_temperature, 
                max_tokens=self.ai_max_tokens
            )
            # Clean the generated content to ensure it's only valid Python code
            fixed_code = self._clean_code(generated_content)
            # Extract requirements from the special comment. Every line is scanned
            # (not only the top of the code) and only the first match is used.
            new_requirements = []
            for line in fixed_code.split('\n'):
                if line.strip().startswith("# REQUIREMENTS:"):
                    req_str = line.replace("# REQUIREMENTS:", "").strip()
                    new_requirements = [r.strip() for r in req_str.split(',') if r.strip()]
                    break
            return fixed_code, new_requirements
        except Exception as e:
            # Logged via the root ``logging`` module (with traceback), not the
            # module-level ``logger``.
            logging.error(f"Error generating code correction: {str(e)}", exc_info=True)
            # Return None as the code to indicate failure to the caller
            return None, []
    def _clean_code(self, code: str) -> str:
        """
        Clean up code by removing markdown code block markers and other formatting artifacts.
@ -728,9 +985,6 @@ class CoderAgent(BaseAgent):
            Tuple of (generated Python code, required packages)
        """
        try:
            # Initialize AI service
            chat_service = ChatService()
            # Prepare a prompt for code generation
            ai_prompt = f"""Generate Python code to solve the following task:
 {prompt}
@ -767,7 +1021,7 @@ FORMAT INSTRUCTIONS:
 - Start your response directly with valid Python code
 - End your response with valid Python code
-For required packages, place them in a specially formatted comment at the top of your code like this:
+For required packages, place them in a specially formatted comment at the top of your code on one line like this:
 # REQUIREMENTS: pandas,numpy,matplotlib,requests
 Your entire response must be valid Python that can be executed without modification.
@ -781,7 +1035,7 @@ Your entire response must be valid Python that can be executed without modificat
            # Call the API
            logging.info(f"Calling AI API to generate code")
-            generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
+            generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
            # Clean the generated content to ensure it's only valid Python code
            code = self._clean_code(generated_content)
@ -843,7 +1097,8 @@ result = {{"error": "Code generation failed", "message": "{error_str}"}}
                timeout=self.executor_timeout,
                max_memory_mb=self.executor_memory_limit,
                requirements=requirements,
-                blocked_packages=blocked_packages
+                blocked_packages=blocked_packages,
                ai_service = self.ai_service
            )
            # Prepare input data for the code
@ -924,7 +1179,6 @@ result = {{"error": "Code generation failed", "message": "{error_str}"}}
                    # Log error information
                    error = result.get("error", "Unknown error")
                    logging_utils.error(f"Error during code execution: {error}", "execution")
                    print("DEBUG CODE-ERROR:",code,"#END")
            # Clean up non-persistent environments
            if not executor.is_persistent:
--- a/gwserver/modules/agentservice_agent_documentation.py
+++ b/gwserver/modules/agentservice_agent_documentation.py
@ -13,7 +13,6 @@ from datetime import datetime
 import uuid
 from modules.agentservice_base import BaseAgent
 from connectors.connector_aichat_openai import ChatService
 from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
 from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
 from modules.agentservice_filemanager import FileManager  # Import the file manager
@ -64,6 +63,10 @@ class DocumentationAgent(BaseAgent):
        })
        return info
    def set_document_handler(self, document_handler):
        """
        Register the handler used by this agent for file operations.

        Args:
            document_handler: Object kept in ``self.document_handler``.
        """
        self.document_handler = document_handler
    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message and create documentation.
--- a/gwserver/modules/agentservice_agent_webcrawler.py
+++ b/gwserver/modules/agentservice_agent_webcrawler.py
@ -5,21 +5,16 @@ Angepasst für das refaktorisierte Core-Modul.
 import json
 import logging
 import random
 import time
 import traceback
-from typing import List, Dict, Any, Optional, Union
+from typing import List, Dict, Any, Optional
 import re
 import uuid
 from datetime import datetime
 from urllib.parse import quote_plus, unquote
 from bs4 import BeautifulSoup
 import requests
 from modules.agentservice_base import BaseAgent
-from connectors.connector_aichat_openai import ChatService
+from modules.agentservice_utils import MessageUtils, LoggingUtils
-from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
+from modules.agentservice_protocol import AgentCommunicationProtocol
 from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
 logger = logging.getLogger(__name__)
@ -42,13 +37,14 @@ class WebcrawlerAgent(BaseAgent):
        self.supports_documents = True
        self.document_capabilities = ["read", "create"]
        self.required_context = ["workflow_id"]
        self.document_handler = None
        # Initialize AI service
        self.ai_service = None
        # Initialize protocol
        self.protocol = AgentCommunicationProtocol()
        # Chat-Service initialisieren
        self.chat_service = ChatService()
        # Utility-Klassen initialisieren
        self.message_utils = MessageUtils()
@ -57,17 +53,6 @@ class WebcrawlerAgent(BaseAgent):
        self.max_key = 3
        self.max_result = 3
        self.timeout = 10
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.google.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }
        self.max_urls = 10
        self.max_content_length = 100000
    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information for agent registry"""
@ -81,6 +66,10 @@ class WebcrawlerAgent(BaseAgent):
        })
        return info
    def set_document_handler(self, document_handler):
        """
        Store the handler that provides file operations to this agent.

        Args:
            document_handler: Object assigned to ``self.document_handler``.
        """
        self.document_handler = document_handler
    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Verarbeitet eine Nachricht und führt eine Web-Recherche durch.
@ -173,29 +162,6 @@ class WebcrawlerAgent(BaseAgent):
            return response
    def send_document_request(self, document_description: str, sender_id: str, receiver_id: str, filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
        """
        Build a document request message through the agent's protocol object.

        Args:
            document_description: Description of the document being requested.
            sender_id: ID of the requesting agent.
            receiver_id: ID of the agent the request is addressed to.
            filters: Optional filter dictionary, forwarded unchanged.
            context_id: Optional context identifier, forwarded unchanged.

        Returns:
            Whatever ``self.protocol.create_document_request_message`` returns.
        """
        # Collect the fields first so the delegation itself stays a one-liner.
        request_fields = {
            "document_description": document_description,
            "sender_id": sender_id,
            "receiver_id": receiver_id,
            "filters": filters,
            "context_id": context_id,
        }
        return self.protocol.create_document_request_message(**request_fields)
    def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str,
                        output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
        """
        Build a result message through the agent's protocol object.

        The result format is always reported as ``"SearchResults"``.

        Args:
            result_content: Text of the result.
            sender_id: ID of the agent that produced the result.
            receiver_id: ID of the agent the result is addressed to.
            task_id: ID of the task this result belongs to.
            output_data: Optional structured output, forwarded unchanged.
            context_id: Optional context identifier, forwarded unchanged.

        Returns:
            Whatever ``self.protocol.create_result_message`` returns.
        """
        message_fields = {
            "result_content": result_content,
            "sender_id": sender_id,
            "receiver_id": receiver_id,
            "task_id": task_id,
            "output_data": output_data,
            "result_format": "SearchResults",
            "context_id": context_id,
        }
        return self.protocol.create_result_message(**message_fields)
    async def get_prompt(self, message_context: Dict[str, Any]) -> str:
        """
        Return the task text of a message with surrounding whitespace removed.

        Args:
            message_context: Message dictionary; the task is read from its
                ``"content"`` entry.

        Returns:
            The stripped content, or an empty string when the entry is missing
            or ``None``. Non-string content is converted with ``str()`` first.
        """
        task = message_context.get("content")
        if task is None:
            # An explicit None used to crash on .strip(); treat it like a missing entry.
            return ""
        if not isinstance(task, str):
            task = str(task)
        return task.strip()
@ -232,7 +198,7 @@ class WebcrawlerAgent(BaseAgent):
                total_tokens += instruction_tokens
                # Zusätzliche Anweisungen für Web-Recherche
-                content_text = await self.chat_service.call_api(
+                content_text = await self.ai_service.call_api(
                    messages=[
                        {
                            "role": "system",
@ -263,7 +229,7 @@ class WebcrawlerAgent(BaseAgent):
            # Limit summary source to ensure we don't exceed API limits
            summary_src_limited = self.limit_text_for_api(summary_src, max_tokens=10000)
-            summary = await self.chat_service.call_api(
+            summary = await self.ai_service.call_api(
                messages=[
                    {
                        "role": "system",
@ -280,322 +246,329 @@ class WebcrawlerAgent(BaseAgent):
        result = f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
        return result
-        async def run_web_query(self, prompt: str) -> List[Dict]:
+    async def run_web_query(self, prompt: str) -> List[Dict]:
-            if prompt=="":
+        if prompt=="":
-                return []
+            return []
-            ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
+        ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
-            'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
+        'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
-            'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
+        'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
-            Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
+        Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
-            """
+        """
-            content_text = await self.chat_service.call_api(
+        content_text = await self.ai_service.call_api(
-                messages=[
+            messages=[
-                    {
+                {
-                        "role": "system",
+                    "role": "system",
-                        "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
+                    "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
-                    },
+                },
-                    {
+                {
-                        "role": "user",
+                    "role": "user",
-                        "content": ptext
+                    "content": ptext
                    }
                ]
            )
            # Remove markdown formatting if present
            if content_text.startswith("```json"):
                # Find the end of the JSON block
                end_marker = "```"
                end_index = content_text.rfind(end_marker)
                if end_index != -1:
                    # Extract the JSON content without the markdown markers
                    content_text = content_text[7:end_index].strip()
            # Now parse the JSON
            try:
                logger.info(f"Valid json received: {str(content_text)}")
                pjson = json.loads(content_text)
                # Now call scrape_json with the parsed dictionary
                result_json = await self.scrape_json(pjson)
                return result_json
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse JSON: {e}")
                logger.error(f"Cleaned content: {content_text[:100]}...")
                return []
        async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
            """
            Scrape web content according to a research strategy.

            Args:
                research_strategy: A dictionary containing:
                    - 'skey': List of search phrases to run through ``search_web``
                    - 'url': List of URLs to fetch directly

            Returns:
                List of result dictionaries: the search hits for every phrase
                first, followed by one entry per direct URL. An empty list is
                returned (and the problem logged) when the strategy is malformed,
                so the result always matches the declared list type.
            """
            logger.info("Starting JSON-based web scraping")

            # Validate input structure
            if not isinstance(research_strategy, dict):
                logger.error("Invalid research_strategy format: not a dictionary")
                return []

            keys = research_strategy.get("skey", [])
            direct_urls = research_strategy.get("url", [])
            if not isinstance(keys, list) or not isinstance(direct_urls, list):
                logger.error("Invalid research_strategy format: keys, or url is not a list")
                return []

            results = []

            # Process search keywords through the search engine
            for keyword in keys:
                logger.info(f"Processing keyword: {keyword}")
                found_results = self.search_web(keyword)  # list of dicts: title, url, snippet, data
                logger.info(f"... {len(found_results)} results found")
                results.extend(found_results)

            # URLs already covered by the search results. The entries in `results`
            # are dictionaries, so membership has to be tested against their 'url'
            # values rather than against the list itself.
            seen_urls = {entry.get('url') for entry in results if isinstance(entry, dict)}

            # Process direct URLs
            logger.info(f"Processing {len(direct_urls)} direct URLs")
            for url in direct_urls:
                if not isinstance(url, str):
                    logger.warning(f"Skipping non-string URL entry: {url!r}")
                    continue
                if url in seen_urls:
                    logger.info(f"Skipping already scraped URL: {url}")
                    continue
                seen_urls.add(url)

                soup = self.read_url(url)

                # Prefer the <title> tag, fall back to the first <h1>
                if isinstance(soup, BeautifulSoup):
                    title_tag = soup.find('title')
                    title = title_tag.text.strip() if title_tag else "No title"
                    if title == "No title":
                        h1_tag = soup.find('h1')
                        if h1_tag:
                            title = h1_tag.text.strip()
                else:
                    # read_url handed back something that is not a parsed page
                    title = "Error fetching page"

                # Pass the title that was just determined instead of a fixed placeholder
                results.append(self.parse_result(soup, title, url))

            logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
            return results
        def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str:
            """
            Return the readable text of a page, capped at ``max_chars`` characters.

            Args:
                soup: Parsed page. Anything that is not a BeautifulSoup instance
                    is stringified and truncated instead.
                max_chars: Upper bound on the length of the returned string.

            Returns:
                Text of the first matching content container (falling back to
                ``<body>``, then the whole document) with boilerplate removed.

            Note:
                Boilerplate elements are extracted from the passed-in tree, so
                ``soup`` is modified in place.
            """
            if not isinstance(soup, BeautifulSoup):
                return str(soup)[:max_chars]

            # Candidate containers in priority order; the first hit wins.
            container_selectors = ('main', 'article', '#content', '.content', '#main', '.main')
            candidates = (soup.select_one(css) for css in container_selectors)
            container = next((node for node in candidates if node), None)
            if not container:
                container = soup.find('body') or soup

            # Drop elements that do not contribute to the main content
            noise_selector = 'script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'
            for noise in container.select(noise_selector):
                noise.extract()

            return container.get_text(separator=' ', strip=True)[:max_chars]
        def tokenize_for_counting(self, text: str) -> List[str]:
            """
            Split text into rough tokens for estimating token usage.

            Each run of word characters is one token and each remaining
            non-whitespace character is its own token. This only approximates
            a model tokenizer.

            Args:
                text: Input text

            Returns:
                List of tokens in order of appearance
            """
            import re

            token_pattern = re.compile(r'\w+|[^\w\s]')
            return token_pattern.findall(text)
        def count_tokens(self, text: str) -> int:
            """
            Estimate how many tokens a text contains.

            Args:
                text: Input text

            Returns:
                Number of tokens produced by ``tokenize_for_counting``
            """
            return len(self.tokenize_for_counting(text))
        def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str:
            """
            Limit a text to a maximum number of (approximate) tokens.

            Args:
                text: Input text
                max_tokens: Maximum number of tokens allowed

            Returns:
                An empty string for empty input, the text unchanged when it is
                within the limit, otherwise the leading part of the text up to
                the last permitted token followed by a truncation marker.
            """
            if not text:
                return ""

            tokens = self.tokenize_for_counting(text)

            # If text is already under the limit, return as is
            if len(tokens) <= max_tokens:
                return text

            truncation_note = "... [content truncated due to length]"
            kept = tokens[:max(max_tokens, 0)]

            # Locate the end of the last kept token in the original text and cut
            # there. Re-joining the tokens with spaces would rewrite the content
            # (e.g. "a.io" -> "a . io"), which corrupts URLs and numbers.
            cut = 0
            for token in kept:
                found = text.find(token, cut)
                if found == -1:
                    # Tokens are not substrings of the text; fall back to joining them
                    return " ".join(kept) + truncation_note
                cut = found + len(token)

            return text[:cut] + truncation_note
        def search_web(self, query: str) -> List[Dict]:
            """
            Search DuckDuckGo's HTML endpoint and fetch the content of each hit.

            Args:
                query: Free-text search phrase.

            Returns:
                Result dictionaries with the keys ``title``, ``url``, ``snippet``
                (taken from the results page) and ``data`` (main text of the
                target page, capped at 30000 characters). Collection stops once
                ``self.max_result`` entries exist. Entries without a link are
                skipped. An empty list is returned when the results page has no
                ``.result`` elements.
            """
            from urllib.parse import parse_qs, urlparse  # only needed for redirect links

            def resolve_target(href: str) -> str:
                """Turn a result link into the URL of the page it points to."""
                if href.startswith('/d.js?q='):
                    # Redirect form: the target is the URL-encoded value of the q parameter
                    start = href.find('?q=') + 3
                    end = href.find('&', start)
                    target = unquote(href[start:] if end == -1 else href[start:end])
                    if not target.startswith(('http://', 'https://')):
                        target = ('https:' if target.startswith('//') else 'https://') + target
                    return target

                # NOTE(review): DuckDuckGo result links have been observed in the form
                # //duckduckgo.com/l/?uddg=<encoded target>; confirm against live markup.
                parts = urlparse(href)
                if parts.path.startswith('/l/') and (not parts.netloc or parts.netloc.endswith('duckduckgo.com')):
                    targets = parse_qs(parts.query).get('uddg')
                    if targets:
                        return targets[0]

                # Protocol-relative links need a scheme before they can be requested
                if href.startswith('//'):
                    return 'https:' + href
                return href

            search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
            results_page = self.read_url(search_url)

            # Query the result containers once and reuse them
            hits = results_page.select('.result') if results_page else []
            if not hits:
                logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
                return []

            results = []
            for hit in hits:
                link = hit.select_one('.result__a')
                href = link.get('href') if link else ''
                if not href:
                    # Nothing to fetch; do not spend a request or a result slot on it
                    continue

                snippet_node = hit.select_one('.result__snippet')
                target_url = resolve_target(href)

                # Fetch the target page and keep only its size-limited main content
                target_page = self.read_url(target_url)
                results.append({
                    'title': link.text.strip(),
                    'url': target_url,
                    'snippet': snippet_node.text.strip() if snippet_node else 'No description',
                    'data': self.extract_main_content(target_page, max_chars=30000),
                })

                # Limit the number of results
                if len(results) >= self.max_result:
                    break

            return results
            def read_url(self, url: str) -> BeautifulSoup:
                """
                Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
                Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben.
                Args:
                    url: Die zu lesende URL
                Returns:
                    BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
                """
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                    'Accept': 'text/html,application/xhtml+xml,application/xml',
                    'Accept-Language': 'en-US,en;q=0.9',
                }
            ]
        )
        # Remove markdown formatting if present
        if content_text.startswith("```json"):
            # Find the end of the JSON block
            end_marker = "```"
            end_index = content_text.rfind(end_marker)
            if end_index != -1:
                # Extract the JSON content without the markdown markers
                content_text = content_text[7:end_index].strip()
-                try:
+        # Now parse the JSON
-                    import time
+        try:
            logger.info(f"Valid json received: {str(content_text)}")
            pjson = json.loads(content_text)
            # Now call scrape_json with the parsed dictionary
            result_json = await self.scrape_json(pjson)
            return result_json
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse JSON: {e}")
            logger.error(f"Cleaned content: {content_text[:100]}...")
            return []
-                    # Initialer Request
+    async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
        """
        Scrapes web content based on a research strategy JSON.
        Args:
            research_strategy: A dictionary containing:
                - 'skey': List of search keywords
                - 'url': List of direct URLs to scrape
        Returns:
            Dictionary with URLs as keys and scraped content as values
        """
        logger.info("Starting JSON-based web scraping")
        results = []
        # Validate input structure
        if not isinstance(research_strategy, dict):
            logger.error("Invalid research_strategy format: not a dictionary")
            return {"error": "Invalid research_strategy format: not a dictionary"}
        keys = research_strategy.get("skey", [])
        direct_urls = research_strategy.get("url", [])
        if not isinstance(keys, list) or not isinstance(direct_urls, list):
            logger.error("Invalid research_strategy format: keys, or url is not a list")
            return {"error": "Invalid research_strategy format: keys, or url is not a list"}
        # Process search keywords through search engine
        for keyword in keys:
            logger.info(f"Processing keyword: {keyword}")
            found_results = self.search_web(keyword) #  List with Dict: title,url,snippet,data
            logger.info(f"... {len(found_results)} results found")
            results.extend(found_results)
        # Process direct URLs
        logger.info(f"Processing {len(direct_urls)} direct URLs")
        for url in direct_urls:
            if url in results:
                logger.info(f"Skipping already scraped URL: {url}")
                continue
            soup = self.read_url(url)
            # Extract title from the page if it exists
            if isinstance(soup, BeautifulSoup):
                title_tag = soup.find('title')
                title = title_tag.text.strip() if title_tag else "No title"
                # Alternative: You could also look for h1 tags if the title tag is missing
                if title == "No title":
                    h1_tag = soup.find('h1')
                    if h1_tag:
                        title = h1_tag.text.strip()
            else:
                # Handle the case where soup is an error message string
                title = "Error fetching page"
            results.append(self.parse_result(soup, title, url))
        logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
        return results
    def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str:
        """
        Return the readable text of a page, capped at ``max_chars`` characters.

        Args:
            soup: Parsed page. Anything that is not a BeautifulSoup instance
                is stringified and truncated instead.
            max_chars: Upper bound on the length of the returned string.

        Returns:
            Text of the first matching content container (falling back to
            ``<body>``, then the whole document) with boilerplate removed.

        Note:
            Boilerplate elements are extracted from the passed-in tree, so
            ``soup`` is modified in place.
        """
        if not isinstance(soup, BeautifulSoup):
            return str(soup)[:max_chars]

        # Candidate containers in priority order; the first hit wins.
        container_selectors = ('main', 'article', '#content', '.content', '#main', '.main')
        candidates = (soup.select_one(css) for css in container_selectors)
        container = next((node for node in candidates if node), None)
        if not container:
            container = soup.find('body') or soup

        # Drop elements that do not contribute to the main content
        noise_selector = 'script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'
        for noise in container.select(noise_selector):
            noise.extract()

        return container.get_text(separator=' ', strip=True)[:max_chars]
    def tokenize_for_counting(self, text: str) -> List[str]:
        """
        Split text into rough tokens for estimating token usage.

        Each run of word characters is one token and each remaining
        non-whitespace character is its own token. This only approximates
        a model tokenizer.

        Args:
            text: Input text

        Returns:
            List of tokens in order of appearance
        """
        import re

        token_pattern = re.compile(r'\w+|[^\w\s]')
        return token_pattern.findall(text)
    def count_tokens(self, text: str) -> int:
        """
        Estimate how many tokens a text contains.

        Args:
            text: Input text

        Returns:
            Number of tokens produced by ``tokenize_for_counting``
        """
        return len(self.tokenize_for_counting(text))
    def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str:
        """
        Limit a text to a maximum number of (approximate) tokens.

        Args:
            text: Input text
            max_tokens: Maximum number of tokens allowed

        Returns:
            An empty string for empty input, the text unchanged when it is
            within the limit, otherwise the leading part of the text up to
            the last permitted token followed by a truncation marker.
        """
        if not text:
            return ""

        tokens = self.tokenize_for_counting(text)

        # If text is already under the limit, return as is
        if len(tokens) <= max_tokens:
            return text

        truncation_note = "... [content truncated due to length]"
        kept = tokens[:max(max_tokens, 0)]

        # Locate the end of the last kept token in the original text and cut
        # there. Re-joining the tokens with spaces would rewrite the content
        # (e.g. "a.io" -> "a . io"), which corrupts URLs and numbers.
        cut = 0
        for token in kept:
            found = text.find(token, cut)
            if found == -1:
                # Tokens are not substrings of the text; fall back to joining them
                return " ".join(kept) + truncation_note
            cut = found + len(token)

        return text[:cut] + truncation_note
    def search_web(self, query: str) -> List[Dict]:
        """
        Search DuckDuckGo's HTML endpoint and fetch the content of each hit.

        Args:
            query: Free-text search phrase.

        Returns:
            Result dictionaries with the keys ``title``, ``url``, ``snippet``
            (taken from the results page) and ``data`` (main text of the
            target page, capped at 30000 characters). Collection stops once
            ``self.max_result`` entries exist. Entries without a link are
            skipped. An empty list is returned when the results page has no
            ``.result`` elements.
        """
        from urllib.parse import parse_qs, urlparse  # only needed for redirect links

        def resolve_target(href: str) -> str:
            """Turn a result link into the URL of the page it points to."""
            if href.startswith('/d.js?q='):
                # Redirect form: the target is the URL-encoded value of the q parameter
                start = href.find('?q=') + 3
                end = href.find('&', start)
                target = unquote(href[start:] if end == -1 else href[start:end])
                if not target.startswith(('http://', 'https://')):
                    target = ('https:' if target.startswith('//') else 'https://') + target
                return target

            # NOTE(review): DuckDuckGo result links have been observed in the form
            # //duckduckgo.com/l/?uddg=<encoded target>; confirm against live markup.
            parts = urlparse(href)
            if parts.path.startswith('/l/') and (not parts.netloc or parts.netloc.endswith('duckduckgo.com')):
                targets = parse_qs(parts.query).get('uddg')
                if targets:
                    return targets[0]

            # Protocol-relative links need a scheme before they can be requested
            if href.startswith('//'):
                return 'https:' + href
            return href

        search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
        results_page = self.read_url(search_url)

        # Query the result containers once and reuse them
        hits = results_page.select('.result') if results_page else []
        if not hits:
            logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
            return []

        results = []
        for hit in hits:
            link = hit.select_one('.result__a')
            href = link.get('href') if link else ''
            if not href:
                # Nothing to fetch; do not spend a request or a result slot on it
                continue

            snippet_node = hit.select_one('.result__snippet')
            target_url = resolve_target(href)

            # Fetch the target page and keep only its size-limited main content
            target_page = self.read_url(target_url)
            results.append({
                'title': link.text.strip(),
                'url': target_url,
                'snippet': snippet_node.text.strip() if snippet_node else 'No description',
                'data': self.extract_main_content(target_page, max_chars=30000),
            })

            # Limit the number of results
            if len(results) >= self.max_result:
                break

        return results
    def read_url(self, url: str) -> BeautifulSoup:
        """
        Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
        Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben.
        Args:
            url: Die zu lesende URL
        Returns:
            BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml',
            'Accept-Language': 'en-US,en;q=0.9',
        }
        try:
            # Initialer Request
            response = requests.get(url, headers=headers, timeout=10)
            # Polling für Status 202
            if response.status_code == 202:
                # Maximal 3 Versuche mit steigenden Intervallen
                backoff_times = [0.5, 1.0, 2.0, 5.0]  # 0.5s, dann 1s, dann 2s
                for wait_time in backoff_times:
                    time.sleep(wait_time)  # Warten mit steigender Zeit
                    response = requests.get(url, headers=headers, timeout=10)
-                    # Polling für Status 202
+                    # Wenn kein 202 mehr, dann abbrechen
-                    if response.status_code == 202:
+                    if response.status_code != 202:
-                        # Maximal 3 Versuche mit steigenden Intervallen
+                        break
                        backoff_times = [0.5, 1.0, 2.0, 5.0]  # 0.5s, dann 1s, dann 2s
-                        for wait_time in backoff_times:
+            # Für andere Fehler-Status einen Fehler auslösen
-                            time.sleep(wait_time)  # Warten mit steigender Zeit
+            response.raise_for_status()
                            response = requests.get(url, headers=headers, timeout=10)
-                            # Wenn kein 202 mehr, dann abbrechen
+            # HTML parsen
-                            if response.status_code != 202:
+            return BeautifulSoup(response.text, 'html.parser')
                                break
-                    # Für andere Fehler-Status einen Fehler auslösen
+        except Exception as e:
-                    response.raise_for_status()
+            # Leeres BeautifulSoup-Objekt erstellen
            return BeautifulSoup("<html><body></body></html>", 'html.parser')
-                    # HTML parsen
+    def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
-                    return BeautifulSoup(response.text, 'html.parser')
+        """
        Parse a BeautifulSoup object into a result dictionary.
-                except Exception as e:
+        Args:
-                    # Leeres BeautifulSoup-Objekt erstellen
+            data: BeautifulSoup object containing the page content
-                    return BeautifulSoup("<html><body></body></html>", 'html.parser')
+            title: Page title
            url: Page URL
-            def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
+        Returns:
-                # Extract snippet/description
+            Dictionary with result data
-                snippet_element = data.select_one('.result__snippet')
+        """
-                snippet = snippet_element.text.strip() if snippet_element else 'No description'
+        # Extract content using the main content extraction method
        content = self.extract_main_content(data, max_chars=30000)
-                result={
+        result = {
-                    'title': title,
+            'title': title,
-                    'url': url,
+            'url': url,
-                    'snippet': snippet,
+            'snippet': 'No description',  # Default value
-                    'data': data.prettify()
+            'data': content
-                }
+        }
-                return result
+        return result
 # Singleton-Instanz
--- a/gwserver/modules/agentservice_dataextraction.py
+++ b/gwserver/modules/agentservice_dataextraction.py
@ -712,7 +712,7 @@ def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[s
            # Check if file ID matches (handle both string and int comparison)
            if (source.get("id") == file_id or 
                (isinstance(source.get("id"), str) and source.get("id") == str(file_id)) or
-                (isinstance(file_id, str) and source.get("id") == int(file_id))):
+                (isinstance(file_id, str) and source.get("id") == file_id)):
                # Add contents of the file
                doc_contents = document.get("contents", [])
--- a/gwserver/modules/agentservice_registry.py
+++ b/gwserver/modules/agentservice_registry.py
@ -95,14 +95,18 @@ class AgentRegistry:
        self.ai_service = ai_service
        self.document_handler = document_handler
        self.lucydom_interface = lucydom_interface
        # Update all registered agents
        self.update_agent_dependencies()
-        # Update dependencies for all registered agents
+
    def update_agent_dependencies(self):
        """Update dependencies for all registered agents"""
        for agent_id, agent in self.agents.items():
            if hasattr(agent, 'set_dependencies'):
                agent.set_dependencies(
-                    ai_service=ai_service,
+                    ai_service=self.ai_service,
-                    document_handler=document_handler,
+                    document_handler=self.document_handler,
-                    lucydom_interface=lucydom_interface
+                    lucydom_interface=self.lucydom_interface
                )
    def register_agent(self, agent: 'BaseAgent'):
--- a/gwserver/modules/agentservice_workflow_execution.py
+++ b/gwserver/modules/agentservice_workflow_execution.py
@ -11,6 +11,7 @@ from datetime import datetime
 from typing import List, Dict, Any, Optional, Tuple, Union
 logger = logging.getLogger(__name__)
 logging.getLogger('matplotlib.font_manager').setLevel(logging.INFO)
 class WorkflowExecution:
    """
@ -39,10 +40,21 @@ class WorkflowExecution:
        # Initialize agent registry
        self.agent_registry = AgentRegistry.get_instance()
        # Set dependencies for agents
        # Initialize file manager
        self.file_manager = get_workflow_file_manager(workflow_id, lucydom_interface)
        # Import and initialize document handler
        from modules.agentservice_document_handler import get_document_handler
        self.document_handler = get_document_handler(workflow_id, lucydom_interface, ai_service)
        self.agent_registry.set_dependencies(
            ai_service=ai_service,
            document_handler=self.document_handler,
            lucydom_interface=lucydom_interface
        )
    async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False):
        """
        Execute the workflow with integrated planning and agent selection.
@ -67,6 +79,7 @@ class WorkflowExecution:
            # 3. Create agent-aware work plan
            work_plan = await self._create_agent_aware_work_plan(workflow, user_message)
            self.logging_utils.info(f"Created agent-aware work plan with {len(work_plan)} activities", "planning")
            self.logging_utils.debug(f"{work_plan}.", "planning")
            # 4. Execute the activities in the work plan
            results = await self._execute_work_plan(workflow, work_plan)
@ -165,7 +178,7 @@ class WorkflowExecution:
                "content_type": source.get("content_type", "unknown")
            })
-        # Create the planning prompt with agent awareness
+        # Create the planning prompt with agent awareness and document handling information
        plan_prompt = f"""
 As an AI workflow manager, create a detailed agent-aware work plan for the following task:
@ -177,21 +190,25 @@ AVAILABLE AGENTS:
 AVAILABLE DOCUMENTS:
 {document_info if document_info else "No documents provided"}
 IMPORTANT: Document extraction happens automatically in the workflow. Documents in the message are already available to all agents. DO NOT assign agent_coder or any other agent specifically for just reading or extracting document content. Only assign agents for tasks that require specific processing beyond what the document handler already provides.
 The work plan should include a structured list of activities. Each activity should have:
 1. title - A short descriptive title for the activity
 2. description - What needs to be done in this activity
 3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence)
 4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents)
-5. document_requirements - Description of which documents are needed for this activity
+5. document_requirements - Description of which documents are needed for this activity (these will be automatically extracted)
 6. expected_output - The expected output format and content
 7. dependencies - List of previous activities this depends on (by index)
 IMPORTANT GUIDELINES:
 - Each activity should have clear objectives and be assigned to the most appropriate agent(s)
 - When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them
- Documents are processed on-demand, so each activity should specify which documents it requires
+- Documents are processed on-demand by the system's document handler, so only specify which documents are needed, not how to extract them
 - DO NOT create activities that only read or extract document content - this happens automatically
 - Create a logical sequence where later activities can use outputs from earlier ones
 - If no specialized agent is needed for a task, use the default "assistant" agent
 - Only use the agent_coder for tasks that require actual coding or complex data analysis, not for simply reading documents
 Return the work plan as a JSON array of activity objects, each with the above properties.
 """
@ -357,6 +374,20 @@ Return the work plan as a JSON array of activity objects, each with the above pr
        for i, agent_id in enumerate(agent_ids):
            # Get the agent
            agent = self.agent_registry.get_agent(agent_id)
            if agent:
                # Ensure dependencies are set
                if hasattr(agent, 'set_dependencies'):
                    agent.set_dependencies(
                        ai_service=self.ai_service,
                        document_handler=self.document_handler,
                        lucydom_interface=self.lucydom_interface
                    )
                # Set document handler if agent supports it
                if hasattr(agent, 'set_document_handler') and hasattr(self, 'document_handler'):
                    agent.set_document_handler(self.document_handler)
            if not agent:
                self.logging_utils.warning(f"Agent '{agent_id}' not found, using assistant instead", "agents")
                agent = self.agent_registry.get_agent("assistant")
@ -380,9 +411,18 @@ Return the work plan as a JSON array of activity objects, each with the above pr
            agent_message = self._create_message(workflow, "user")
            agent_message["content"] = enhanced_prompt
-            # Add any documents from previous agent if this is a continuation
+            # IMPORTANT FIX: Document handling logic
            # First, check if we have documents from previous agent if this is a continuation
            if last_documents and i > 0:
                agent_message["documents"] = last_documents
            # For the first agent, make sure we pass any documents from the most recent user message 
            elif i == 0:
                # Find the most recent user message with documents
                for msg in reversed(workflow.get("messages", [])):
                    if msg.get("role") == "user" and msg.get("documents"):
                        agent_message["documents"] = msg.get("documents", [])
                        self.logging_utils.info(f"Passing {len(agent_message['documents'])} documents from user message to {agent_id}", "agents")
                        break
            # Log agent execution
            self.logging_utils.info(f"Executing agent: {agent_id}", "agents")
@ -402,6 +442,7 @@ Return the work plan as a JSON array of activity objects, each with the above pr
            if "documents" in agent_response:
                response_message["documents"] = agent_response["documents"]
                last_documents = agent_response["documents"]
                self.logging_utils.info(f"Agent {agent_id} produced {len(last_documents)} documents", "agents")
            # Add to workflow
            workflow["messages"].append(response_message)
@ -419,6 +460,7 @@ Return the work plan as a JSON array of activity objects, each with the above pr
            "format": "Text"
        }
    async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]:
        """
        Extract required documents based on requirements description.
--- a/gwserver/workflow_test_result.json
+++ b/gwserver/workflow_test_result.json