backend running for mvp1

valueon 2025-04-15 01:04:38 +02:00
parent b0c45fb798
commit c75a3b67ce
10 changed files with 2529 additions and 615 deletions

gwserver/modules/BAKcoder.py Normal file

File diff suppressed because it is too large.

View file

@ -0,0 +1,613 @@
"""
WebCrawler agent for researching and retrieving information from the web.
Adapted for the refactored core module.
"""
import json
import logging
import random
import time
import traceback
from typing import List, Dict, Any, Optional, Union
import re
import uuid
from datetime import datetime
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
logger = logging.getLogger(__name__)
class WebcrawlerAgent(BaseAgent):
"""Agent für Web-Recherche und Informationsbeschaffung"""
def __init__(self):
"""Initialisiert den WebCrawler-Agenten"""
super().__init__()
self.id = "webcrawler"
self.name = "Webscraper"
self.type = "scraper"
self.description = "Recherchiert Informationen im Web"
self.capabilities = "web_search,information_retrieval,data_collection,source_verification,content_integration"
self.result_format = "SearchResults"
# Add enhanced document capabilities
self.supports_documents = True
self.document_capabilities = ["read", "create"]
self.required_context = ["workflow_id"]
self.document_handler = None
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
# Initialize the chat service
self.chat_service = ChatService()
# Initialize utility classes
self.message_utils = MessageUtils()
# Web crawling configuration
self.max_url = 3
self.max_key = 3
self.max_result = 3
self.timeout = 10
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Referer': 'https://www.google.com/',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
}
self.max_urls = 10
self.max_content_length = 100000
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
info = super().get_agent_info()
info.update({
"metadata": {
"max_url": self.max_url,
"max_result": self.max_result,
"timeout": self.timeout
}
})
return info
def set_document_handler(self, document_handler):
"""Set the document handler for file operations"""
self.document_handler = document_handler
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Processes a message and performs a web research run.
Args:
message: The message to process
context: Additional context
Returns:
The generated response containing the web research
"""
# Extract workflow_id from context or message
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
# Get or create logging_utils
log_func = context.get("log_func") if context else None
logging_utils = LoggingUtils(workflow_id, log_func)
# Send status update using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starte Web-Recherche",
sender_id=self.id,
status="in_progress",
progress=0.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Create response structure
response = {
"role": "assistant",
"content": "",
"agent_id": self.id,
"agent_type": self.type,
"agent_name": self.name,
"result_format": self.result_format,
"workflow_id": workflow_id
}
try:
# Get the query from the message
prompt = await self.get_prompt(message)
logging_utils.info(f"Web-Recherche für: {prompt[:50]}...", "agents")
# Update progress using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Recherchiere: {prompt[:30]}...",
sender_id=self.id,
status="in_progress",
progress=0.3,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Perform the web research
web_query_result = await self.get_web_query(message)
# Final status update
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Web-Recherche abgeschlossen",
sender_id=self.id,
status="completed",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Set the content in the response
response["content"] = web_query_result
return response
except Exception as e:
error_msg = f"Fehler bei der Web-Recherche: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response using protocol
error_message = self.protocol.create_error_message(
error_description=error_msg,
sender_id=self.id,
error_type="web_search",
error_details={"traceback": traceback.format_exc()},
context_id=workflow_id
)
response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
return response
def send_document_request(self, document_description: str, sender_id: str, receiver_id: str, filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
"""Send a document request using the protocol"""
return self.protocol.create_document_request_message(
document_description=document_description,
sender_id=sender_id,
receiver_id=receiver_id,
filters=filters,
context_id=context_id
)
def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str,
output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
"""Send a result message using the protocol"""
return self.protocol.create_result_message(
result_content=result_content,
sender_id=sender_id,
receiver_id=receiver_id,
task_id=task_id,
output_data=output_data,
result_format="SearchResults",
context_id=context_id
)
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
task = message_context.get("content", "")
return task.strip()
async def get_web_query(self, message_context: Dict[str, Any]) -> str:
prompt = await self.get_prompt(message_context)
result_json = await self.run_web_query(prompt)
result_data = ""
summary_src = ""
logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.")
if isinstance(result_json, list):
total_tokens = 0
for i, result in enumerate(result_json, 1):
# Limit content size for each result
result_data_limited = self.limit_text_for_api(result['data'], max_tokens=15000) # Allow ~15000 tokens per result
web_answer_instructions = f"""
Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}'
Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen.
Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen.
Dies ist das Resultat:
{result_data_limited}
"""
# Count tokens in the instructions to ensure we don't exceed API limits
instruction_tokens = self.count_tokens(web_answer_instructions)
if total_tokens + instruction_tokens > 60000:
logger.warning(f"Skipping result {i} to avoid exceeding token limit")
break
total_tokens += instruction_tokens
# Additional instructions for the web research
content_text = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."
},
{
"role": "user",
"content": web_answer_instructions
}
]
)
# Create a summary but ensure we stay within token limits
content_summary = content_text[:2000] # Limit to ~2000 characters
result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_summary}"
summary_src += f"\n{content_summary}"
# Update token count
total_tokens += self.count_tokens(content_summary) + 100 # Add buffer for formatting
else:
result_data = "no data received"
logger.info(f"Web analysis result sent {len(result_data)}B")
# Additional summary
summary = ""
if len(summary_src) > 1:
# Limit summary source to ensure we don't exceed API limits
summary_src_limited = self.limit_text_for_api(summary_src, max_tokens=10000)
summary = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du erstellst prägnante Zusammenfassungen von Rechercheergbnissen."
},
{
"role": "user",
"content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src_limited}\n"
}
]
)
# Format the final result
result = f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
return result
async def run_web_query(self, prompt: str) -> List[Dict]:
if prompt=="":
return []
ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
"""
content_text = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
},
{
"role": "user",
"content": ptext
}
]
)
# Remove markdown formatting if present
if content_text.startswith("```json"):
# Find the end of the JSON block
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
# Extract the JSON content without the markdown markers
content_text = content_text[7:end_index].strip()
# Now parse the JSON
try:
logger.info(f"Valid json received: {str(content_text)}")
pjson = json.loads(content_text)
# Now call scrape_json with the parsed dictionary
result_json = await self.scrape_json(pjson)
return result_json
except json.JSONDecodeError as e:
logger.error(f"Failed to parse JSON: {e}")
logger.error(f"Cleaned content: {content_text[:100]}...")
return []
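# Illustrative sketch (hypothetical values; needs network access for the actual
# scraping): the research-strategy dict that run_web_query expects the model to
# return, handed straight to scrape_json.
async def _demo_research_strategy():
    agent = WebcrawlerAgent()
    strategy = {
        "url": ["https://example.com/annual-report"],           # direct URLs taken from the task text
        "skey": ["renewable energy market share 2024 Europe"],  # search phrases sent to DuckDuckGo
    }
    results = await agent.scrape_json(strategy)
    # each entry carries the keys 'title', 'url', 'snippet' and 'data'
    return results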
async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
"""
Scrapes web content based on a research strategy JSON.
Args:
research_strategy: A dictionary containing:
- 'skey': List of search keywords
- 'url': List of direct URLs to scrape
Returns:
List of result dictionaries, each with 'title', 'url', 'snippet' and 'data' keys
"""
logger.info("Starting JSON-based web scraping")
results = []
# Validate input structure
if not isinstance(research_strategy, dict):
logger.error("Invalid research_strategy format: not a dictionary")
return {"error": "Invalid research_strategy format: not a dictionary"}
keys = research_strategy.get("skey", [])
direct_urls = research_strategy.get("url", [])
if not isinstance(keys, list) or not isinstance(direct_urls, list):
logger.error("Invalid research_strategy format: keys, or url is not a list")
return {"error": "Invalid research_strategy format: keys, or url is not a list"}
# Process search keywords through search engine
for keyword in keys:
logger.info(f"Processing keyword: {keyword}")
found_results = self.search_web(keyword) # List with Dict: title,url,snippet,data
logger.info(f"... {len(found_results)} results found")
results.extend(found_results)
# Process direct URLs
logger.info(f"Processing {len(direct_urls)} direct URLs")
for url in direct_urls:
if any(r.get('url') == url for r in results):
logger.info(f"Skipping already scraped URL: {url}")
continue
soup=self.read_url(url)
# Extract title from the page if it exists
if isinstance(soup, BeautifulSoup):
title_tag = soup.find('title')
title = title_tag.text.strip() if title_tag else "No title"
# Alternative: You could also look for h1 tags if the title tag is missing
if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
else:
# Handle the case where soup is an error message string
title = "Error fetching page"
results.append(self.parse_result(soup, title, url))
logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
return results
def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str:
"""
Extract the main content from an HTML page while limiting character count.
Args:
soup: BeautifulSoup object containing the page content
max_chars: Maximum number of characters to extract
Returns:
Extracted main content as string
"""
if not isinstance(soup, BeautifulSoup):
return str(soup)[:max_chars]
# Try to find main content elements in order of priority
main_content = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
if content:
main_content = content
break
# If no main content found, use the body
if not main_content:
main_content = soup.find('body') or soup
# Remove script, style, nav, footer elements that don't contribute to main content
for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
# Extract text content
text_content = main_content.get_text(separator=' ', strip=True)
# Limit to max_chars
return text_content[:max_chars]
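# Illustrative sketch (hypothetical HTML; instantiating WebcrawlerAgent assumes
# ChatService can be constructed without extra configuration): extracting the main
# content while boilerplate elements such as <nav> and <footer> are dropped.
_demo_html = """
<html><body>
  <nav>menu that should be dropped</nav>
  <main><h1>Quarterly results</h1><p>Revenue grew 12% year over year.</p></main>
  <footer>imprint</footer>
</body></html>
"""
_demo_soup = BeautifulSoup(_demo_html, "html.parser")
print(WebcrawlerAgent().extract_main_content(_demo_soup, max_chars=200))
# -> "Quarterly results Revenue grew 12% year over year."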
def tokenize_for_counting(self, text: str) -> List[str]:
"""
Simple token counter for estimating token usage.
This is an approximation since the exact tokenization depends on the model.
Args:
text: Input text
Returns:
List of tokens
"""
# Simple tokenization by splitting on whitespace and punctuation
import re
return re.findall(r'\w+|[^\w\s]', text)
def count_tokens(self, text: str) -> int:
"""
Count the approximate number of tokens in a text.
Args:
text: Input text
Returns:
Estimated token count
"""
tokens = self.tokenize_for_counting(text)
return len(tokens)
def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str:
"""
Limit the text to a maximum number of tokens.
Args:
text: Input text
max_tokens: Maximum number of tokens allowed
Returns:
Limited text
"""
if not text:
return ""
tokens = self.tokenize_for_counting(text)
# If text is already under the limit, return as is
if len(tokens) <= max_tokens:
return text
# Otherwise, truncate text to max_tokens
return " ".join(tokens[:max_tokens]) + "... [content truncated due to length]"
def search_web(self, query: str) -> List[Dict]:
formatted_query = quote_plus(query)
url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
search_results_soup = self.read_url(url)
if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0:
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
return []
# Extract search results
results = []
# Find all result containers
result_elements = search_results_soup.select('.result')
for result in result_elements:
# Extract title
title_element = result.select_one('.result__a')
title = title_element.text.strip() if title_element else 'No title'
# Extract URL (DuckDuckGo uses redirects, need to extract from href param)
url_element = title_element.get('href') if title_element else ''
extracted_url = 'No URL'
if url_element:
# Extract the actual URL from DuckDuckGo's redirect
if url_element.startswith('/d.js?q='):
start = url_element.find('?q=') + 3 # Skip '?q='
end = url_element.find('&', start) if '&' in url_element[start:] else None
extracted_url = unquote(url_element[start:end])
# Make sure the URL has the correct protocol prefix
if not extracted_url.startswith(('http://', 'https://')):
if not extracted_url.startswith('//'):
extracted_url = 'https://' + extracted_url
else:
extracted_url = 'https:' + extracted_url
else:
extracted_url = url_element
# Extract snippet directly from search results page
snippet_element = result.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
# Now fetch the actual page content for the data field
target_page_soup = self.read_url(extracted_url)
# Use the new content extraction method to limit content size
content = self.extract_main_content(target_page_soup, max_chars=30000)
results.append({
'title': title,
'url': extracted_url,
'snippet': snippet,
'data': content
})
# Limit the number of results if needed
if len(results) >= self.max_result:
break
return results
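# Illustrative sketch (hypothetical query; needs live network access): search_web
# returns at most self.max_result entries, each a dict with 'title', 'url',
# 'snippet' and 'data' (main page text capped at 30000 characters).
for _hit in WebcrawlerAgent().search_web("python beautifulsoup tutorial"):
    print(_hit["title"], _hit["url"], len(_hit["data"]))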
def read_url(self, url: str) -> BeautifulSoup:
"""
Reads a URL and returns a BeautifulSoup parser for its content.
On errors an empty BeautifulSoup object is returned.
Args:
url: The URL to read
Returns:
BeautifulSoup object with the content, or an empty one on errors
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
import time
# Initial request
response = requests.get(url, headers=headers, timeout=10)
# Poll while the server answers with status 202
if response.status_code == 202:
# Up to four retries with increasing intervals
backoff_times = [0.5, 1.0, 2.0, 5.0]  # 0.5s, 1s, 2s, then 5s
for wait_time in backoff_times:
time.sleep(wait_time)  # wait with increasing back-off
response = requests.get(url, headers=headers, timeout=10)
# Stop polling once the status is no longer 202
if response.status_code != 202:
break
# Raise for any other error status
response.raise_for_status()
# Parse the HTML
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
# Return an empty BeautifulSoup object on any failure
return BeautifulSoup("<html><body></body></html>", 'html.parser')
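# Illustrative sketch (hypothetical URL): read_url never raises; on any failure it
# falls back to an empty BeautifulSoup document, so callers can probe the result safely.
_empty_soup = WebcrawlerAgent().read_url("https://nonexistent.example.invalid/")
print(_empty_soup.find("title") is None)   # True for the empty fallback document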
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
# Extract snippet/description
snippet_element = data.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
result={
'title': title,
'url': url,
'snippet': snippet,
'data': data.prettify()
}
return result
# Singleton instance
_webcrawler_agent = None
def get_webcrawler_agent():
"""Returns a singleton instance of the WebCrawler agent"""
global _webcrawler_agent
if _webcrawler_agent is None:
_webcrawler_agent = WebcrawlerAgent()
return _webcrawler_agent
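# Illustrative end-to-end sketch (hypothetical message; needs a configured
# ChatService and network access): how the singleton accessor and process_message
# are expected to be used together.
import asyncio

async def _demo_webcrawler():
    agent = get_webcrawler_agent()
    message = {
        "content": "Summarise recent EU regulation on battery recycling",
        "workflow_id": "wf-demo-001",
    }
    response = await agent.process_message(message, context={"workflow_id": "wf-demo-001"})
    print(response["result_format"], len(response["content"]))

# asyncio.run(_demo_webcrawler())  # run once the services are configured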

View file

@ -38,11 +38,15 @@ class AnalystAgent(BaseAgent):
self.description = "Analyzes and interprets data"
self.capabilities = "data_analysis,pattern_recognition,statistics,visualization,data_interpretation"
self.result_format = "AnalysisReport"
# Initialize AI service
self.ai_service = None
# Document capabilities
self.supports_documents = True
self.document_capabilities = ["read", "analyze", "extract"]
self.required_context = ["data_source", "analysis_objectives"]
self.document_handler = None
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
@ -68,7 +72,11 @@ class AnalystAgent(BaseAgent):
}
})
return info
def set_document_handler(self, document_handler):
"""Set the document handler for file operations"""
self.document_handler = document_handler
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Process a message and perform data analysis.
@ -277,7 +285,7 @@ class AnalystAgent(BaseAgent):
for document in message.get("documents", []):
source = document.get("source", {})
filename = source.get("name", "")
file_id = source.get("id", "")
file_id = source.get("id", 0)
content_type = source.get("content_type", "")
# Skip if not a recognizable data file
@ -288,8 +296,8 @@ class AnalystAgent(BaseAgent):
# Try to get file content through document handler first
file_content = None
if self.document_handler:
file_content = await self.document_handler.get_file_content(file_id)
file_content = self.document_handler.get_file_content_from_message(message, file_id=file_id)
# Process based on file type
if filename.lower().endswith('.csv'):
df = self._process_csv(file_content, filename)
@ -323,33 +331,42 @@ class AnalystAgent(BaseAgent):
return False
def _process_csv(self, file_content: bytes, filename: str) -> Optional[pd.DataFrame]:
def _process_csv(self, file_content: Union[bytes, str], filename: str) -> Optional[pd.DataFrame]:
"""Process CSV file content into a pandas DataFrame"""
if file_content is None:
return None
try:
# Try various encodings
for encoding in ['utf-8', 'latin1', 'cp1252']:
try:
# Use StringIO to create a file-like object
text_content = file_content.decode(encoding)
df = pd.read_csv(io.StringIO(text_content))
# Basic preprocessing
df = self._preprocess_dataframe(df)
return df
except UnicodeDecodeError:
continue
except Exception as e:
logger.error(f"Error processing CSV with {encoding} encoding: {str(e)}")
# If all encodings fail, try one more time with errors='replace'
text_content = file_content.decode('utf-8', errors='replace')
df = pd.read_csv(io.StringIO(text_content))
df = self._preprocess_dataframe(df)
return df
# Handle the case where file_content is already a string
if isinstance(file_content, str):
text_content = file_content
df = pd.read_csv(io.StringIO(text_content))
df = self._preprocess_dataframe(df)
return df
# Handle the case where file_content is bytes
else:
# Try various encodings
for encoding in ['utf-8', 'latin1', 'cp1252']:
try:
# Use StringIO to create a file-like object
text_content = file_content.decode(encoding)
df = pd.read_csv(io.StringIO(text_content))
# Basic preprocessing
df = self._preprocess_dataframe(df)
return df
except UnicodeDecodeError:
continue
except Exception as e:
logger.error(f"Error processing CSV with {encoding} encoding: {str(e)}")
# If all encodings fail, try one more time with errors='replace'
text_content = file_content.decode('utf-8', errors='replace')
df = pd.read_csv(io.StringIO(text_content))
df = self._preprocess_dataframe(df)
return df
except Exception as e:
logger.error(f"Failed to process CSV file {filename}: {str(e)}")
return None
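# Illustrative sketch (hypothetical CSV; assumes AnalystAgent can be instantiated
# with defaults and that _preprocess_dataframe leaves these columns untouched):
# _process_csv now accepts both str and bytes content.
_csv_text = "region,revenue\nEU,120\nUS,95\n"
_agent = AnalystAgent()
print(_agent._process_csv(_csv_text, "demo.csv").shape)                   # (2, 2)
print(_agent._process_csv(_csv_text.encode("utf-8"), "demo.csv").shape)   # (2, 2)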
@ -1482,6 +1499,7 @@ class AnalystAgent(BaseAgent):
Generated analysis
"""
if not self.ai_service:
logging.warning("AI service not available for analysis generation")
return f"## Data Analysis ({analysis_type})\n\nUnable to generate analysis: AI service not available."
# Create specialized prompt based on analysis type

View file

@ -1,6 +1,7 @@
"""
CoderAgent - A unified agent for developing and executing Python code.
Includes code execution capabilities previously in separate modules.
Enhanced with auto-correction loop for handling execution errors.
"""
import logging
@ -13,6 +14,7 @@ import subprocess
import tempfile
import shutil
import sys
import pandas as pd
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
@ -23,7 +25,9 @@ from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
logger = logging.getLogger(__name__)
# Existing SimpleCodeExecutor class remains unchanged
class SimpleCodeExecutor:
# ... existing code ...
"""
A simplified executor that runs Python code in isolated virtual environments.
"""
@ -36,7 +40,8 @@ class SimpleCodeExecutor:
timeout: int = 30,
max_memory_mb: int = 512,
requirements: List[str] = None,
blocked_packages: List[str] = None):
blocked_packages: List[str] = None,
ai_service = None):
"""
Initialize the SimpleCodeExecutor.
@ -57,6 +62,7 @@ class SimpleCodeExecutor:
"tensorflow", "pytorch", "scikit-learn" # Resource intensive
]
self.is_persistent = workflow_id is not None
self.ai_service = ai_service
@classmethod
def get_workflow_environment(cls, workflow_id: str) -> Optional[str]:
@ -67,7 +73,7 @@ class SimpleCodeExecutor:
def set_workflow_environment(cls, workflow_id: str, env_path: str) -> None:
"""Store a workflow environment path."""
cls._workflow_environments[workflow_id] = env_path
def _create_venv(self) -> str:
"""Creates a virtual environment and returns the path."""
# Check for existing environment if using workflow_id
@ -176,50 +182,16 @@ class SimpleCodeExecutor:
return False
def _extract_required_packages(self, code: str) -> List[str]:
"""Extract required packages from import statements and requirements comments in the code."""
import re
# Extract required packages from requirements comments in the 1st code line
packages = set()
# Check for special REQUIREMENTS comment
requirements_match = re.search(r'# REQUIREMENTS:\s*([^\n]+)', code)
if requirements_match:
req_str = requirements_match.group(1).strip()
for pkg in req_str.split(','):
if pkg.strip():
packages.add(pkg.strip())
# Add common base packages
base_packages = [
"requests", "urllib3", "pydantic",
"pandas", "numpy", "matplotlib"
]
for pkg in base_packages:
packages.add(pkg)
# Detect pip install comments
pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code)
for comment in pip_comments:
for pkg in comment.split():
if pkg and not pkg.startswith('-'):
packages.add(pkg.strip())
# Analyze import statements
import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE)
# Standard modules that don't need installation
std_modules = {
'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json',
'collections', 'itertools', 'functools', 'pathlib', 'shutil',
'tempfile', 'uuid', 'subprocess', 'threading', 'logging',
'traceback', 'io', 'copy', 'typing', 'asyncio'
}
# Process all imports
for module in import_lines:
if module not in std_modules:
packages.add(module)
# Check for special REQUIREMENTS comment - specific format we're looking for
first_lines = code.split('\n')[:5] # Only check first few lines
for line in first_lines:
if line.strip().startswith("# REQUIREMENTS:"):
req_str = line.replace("# REQUIREMENTS:", "").strip()
for pkg in req_str.split(','):
if pkg.strip():
packages.add(pkg.strip())
return list(packages)
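# Illustrative sketch (hypothetical code string; assumes SimpleCodeExecutor can be
# constructed with its defaults): what _extract_required_packages picks up from a
# REQUIREMENTS header, pip-install comments and plain imports, in addition to the
# base packages that are always added.
_demo_code = '''# REQUIREMENTS: beautifulsoup4,lxml
# pip install tabulate
import requests
import my_custom_pkg
'''
print(sorted(SimpleCodeExecutor()._extract_required_packages(_demo_code)))
# includes 'beautifulsoup4', 'lxml', 'tabulate', 'my_custom_pkg' plus the base packages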
@ -247,8 +219,8 @@ class SimpleCodeExecutor:
all_requirements = []
# Add explicitly provided requirements
if self.requirements:
all_requirements.extend(self.requirements)
# if self.requirements:
# all_requirements.extend(self.requirements)
# Extract requirements from code
extracted_requirements = self._extract_required_packages(code)
@ -340,12 +312,13 @@ class SimpleCodeExecutor:
# Run the code from root dir
working_dir = os.path.dirname(code_file) # This should be the project root
logger.info(f"DEBUG PATH Root: {os.getcwd()} Code: {code_file} Working Dir: {working_dir}")
logger.debug(f"|{code}|")
process = subprocess.run(
[python_executable, code_file],
timeout=self.timeout,
capture_output=True,
text=True,
cwd=self.temp_dir
cwd=working_dir
)
# Process the output
@ -393,7 +366,7 @@ class SimpleCodeExecutor:
execution_result = {
"success": False,
"output": "",
"error": f"Execution error: {str(e)}",
"error": f"Execution error: {str(e)} for code {code}",
"result": None,
"exit_code": -1
}
@ -426,7 +399,7 @@ class SimpleCodeExecutor:
"""Clean up during garbage collection."""
self.cleanup()
# Unchanged error recommendation function
def get_error_recommendation(error_message: str) -> str:
"""Generate recommendations based on error message."""
if "ImportError" in error_message or "ModuleNotFoundError" in error_message:
@ -460,7 +433,7 @@ To fix the error:
class CoderAgent(BaseAgent):
"""Agent for developing and executing Python code"""
"""Agent for developing and executing Python code with auto-correction capabilities"""
def __init__(self):
"""Initialize the coder agent with proper type and capabilities"""
@ -473,11 +446,15 @@ class CoderAgent(BaseAgent):
self.description = "Develops and executes Python code"
self.capabilities = "code_development,data_processing,file_processing,automation"
self.result_format = "python_code"
# Initialize AI service
self.ai_service = None
# Add document capabilities
self.supports_documents = True
self.document_capabilities = ["read", "reference", "create"]
self.required_context = ["workflow_id"]
self.document_handler = None
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
@ -491,8 +468,12 @@ class CoderAgent(BaseAgent):
self.executor_memory_limit = 512 # MB
# AI service settings
self.ai_temperature = 0.2 # Lower temperature for more deterministic code generation
self.ai_temperature = 0.1 # Lower temperature for more deterministic code generation
self.ai_max_tokens = 2000 # Enough tokens for complex code
# Auto-correction settings (new)
self.max_correction_attempts = 3 # Maximum number of correction attempts
self.correction_temperature = 0.1 # Even lower temperature for corrections
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
@ -500,14 +481,20 @@ class CoderAgent(BaseAgent):
info.update({
"metadata": {
"timeout": self.executor_timeout,
"memory_limit": self.executor_memory_limit
"memory_limit": self.executor_memory_limit,
"max_correction_attempts": self.max_correction_attempts
}
})
return info
def set_document_handler(self, document_handler):
"""Set the document handler for file operations"""
self.document_handler = document_handler
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Process a message to develop and execute Python code.
Process a message to develop and execute Python code with auto-correction.
Args:
message: The message to process
@ -539,34 +526,23 @@ class CoderAgent(BaseAgent):
content = message.get("content", "")
documents = message.get("documents", [])
# Extract code from message content
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
code_to_execute = None
requirements = []
if code_blocks:
# Use the first code block found
code_to_execute = code_blocks[0]
# Clean the code to remove any markdown formatting
code_to_execute = self._clean_code(code_to_execute)
logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents")
else:
# Generate code based on the message content using AI
logging_utils.info("No code found in message, generating new code with AI", "agents")
# Generate code using AI
code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
if not code_to_execute:
logging_utils.warning("AI could not generate code", "agents")
response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
self.message_utils.finalize_message(response)
return response
logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
# Generate code based on the message content using AI
logging_utils.info("Generating new code with AI", "agents")
# Generate code using AI
code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
if not code_to_execute:
logging_utils.warning("AI could not generate code", "agents")
response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
self.message_utils.finalize_message(response)
return response
logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
# Execute the code
# Execute the code with auto-correction loop
if code_to_execute:
logging_utils.info("Executing code", "execution")
# Prepare execution context
execution_context = {
"workflow_id": workflow_id,
@ -575,21 +551,16 @@ class CoderAgent(BaseAgent):
"log_func": log_func
}
# Send a status update
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Processing code execution request",
sender_id=self.id,
status="in_progress",
progress=0.5,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Execute code
result = await self._execute_code(code_to_execute, requirements, execution_context)
# Enhanced execution with auto-correction
result, attempts_info = await self._execute_with_auto_correction(
code_to_execute,
requirements,
execution_context,
content, # Original prompt/message
logging_utils
)
# Prepare response
# Prepare response based on the final result (success or failure)
if result.get("success", False):
# Code execution successful
output = result.get("output", "")
@ -597,20 +568,39 @@ class CoderAgent(BaseAgent):
logging_utils.info("Code executed successfully", "execution")
# Format response content
response_content = f"## Code executed successfully\n\n"
response_content = f"## Code executed successfully"
# Add correction attempts info if any corrections were made
if attempts_info and len(attempts_info) > 1:
response_content += f" (after {len(attempts_info)-1} correction attempts)"
response_content += "\n\n"
# Include the executed code
response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
response_content += f"### Final Executed Code\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
# Include the output if available
if output:
response_content += f"### Output\n\n```\n{output}\n```\n\n"
# Create document with results
data_document = self._create_document_from_result(execution_result)
if data_document:
response["documents"].append(data_document)
# Include the execution result if available
if execution_result:
result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
response_content += f"### Result\n\n```\n{result_str}\n```\n\n"
# Include correction history if any corrections were made
if attempts_info and len(attempts_info) > 1:
response_content += f"### Code Correction History\n\n"
for i, attempt in enumerate(attempts_info[:-1], 1):
response_content += f"**Attempt {i}:**\n\n"
response_content += f"```python\n{attempt['code']}\n```\n\n"
response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
response["content"] = response_content
# Process any files created by the code
@ -628,17 +618,38 @@ class CoderAgent(BaseAgent):
}
response["documents"].append(doc)
else:
# Code execution failed
# Code execution failed after all attempts
error = result.get("error", "Unknown error")
logging_utils.error(f"Error during code execution: {error}", "execution")
logging_utils.error(f"Error during code execution after all correction attempts: {error}", "execution")
# Format error response
response_content = f"## Error during code execution\n\n"
response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
response_content += f"### Error\n\n```\n{error}\n```\n\n"
# Add recommendation based on error
response_content += get_error_recommendation(error)
# Include correction attempts information
if attempts_info:
response_content += f"I made {len(attempts_info)} attempts to correct the code, but couldn't resolve all issues.\n\n"
# Add the final attempt
response_content += f"### Final Code Attempt\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
response_content += f"### Final Error\n\n```\n{attempts_info[-1]['error']}\n```\n\n"
# Add recommendation based on error
response_content += get_error_recommendation(error)
# Add correction history
if len(attempts_info) > 1:
response_content += f"\n### Code Correction History\n\n"
for i, attempt in enumerate(attempts_info[:-1], 1):
response_content += f"**Attempt {i}:**\n\n"
response_content += f"```python\n{attempt['code']}\n```\n\n"
response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
else:
# Just show the code and error
response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
response_content += f"### Error\n\n```\n{error}\n```\n\n"
# Add recommendation based on error
response_content += get_error_recommendation(error)
response["content"] = response_content
else:
@ -650,7 +661,7 @@ class CoderAgent(BaseAgent):
# Log success
logging_utils.info("CoderAgent has successfully processed the request", "agents")
return response
except Exception as e:
@ -662,7 +673,253 @@ class CoderAgent(BaseAgent):
self.message_utils.finalize_message(response)
return response
def _create_document_from_result(self, execution_result, output_format="json"):
"""
Create a document object from execution results
Args:
execution_result: The data returned from code execution
output_format: Desired format (json, csv, etc.)
Returns:
Document object for passing to other agents
"""
if not execution_result:
return None
doc_id = f"data_{uuid.uuid4()}"
# Determine filename and content type based on the data
if isinstance(execution_result, pd.DataFrame):
# Handle DataFrame result
filename = "processed_data.csv"
content_type = "text/csv"
content = execution_result.to_csv(index=False)
elif isinstance(execution_result, dict) or isinstance(execution_result, list):
# Handle dictionary or list result
filename = "processed_data.json"
content_type = "application/json"
content = json.dumps(execution_result)
elif isinstance(execution_result, str):
# Try to determine if string is JSON, CSV, or plain text
if execution_result.strip().startswith('{') or execution_result.strip().startswith('['):
filename = "processed_data.json"
content_type = "application/json"
elif ',' in execution_result and '\n' in execution_result:
filename = "processed_data.csv"
content_type = "text/csv"
else:
filename = "processed_data.txt"
content_type = "text/plain"
content = str(execution_result)
else:
# Default case for other types
filename = "processed_data.txt"
content_type = "text/plain"
content = str(execution_result)
# Create document object
document = {
"id": doc_id,
"source": {
"type": "generated",
"id": doc_id,
"name": filename,
"content_type": content_type,
},
"contents": [{
"type": "text",
"text": content,
"is_extracted": True
}]
}
return document
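# Illustrative sketch (hypothetical result; assumes CoderAgent can be instantiated
# with defaults): the document envelope produced for a plain dict result, ready to
# be appended to response["documents"].
_doc = CoderAgent()._create_document_from_result({"rows": 3, "status": "ok"})
print(_doc["source"]["name"], _doc["source"]["content_type"])  # processed_data.json application/json
print(_doc["contents"][0]["text"])                             # {"rows": 3, "status": "ok"}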
async def _execute_with_auto_correction(
self,
initial_code: str,
requirements: List[str],
context: Dict[str, Any],
original_prompt: str,
logging_utils: LoggingUtils = None
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Execute code with automatic error correction and retries.
Args:
initial_code: The initial Python code to execute
requirements: List of required packages
context: Additional context for execution
original_prompt: The original user request/prompt
logging_utils: Optional logging utility
Returns:
Tuple of (final execution result, list of attempt info dictionaries)
"""
# Initialize tracking data
current_code = initial_code
current_requirements = requirements.copy() if requirements else []
attempts_info = []
# Execute with correction loop
for attempt in range(1, self.max_correction_attempts + 1):
if logging_utils:
if attempt == 1:
logging_utils.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})", "execution")
else:
logging_utils.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})", "execution")
# Execute the current code version
result = await self._execute_code(current_code, current_requirements, context)
# Record attempt information
attempts_info.append({
"attempt": attempt,
"code": current_code,
"error": result.get("error", ""),
"success": result.get("success", False)
})
# Check if execution was successful
if result.get("success", False):
# Success! Return the result and attempt info
return result, attempts_info
# Failed execution - check if we've reached the maximum attempt limit
if attempt >= self.max_correction_attempts:
if logging_utils:
logging_utils.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached, giving up", "execution")
break
# Need to correct the code - generate a fix based on the error
error_message = result.get("error", "Unknown error")
if logging_utils:
logging_utils.info(f"Attempting to fix code error: {error_message[:200]}...", "execution")
# Generate corrected code
corrected_code, new_requirements = await self._generate_code_correction(
current_code,
error_message,
original_prompt,
current_requirements
)
# Update for next attempt
if corrected_code:
current_code = corrected_code
# Add any new requirements
if new_requirements:
for req in new_requirements:
if req not in current_requirements:
current_requirements.append(req)
if logging_utils:
logging_utils.info(f"Added new requirement: {req}", "execution")
else:
# Could not generate correction, break out of the loop
if logging_utils:
logging_utils.warning("Could not generate code correction, giving up", "execution")
break
# If we get here, all attempts failed - return the last result and attempt info
return result, attempts_info
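# Illustrative sketch (hypothetical values): the shape of the attempts_info list
# returned alongside the final execution result; process_message uses it to build
# the correction history, and attempts_info[-1]["code"] is the version that finally
# ran (or the last failed attempt).
_example_attempts = [
    {"attempt": 1, "code": "print(undefined_name)",
     "error": "NameError: name 'undefined_name' is not defined", "success": False},
    {"attempt": 2, "code": "print('fixed')", "error": "", "success": True},
]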
async def _generate_code_correction(
self,
code: str,
error_message: str,
original_prompt: str,
current_requirements: List[str] = None
) -> Tuple[str, List[str]]:
"""
Generate a corrected version of code based on error messages.
Args:
code: The code that produced errors
error_message: The error message to fix
original_prompt: The original task/requirements
current_requirements: List of currently required packages
Returns:
Tuple of (corrected code, new requirements list)
"""
try:
# Create a detailed prompt for code correction
correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:
ORIGINAL TASK:
{original_prompt}
CURRENT CODE:
```python
{code}
```
ERROR MESSAGE:
```
{error_message}
```
CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}
Your task is to analyze the error and provide a corrected version of the code.
Focus specifically on fixing the error while preserving the original functionality.
Common fixes might include:
- Fixing syntax errors (missing parentheses, indentation, etc.)
- Resolving import errors by adding appropriate requirements
- Correcting file paths or handling file not found errors
- Adding error handling for specific edge cases
- Fixing logical errors in the code
FORMAT INSTRUCTIONS:
1. Provide ONLY the complete fixed Python code without ANY explanation
2. DO NOT include code block markers like ```python or ```
3. DO NOT explain what the code does before or after it
4. DO NOT include any text that is not valid Python code
5. Start your response directly with the valid Python code
6. End your response with valid Python code
If you need to add new required packages, place them in a specially formatted comment at the top of your code like this:
# REQUIREMENTS: package1,package2,package3
Your entire response must be valid Python that can be executed without modification.
"""
# Create messages for the API
messages = [
{"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, fixed Python code without any explanations, markdown formatting, or non-code text. Your response should be nothing but valid, fixed Python code that can be executed directly."},
{"role": "user", "content": correction_prompt}
]
# Call the API with very low temperature for deterministic fixes
generated_content = await self.ai_service.call_api(
messages,
temperature=self.correction_temperature,
max_tokens=self.ai_max_tokens
)
# Clean the generated content to ensure it's only valid Python code
fixed_code = self._clean_code(generated_content)
# Extract requirements from special comment at the top of the code
new_requirements = []
for line in fixed_code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
req_str = line.replace("# REQUIREMENTS:", "").strip()
new_requirements = [r.strip() for r in req_str.split(',') if r.strip()]
break
return fixed_code, new_requirements
except Exception as e:
logging.error(f"Error generating code correction: {str(e)}", exc_info=True)
# Return None to indicate failure
return None, []
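# Illustrative sketch (hypothetical model output): a well-formed correction response
# carrying new requirements in the header comment; _generate_code_correction would
# return it as (code, ["openpyxl"]).
_fixed_code_example = """# REQUIREMENTS: openpyxl
import pandas as pd
df = pd.read_excel("input.xlsx")
print(df.head())
"""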
def _clean_code(self, code: str) -> str:
"""
Clean up code by removing markdown code block markers and other formatting artifacts.
@ -728,9 +985,6 @@ class CoderAgent(BaseAgent):
Tuple of (generated Python code, required packages)
"""
try:
# Initialize AI service
chat_service = ChatService()
# Prepare a prompt for code generation
ai_prompt = f"""Generate Python code to solve the following task:
{prompt}
@ -767,7 +1021,7 @@ FORMAT INSTRUCTIONS:
- Start your response directly with valid Python code
- End your response with valid Python code
For required packages, place them in a specially formatted comment at the top of your code like this:
For required packages, place them in a specially formatted comment at the top of your code on one line like this:
# REQUIREMENTS: pandas,numpy,matplotlib,requests
Your entire response must be valid Python that can be executed without modification.
@ -781,7 +1035,7 @@ Your entire response must be valid Python that can be executed without modification.
# Call the API
logging.info(f"Calling AI API to generate code")
generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
# Clean the generated content to ensure it's only valid Python code
code = self._clean_code(generated_content)
@ -843,7 +1097,8 @@ result = {{"error": "Code generation failed", "message": "{error_str}"}}
timeout=self.executor_timeout,
max_memory_mb=self.executor_memory_limit,
requirements=requirements,
blocked_packages=blocked_packages
blocked_packages=blocked_packages,
ai_service=self.ai_service
)
# Prepare input data for the code
@ -924,7 +1179,6 @@ result = {{"error": "Code generation failed", "message": "{error_str}"}}
# Log error information
error = result.get("error", "Unknown error")
logging_utils.error(f"Error during code execution: {error}", "execution")
print("DEBUG CODE-ERROR:",code,"#END")
# Clean up non-persistent environments
if not executor.is_persistent:

View file

@ -13,7 +13,6 @@ from datetime import datetime
import uuid
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
from modules.agentservice_filemanager import FileManager # Import the file manager
@ -63,7 +62,11 @@ class DocumentationAgent(BaseAgent):
}
})
return info
def set_document_handler(self, document_handler):
"""Set the document handler for file operations"""
self.document_handler = document_handler
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Process a message and create documentation.

View file

@ -5,21 +5,16 @@ Adapted for the refactored core module.
import json
import logging
import random
import time
import traceback
from typing import List, Dict, Any, Optional, Union
import re
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
from modules.agentservice_utils import MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentCommunicationProtocol
logger = logging.getLogger(__name__)
@ -42,13 +37,14 @@ class WebcrawlerAgent(BaseAgent):
self.supports_documents = True
self.document_capabilities = ["read", "create"]
self.required_context = ["workflow_id"]
self.document_handler = None
# Initialize AI service
self.ai_service = None
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
# Chat-Service initialisieren
self.chat_service = ChatService()
# Utility-Klassen initialisieren
self.message_utils = MessageUtils()
@ -57,17 +53,6 @@ class WebcrawlerAgent(BaseAgent):
self.max_key = 3
self.max_result = 3
self.timeout = 10
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Referer': 'https://www.google.com/',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
}
self.max_urls = 10
self.max_content_length = 100000
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
@ -80,7 +65,11 @@ class WebcrawlerAgent(BaseAgent):
}
})
return info
def set_document_handler(self, document_handler):
"""Set the document handler for file operations"""
self.document_handler = document_handler
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Processes a message and performs a web research run.
@ -172,29 +161,6 @@ class WebcrawlerAgent(BaseAgent):
response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
return response
def send_document_request(self, document_description: str, sender_id: str, receiver_id: str, filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
"""Send a document request using the protocol"""
return self.protocol.create_document_request_message(
document_description=document_description,
sender_id=sender_id,
receiver_id=receiver_id,
filters=filters,
context_id=context_id
)
def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str,
output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
"""Send a result message using the protocol"""
return self.protocol.create_result_message(
result_content=result_content,
sender_id=sender_id,
receiver_id=receiver_id,
task_id=task_id,
output_data=output_data,
result_format="SearchResults",
context_id=context_id
)
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
task = message_context.get("content", "")
@ -232,7 +198,7 @@ class WebcrawlerAgent(BaseAgent):
total_tokens += instruction_tokens
# Additional instructions for the web research
content_text = await self.chat_service.call_api(
content_text = await self.ai_service.call_api(
messages=[
{
"role": "system",
@ -263,7 +229,7 @@ class WebcrawlerAgent(BaseAgent):
# Limit summary source to ensure we don't exceed API limits
summary_src_limited = self.limit_text_for_api(summary_src, max_tokens=10000)
summary = await self.chat_service.call_api(
summary = await self.ai_service.call_api(
messages=[
{
"role": "system",
@ -280,322 +246,329 @@ class WebcrawlerAgent(BaseAgent):
result = f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
return result
async def run_web_query(self, prompt: str) -> List[Dict]:
if prompt=="":
return []
ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
"""
content_text = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
},
{
"role": "user",
"content": ptext
}
]
)
# Remove markdown formatting if present
if content_text.startswith("```json"):
# Find the end of the JSON block
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
# Extract the JSON content without the markdown markers
content_text = content_text[7:end_index].strip()
# Now parse the JSON
try:
logger.info(f"Valid json received: {str(content_text)}")
pjson = json.loads(content_text)
# Now call scrape_json with the parsed dictionary
result_json = await self.scrape_json(pjson)
return result_json
except json.JSONDecodeError as e:
logger.error(f"Failed to parse JSON: {e}")
logger.error(f"Cleaned content: {content_text[:100]}...")
return []
async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
"""
Scrapes web content based on a research strategy JSON.
Args:
research_strategy: A dictionary containing:
- 'skey': List of search keywords
- 'url': List of direct URLs to scrape
Returns:
List of result dictionaries, each with 'title', 'url', 'snippet' and 'data' keys
"""
logger.info("Starting JSON-based web scraping")
results = []
# Validate input structure
if not isinstance(research_strategy, dict):
logger.error("Invalid research_strategy format: not a dictionary")
return {"error": "Invalid research_strategy format: not a dictionary"}
keys = research_strategy.get("skey", [])
direct_urls = research_strategy.get("url", [])
if not isinstance(keys, list) or not isinstance(direct_urls, list):
logger.error("Invalid research_strategy format: keys, or url is not a list")
return {"error": "Invalid research_strategy format: keys, or url is not a list"}
# Process search keywords through search engine
for keyword in keys:
logger.info(f"Processing keyword: {keyword}")
found_results = self.search_web(keyword) # List with Dict: title,url,snippet,data
logger.info(f"... {len(found_results)} results found")
results.extend(found_results)
# Process direct URLs
logger.info(f"Processing {len(direct_urls)} direct URLs")
for url in direct_urls:
if any(r.get('url') == url for r in results):
logger.info(f"Skipping already scraped URL: {url}")
continue
soup=self.read_url(url)
# Extract title from the page if it exists
if isinstance(soup, BeautifulSoup):
title_tag = soup.find('title')
title = title_tag.text.strip() if title_tag else "No title"
# Alternative: You could also look for h1 tags if the title tag is missing
if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
else:
# Handle the case where soup is an error message string
title = "Error fetching page"
results.append(self.parse_result(soup, title, url))
logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
return results
def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str:
"""
Extract the main content from an HTML page while limiting character count.
Args:
soup: BeautifulSoup object containing the page content
max_chars: Maximum number of characters to extract
Returns:
Extracted main content as string
"""
if not isinstance(soup, BeautifulSoup):
return str(soup)[:max_chars]
# Try to find main content elements in order of priority
main_content = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
if content:
main_content = content
break
# If no main content found, use the body
if not main_content:
main_content = soup.find('body') or soup
# Remove script, style, nav, footer elements that don't contribute to main content
for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
# Extract text content
text_content = main_content.get_text(separator=' ', strip=True)
# Limit to max_chars
return text_content[:max_chars]
def tokenize_for_counting(self, text: str) -> List[str]:
"""
Simple token counter for estimating token usage.
This is an approximation since the exact tokenization depends on the model.
Args:
text: Input text
Returns:
List of tokens
"""
# Simple tokenization by splitting on whitespace and punctuation
import re
return re.findall(r'\w+|[^\w\s]', text)
def count_tokens(self, text: str) -> int:
"""
Count the approximate number of tokens in a text.
Args:
text: Input text
Returns:
Estimated token count
"""
tokens = self.tokenize_for_counting(text)
return len(tokens)
def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str:
"""
Limit the text to a maximum number of tokens.
Args:
text: Input text
max_tokens: Maximum number of tokens allowed
Returns:
Limited text
"""
if not text:
return ""
tokens = self.tokenize_for_counting(text)
# If text is already under the limit, return as is
if len(tokens) <= max_tokens:
return text
# Otherwise, truncate text to max_tokens
return " ".join(tokens[:max_tokens]) + "... [content truncated due to length]"
def search_web(self, query: str) -> List[Dict]:
"""
Run a DuckDuckGo HTML search for the query and return a list of result dictionaries.
Args:
query: Search query string
Returns:
List of dicts with 'title', 'url', 'snippet' and 'data' keys (at most self.max_result entries)
"""
formatted_query = quote_plus(query)
url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
search_results_soup = self.read_url(url)
if not search_results_soup or not search_results_soup.select('.result'):
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
return []
# Extract search results
results = []
# Find all result containers
result_elements = search_results_soup.select('.result')
for result in result_elements:
# Extract title
title_element = result.select_one('.result__a')
title = title_element.text.strip() if title_element else 'No title'
# Extract URL (DuckDuckGo uses redirects, need to extract from href param)
url_element = title_element.get('href') if title_element else ''
extracted_url = 'No URL'
if url_element:
# Extract the actual URL from DuckDuckGo's redirect
if url_element.startswith('/d.js?q='):
start = url_element.find('?q=') + 3 # Skip '?q='
end = url_element.find('&', start) if '&' in url_element[start:] else None
extracted_url = unquote(url_element[start:end])
# Make sure the URL has the correct protocol prefix
if not extracted_url.startswith(('http://', 'https://')):
if not extracted_url.startswith('//'):
extracted_url = 'https://' + extracted_url
else:
extracted_url = 'https:' + extracted_url
else:
extracted_url = url_element
# Extract snippet directly from search results page
snippet_element = result.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
# Now fetch the actual page content for the data field
target_page_soup = self.read_url(extracted_url)
# Use the new content extraction method to limit content size
content = self.extract_main_content(target_page_soup, max_chars=30000)
results.append({
'title': title,
'url': extracted_url,
'snippet': snippet,
'data': content
})
# Limit the number of results if needed
if len(results) >= self.max_result:
break
return results
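# Example (illustrative): search_web("python beautifulsoup tutorial") requests
# https://html.duckduckgo.com/html/?q=python+beautifulsoup+tutorial and returns at most
# self.max_result entries of the form
#   {"title": "...", "url": "https://...", "snippet": "...", "data": "<main text, <= 30000 chars>"}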
def read_url(self, url: str) -> BeautifulSoup:
"""
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben.
Args:
url: Die zu lesende URL
Returns:
BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
# Initialer Request
response = requests.get(url, headers=headers, timeout=10)
# Polling für Status 202: manche Seiten liefern den Inhalt erst nach kurzer Wartezeit
if response.status_code == 202:
# Bis zu vier weitere Versuche mit steigenden Intervallen (0.5s, 1s, 2s, 5s)
backoff_times = [0.5, 1.0, 2.0, 5.0]
for wait_time in backoff_times:
time.sleep(wait_time)
response = requests.get(url, headers=headers, timeout=10)
# Wenn kein 202 mehr, dann abbrechen
if response.status_code != 202:
break
# Für andere Fehler-Status einen Fehler auslösen
response.raise_for_status()
# HTML parsen
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
logger.warning(f"Fehler beim Laden von {url}: {e}")
# Leeres BeautifulSoup-Objekt zurückgeben
return BeautifulSoup("<html><body></body></html>", 'html.parser')
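# Behaviour sketch (illustrative): if a host first answers "202 Accepted", read_url retries after
# 0.5s, 1s, 2s and 5s and parses the first non-202 response; any request error or HTTP error status
# results in an empty <html><body></body></html> soup instead of an exception.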
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
"""
Parse a BeautifulSoup object into a result dictionary.
Args:
data: BeautifulSoup object containing the page content
title: Page title
url: Page URL
Returns:
Dictionary with result data
"""
# Extract content using the main content extraction method
content = self.extract_main_content(data, max_chars=30000)
result = {
'title': title,
'url': url,
'snippet': 'No description', # Default value
'data': content
}
return result
# Singleton-Instanz

View file

@@ -712,7 +712,7 @@ def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[s
# Check if file ID matches (handle both string and int comparison)
if (source.get("id") == file_id or
(isinstance(source.get("id"), str) and source.get("id") == str(file_id)) or
(isinstance(file_id, str) and source.get("id") == int(file_id))):
(isinstance(file_id, str) and source.get("id") == file_id)):
# Add contents of the file
doc_contents = document.get("contents", [])

View file

@@ -95,14 +95,18 @@ class AgentRegistry:
self.ai_service = ai_service
self.document_handler = document_handler
self.lucydom_interface = lucydom_interface
# Update dependencies for all registered agents
# Update all registered agents
self.update_agent_dependencies()
def update_agent_dependencies(self):
"""Update dependencies for all registered agents"""
for agent_id, agent in self.agents.items():
if hasattr(agent, 'set_dependencies'):
agent.set_dependencies(
ai_service=ai_service,
document_handler=document_handler,
lucydom_interface=lucydom_interface
ai_service=self.ai_service,
document_handler=self.document_handler,
lucydom_interface=self.lucydom_interface
)
def register_agent(self, agent: 'BaseAgent'):

View file

@@ -11,6 +11,7 @@ from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union
logger = logging.getLogger(__name__)
logging.getLogger('matplotlib.font_manager').setLevel(logging.INFO)
class WorkflowExecution:
"""
@@ -39,10 +40,21 @@ class WorkflowExecution:
# Initialize agent registry
self.agent_registry = AgentRegistry.get_instance()
# Set dependencies for agents
# Initialize file manager
self.file_manager = get_workflow_file_manager(workflow_id, lucydom_interface)
# Import and initialize document handler
from modules.agentservice_document_handler import get_document_handler
self.document_handler = get_document_handler(workflow_id, lucydom_interface, ai_service)
self.agent_registry.set_dependencies(
ai_service=ai_service,
document_handler=self.document_handler,
lucydom_interface=lucydom_interface
)
async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False):
"""
Execute the workflow with integrated planning and agent selection.
@@ -67,6 +79,7 @@ class WorkflowExecution:
# 3. Create agent-aware work plan
work_plan = await self._create_agent_aware_work_plan(workflow, user_message)
self.logging_utils.info(f"Created agent-aware work plan with {len(work_plan)} activities", "planning")
self.logging_utils.debug(f"{work_plan}.", "planning")
# 4. Execute the activities in the work plan
results = await self._execute_work_plan(workflow, work_plan)
@@ -165,7 +178,7 @@
"content_type": source.get("content_type", "unknown")
})
# Create the planning prompt with agent awareness
# Create the planning prompt with agent awareness and document handling information
plan_prompt = f"""
As an AI workflow manager, create a detailed agent-aware work plan for the following task:
@@ -177,21 +190,25 @@ AVAILABLE AGENTS:
AVAILABLE DOCUMENTS:
{document_info if document_info else "No documents provided"}
IMPORTANT: Document extraction happens automatically in the workflow. Documents in the message are already available to all agents. DO NOT assign agent_coder or any other agent specifically for just reading or extracting document content. Only assign agents for tasks that require specific processing beyond what the document handler already provides.
The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence)
4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents)
5. document_requirements - Description of which documents are needed for this activity
5. document_requirements - Description of which documents are needed for this activity (these will be automatically extracted)
6. expected_output - The expected output format and content
7. dependencies - List of previous activities this depends on (by index)
IMPORTANT GUIDELINES:
- Each activity should have clear objectives and be assigned to the most appropriate agent(s)
- When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them
- Documents are processed on-demand, so each activity should specify which documents it requires
- Documents are processed on-demand by the system's document handler, so only specify which documents are needed, not how to extract them
- DO NOT create activities that only read or extract document content - this happens automatically
- Create a logical sequence where later activities can use outputs from earlier ones
- If no specialized agent is needed for a task, use the default "assistant" agent
- Only use the agent_coder for tasks that require actual coding or complex data analysis, not for simply reading documents
Return the work plan as a JSON array of activity objects, each with the above properties.
"""
@@ -357,6 +374,20 @@ Return the work plan as a JSON array of activity objects, each with the above pr
for i, agent_id in enumerate(agent_ids):
# Get the agent
agent = self.agent_registry.get_agent(agent_id)
if agent:
# Ensure dependencies are set
if hasattr(agent, 'set_dependencies'):
agent.set_dependencies(
ai_service=self.ai_service,
document_handler=self.document_handler,
lucydom_interface=self.lucydom_interface
)
# Set document handler if agent supports it
if hasattr(agent, 'set_document_handler') and hasattr(self, 'document_handler'):
agent.set_document_handler(self.document_handler)
if not agent:
self.logging_utils.warning(f"Agent '{agent_id}' not found, using assistant instead", "agents")
agent = self.agent_registry.get_agent("assistant")
@@ -380,9 +411,18 @@ Return the work plan as a JSON array of activity objects, each with the above pr
agent_message = self._create_message(workflow, "user")
agent_message["content"] = enhanced_prompt
# Add any documents from previous agent if this is a continuation
# IMPORTANT FIX: Document handling logic
# First, check if we have documents from previous agent if this is a continuation
if last_documents and i > 0:
agent_message["documents"] = last_documents
# For the first agent, make sure we pass any documents from the most recent user message
elif i == 0:
# Find the most recent user message with documents
for msg in reversed(workflow.get("messages", [])):
if msg.get("role") == "user" and msg.get("documents"):
agent_message["documents"] = msg.get("documents", [])
self.logging_utils.info(f"Passing {len(agent_message['documents'])} documents from user message to {agent_id}", "agents")
break
# Log agent execution
self.logging_utils.info(f"Executing agent: {agent_id}", "agents")
@@ -402,6 +442,7 @@ Return the work plan as a JSON array of activity objects, each with the above pr
if "documents" in agent_response:
response_message["documents"] = agent_response["documents"]
last_documents = agent_response["documents"]
self.logging_utils.info(f"Agent {agent_id} produced {len(last_documents)} documents", "agents")
# Add to workflow
workflow["messages"].append(response_message)
@@ -418,7 +459,8 @@ Return the work plan as a JSON array of activity objects, each with the above pr
"content": "No agent response was generated.",
"format": "Text"
}
async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]:
"""
Extract required documents based on requirements description.

File diff suppressed because one or more lines are too long