web refactored

2025-07-12 16:26:20 +02:00 · 2025-07-12 16:26:20 +02:00 · b1be8dd81c
commit b1be8dd81c
parent cfb34c6a38
6 changed files with 1242 additions and 186 deletions
--- a/app.py
+++ b/app.py
@ -37,6 +37,22 @@ def initLogging():
                       ('.well-known/appspecific/com.chrome.devtools.json' in record.msg or
                        'Request: /index.html' in record.msg))
    # Add filter to exclude HTTP debug messages
    class HTTPDebugFilter(logging.Filter):
        def filter(self, record):
            if isinstance(record.msg, str):
                # Filter out HTTP debug messages
                http_debug_patterns = [
                    'receive_response_body.started',
                    'receive_response_body.complete', 
                    'response_closed.started',
                    '_send_single_request',
                    'httpcore.http11',
                    'httpx._client'
                ]
                return not any(pattern in record.msg for pattern in http_debug_patterns)
            return True
    # Configure handlers based on config
    handlers = []
@ -45,6 +61,7 @@ def initLogging():
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(consoleFormatter)
        consoleHandler.addFilter(ChromeDevToolsFilter())
        consoleHandler.addFilter(HTTPDebugFilter())
        handlers.append(consoleHandler)
    # Add file handler if enabled
@ -71,6 +88,7 @@ def initLogging():
        )
        fileHandler.setFormatter(fileFormatter)
        fileHandler.addFilter(ChromeDevToolsFilter())
        fileHandler.addFilter(HTTPDebugFilter())
        handlers.append(fileHandler)
    # Configure the root logger
@ -83,7 +101,7 @@ def initLogging():
    )
    # Silence noisy third-party libraries - use the same level as the root logger
-    noisyLoggers = ["httpx", "urllib3", "asyncio", "fastapi.security.oauth2"]
+    noisyLoggers = ["httpx", "httpcore", "urllib3", "asyncio", "fastapi.security.oauth2"]
    for loggerName in noisyLoggers:
        logging.getLogger(loggerName).setLevel(logLevel)
--- a/modules/chat/managerChat.py
+++ b/modules/chat/managerChat.py
@ -317,8 +317,8 @@ class ChatManager:
                'workflow_id': workflow.id
            })
-            # Get AI response
+            # Get AI response with fallback mechanism
-            response = await self.service.callAiTextAdvanced(prompt)
+            response = await self._callAIWithCircuitBreaker(prompt, "task_planning")
            # Parse and validate task plan
            task_plan_dict = self._parseTaskPlanResponse(response)
@ -372,8 +372,22 @@ class ChatManager:
            return task_plan
        except Exception as e:
-            logger.error(f"Error in high-level task planning: {str(e)}")
+            error_message = str(e)
-            raise Exception(f"AI is required for task planning but failed: {str(e)}")
+            logger.error(f"Error in high-level task planning: {error_message}")
            # Provide more specific error messages based on the error type
            if "overloaded" in error_message.lower() or "529" in error_message:
                detailed_error = "AI service is currently overloaded. Please try again in a few minutes."
            elif "rate limit" in error_message.lower() or "429" in error_message:
                detailed_error = "Rate limit exceeded. Please wait before making another request."
            elif "api key" in error_message.lower() or "401" in error_message:
                detailed_error = "Invalid API key. Please check your AI service configuration."
            elif "timeout" in error_message.lower():
                detailed_error = "AI service request timed out. Please try again."
            else:
                detailed_error = f"AI service error: {error_message}"
            raise Exception(detailed_error)
    # Phase 2: Task Definition and Action Generation
    async def defineTaskActions(self, task_step: TaskStep, workflow: ChatWorkflow, previous_results: List[str] = None, 
@ -641,29 +655,127 @@ class ChatManager:
    # ===== Enhanced Task Planning Methods =====
    async def _callAIWithCircuitBreaker(self, prompt: str, context: str) -> str:
-        """Call AI with circuit breaker pattern for fault tolerance"""
+        """Call AI with intelligent routing based on complexity and circuit breaker pattern"""
-        try:
+        max_retries = 3
-            # Check circuit breaker
+        base_delay = 2  # Start with 2 seconds
-            if self._isCircuitBreakerOpen():
+        
-                raise Exception("AI circuit breaker is open - too many recent failures")
+        for attempt in range(max_retries):
-            
+            try:
-            # Call AI with timeout
+                # Check circuit breaker
-            logger.debug(f"ACTION GENERATION PROMPT: {prompt}")
+                if self._isCircuitBreakerOpen():
-            response = await asyncio.wait_for(
+                    raise Exception("AI circuit breaker is open - too many recent failures")
-                self._callAI(prompt, context),
+                
-                timeout=self.ai_call_timeout
+                # Determine which AI service to use based on complexity
-            )
+                ai_choice = self._determineAIChoice(prompt, context)
-            
+                logger.debug(f"AI choice for {context}: {ai_choice} (attempt {attempt + 1}/{max_retries})")
-            # Reset failure count on success
+                
-            self.ai_failure_count = 0
+                if ai_choice == "advanced":
-            return response
+                    # Use advanced AI for complex tasks
-            
+                    try:
-        except asyncio.TimeoutError:
+                        response = await asyncio.wait_for(
-            self._recordAIFailure("Timeout")
+                            self._callAdvancedAI(prompt, context),
-            raise Exception(f"AI call timed out after {self.ai_call_timeout} seconds")
+                            timeout=self.ai_call_timeout
-        except Exception as e:
+                        )
-            self._recordAIFailure(str(e))
+                        
-            raise
+                        # Reset failure count on success
                        self.ai_failure_count = 0
                        logger.info(f"Advanced AI call successful for {context}")
                        return response
                    except Exception as advanced_error:
                        error_message = str(advanced_error)
                        logger.warning(f"Advanced AI call failed for {context}: {error_message}")
                        # Fall back to basic AI for complex tasks
                        logger.info(f"Falling back to basic AI for complex task: {context}")
                        try:
                            response = await asyncio.wait_for(
                                self._callStandardAI(prompt, context),
                                timeout=self.ai_call_timeout
                            )
                            # Reset failure count on success
                            self.ai_failure_count = 0
                            logger.info(f"Basic AI fallback successful for complex task: {context}")
                            return response
                        except Exception as standard_error:
                            # Both failed for complex task
                            error_message = f"Advanced AI failed: {str(advanced_error)}. Basic AI failed: {str(standard_error)}"
                            raise Exception(error_message)
                else:  # basic
                    # Use basic AI for simple tasks
                    try:
                        response = await asyncio.wait_for(
                            self._callStandardAI(prompt, context),
                            timeout=self.ai_call_timeout
                        )
                        # Reset failure count on success
                        self.ai_failure_count = 0
                        logger.info(f"Basic AI call successful for {context}")
                        return response
                    except Exception as basic_error:
                        error_message = str(basic_error)
                        logger.warning(f"Basic AI call failed for {context}: {error_message}")
                        # Only upgrade to advanced AI for critical simple tasks
                        if self._isCriticalTask(context):
                            logger.info(f"Upgrading to advanced AI for critical simple task: {context}")
                            try:
                                response = await asyncio.wait_for(
                                    self._callAdvancedAI(prompt, context),
                                    timeout=self.ai_call_timeout
                                )
                                # Reset failure count on success
                                self.ai_failure_count = 0
                                logger.info(f"Advanced AI upgrade successful for critical task: {context}")
                                return response
                            except Exception as advanced_error:
                                # Both failed for critical task
                                error_message = f"Basic AI failed: {str(basic_error)}. Advanced AI failed: {str(advanced_error)}"
                                raise Exception(error_message)
                        else:
                            # Non-critical simple task failed
                            raise Exception(f"Basic AI failed for simple task: {error_message}")
            except asyncio.TimeoutError:
                self._recordAIFailure("Timeout")
                if attempt < max_retries - 1:
                    delay = base_delay * (2 ** attempt)  # Exponential backoff
                    logger.warning(f"AI call timed out, retrying in {delay} seconds (attempt {attempt + 1}/{max_retries})")
                    await asyncio.sleep(delay)
                    continue
                else:
                    raise Exception(f"AI call timed out after {self.ai_call_timeout} seconds")
            except Exception as e:
                error_message = str(e)
                # Special handling for overloaded service (529 error)
                if "overloaded" in error_message.lower() or "529" in error_message:
                    if attempt < max_retries - 1:
                        delay = base_delay * (2 ** attempt)  # Exponential backoff
                        logger.warning(f"AI service overloaded, retrying in {delay} seconds (attempt {attempt + 1}/{max_retries})")
                        await asyncio.sleep(delay)
                        continue
                    else:
                        # Don't record this as a circuit breaker failure since it's a service issue
                        raise Exception("AI service is currently overloaded. Please try again in a few minutes.")
                # For other errors, record failure and potentially retry
                self._recordAIFailure(error_message)
                if attempt < max_retries - 1:
                    delay = base_delay * (2 ** attempt)  # Exponential backoff
                    logger.warning(f"AI call failed, retrying in {delay} seconds (attempt {attempt + 1}/{max_retries}): {error_message}")
                    await asyncio.sleep(delay)
                    continue
                else:
                    raise
    def _isCircuitBreakerOpen(self) -> bool:
        """Check if circuit breaker is open"""
@ -678,6 +790,146 @@ class ChatManager:
                    self.ai_last_failure_time = None
        return False
    def _determineAIChoice(self, prompt: str, context: str) -> str:
        """Determine whether to use advanced or basic AI based on task complexity"""
        # Check for forced AI choice based on context
        forced_choice = self._getForcedAIChoice(context)
        if forced_choice:
            logger.debug(f"Forced AI choice for {context}: {forced_choice}")
            return forced_choice
        # Define complex task patterns that require advanced AI
        complex_patterns = [
            # Task planning and workflow management
            "task_planning", "action_generation", "result_review", "task_completion_validation",
            # Complex document analysis
            "document", "extract", "analysis", "comprehensive", "detailed analysis",
            # Multi-step reasoning
            "plan", "strategy", "evaluate", "assess", "compare", "analyze",
            # Complex business logic
            "workflow", "task", "action", "validation", "review", "assessment",
            # Critical decision making
            "decision", "recommendation", "evaluation", "quality", "success criteria",
            # Complex prompts
            "JSON", "structured", "format", "validation", "improvements", "quality_score"
        ]
        # Define simple task patterns that can use basic AI
        simple_patterns = [
            # Basic text processing
            "summarize", "translate", "format", "convert", "extract text",
            # Simple queries
            "find", "search", "list", "get", "retrieve",
            # Basic operations
            "send", "upload", "download", "create", "delete",
            # Simple responses
            "confirm", "acknowledge", "status", "info"
        ]
        # Check prompt and context for complexity indicators
        combined_text = f"{prompt} {context}".lower()
        # Count complex indicators
        complex_count = sum(1 for pattern in complex_patterns if pattern in combined_text)
        # Count simple indicators
        simple_count = sum(1 for pattern in simple_patterns if pattern in combined_text)
        # Additional complexity factors
        prompt_length = len(prompt)
        has_json_requirement = "json" in combined_text and ("{" in prompt or "}" in prompt)
        has_structured_output = any(word in combined_text for word in ["format", "structure", "template"])
        has_validation = any(word in combined_text for word in ["validate", "check", "verify", "quality"])
        # Calculate complexity score
        complexity_score = 0
        complexity_score += complex_count * 2  # Complex patterns worth more
        complexity_score += simple_count * 1   # Simple patterns worth less
        complexity_score += (prompt_length > 1000) * 3  # Long prompts are complex
        complexity_score += has_json_requirement * 5  # JSON requirements are complex
        complexity_score += has_structured_output * 3  # Structured output is complex
        complexity_score += has_validation * 4  # Validation is complex
        # Determine AI choice based on complexity score
        if complexity_score >= 5:
            logger.debug(f"Complex task detected (score: {complexity_score}) - using advanced AI for {context}")
            return "advanced"
        else:
            logger.debug(f"Simple task detected (score: {complexity_score}) - using basic AI for {context}")
            return "basic"
    def _getForcedAIChoice(self, context: str) -> str:
        """Get forced AI choice for specific contexts (can be overridden)"""
        # Define contexts that always use advanced AI
        advanced_contexts = [
            "task_planning",           # Always use advanced for task planning
            "action_generation",       # Always use advanced for action generation
            "result_review",           # Always use advanced for result review
            "task_completion_validation"  # Always use advanced for validation
        ]
        # Define contexts that always use basic AI
        basic_contexts = [
            "summarize",              # Always use basic for summarization
            "translate",              # Always use basic for translation
            "format",                 # Always use basic for formatting
            "status",                 # Always use basic for status updates
            "info"                    # Always use basic for info queries
        ]
        context_lower = context.lower()
        # Check for forced advanced AI
        for advanced_context in advanced_contexts:
            if advanced_context in context_lower:
                return "advanced"
        # Check for forced basic AI
        for basic_context in basic_contexts:
            if basic_context in context_lower:
                return "basic"
        # No forced choice
        return None
    def _isCriticalTask(self, context: str) -> bool:
        """Determine if a simple task is critical enough to warrant advanced AI upgrade"""
        # Define critical task patterns
        critical_patterns = [
            # Workflow critical tasks
            "task_planning", "workflow", "critical", "essential",
            # User-facing decisions
            "decision", "recommendation", "evaluation", "assessment",
            # Quality-sensitive tasks
            "quality", "validation", "review", "check",
            # Business-critical operations
            "business", "strategy", "planning", "analysis"
        ]
        context_lower = context.lower()
        # Check if context contains critical patterns
        is_critical = any(pattern in context_lower for pattern in critical_patterns)
        if is_critical:
            logger.debug(f"Critical task detected - {context}")
        return is_critical
    def _recordAIFailure(self, error: str):
        """Record AI failure for circuit breaker"""
        self.ai_failure_count += 1
@ -1753,14 +2005,35 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
            logger.error(f"Error parsing review response: {str(e)}")
            return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
-    async def _callAI(self, prompt: str, context: str) -> str:
+    async def _callAdvancedAI(self, prompt: str, context: str) -> str:
-        """Call AI service with prompt"""
+        """Call advanced AI service with prompt (primary method)"""
        try:
-            # Use the existing AI call mechanism through service
+            # Use the advanced AI call mechanism through service
            if hasattr(self, 'service') and self.service:
-                # Ensure service is properly initialized
+                # Try advanced AI call first
                if hasattr(self.service, 'callAiTextAdvanced'):
                    response = await self.service.callAiTextAdvanced(prompt)
                    logger.debug(f"Advanced AI call successful for {context}")
                    return response
                else:
                    raise Exception("Service does not have callAiTextAdvanced method")
            else:
                raise Exception("No service available for AI calls")
        except Exception as e:
            error_message = str(e)
            logger.warning(f"Advanced AI call failed for {context}: {error_message}")
            raise Exception(f"Advanced AI failed: {error_message}")
    async def _callStandardAI(self, prompt: str, context: str) -> str:
        """Call standard AI service with prompt (fallback method)"""
        try:
            # Use the standard AI call mechanism through service
            if hasattr(self, 'service') and self.service:
                # Try standard AI call as fallback
                if hasattr(self.service, 'callAiTextBasic'):
                    response = await self.service.callAiTextBasic(prompt)
                    logger.debug(f"Standard AI call successful for {context}")
                    return response
                else:
                    raise Exception("Service does not have callAiTextBasic method")
@ -1768,8 +2041,26 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                raise Exception("No service available for AI calls")
        except Exception as e:
-            logger.error(f"Error calling AI for {context}: {str(e)}")
+            error_message = str(e)
-            raise 
+            logger.error(f"Standard AI call failed for {context}: {error_message}")
            # Provide more specific error messages based on the error type
            if "overloaded" in error_message.lower() or "529" in error_message:
                detailed_error = "AI service is currently overloaded. Please try again in a few minutes."
            elif "rate limit" in error_message.lower() or "429" in error_message:
                detailed_error = "Rate limit exceeded. Please wait before making another request."
            elif "api key" in error_message.lower() or "401" in error_message:
                detailed_error = "Invalid API key. Please check your AI service configuration."
            elif "timeout" in error_message.lower():
                detailed_error = "AI service request timed out. Please try again."
            else:
                detailed_error = f"AI service error: {error_message}"
            raise Exception(detailed_error)
    async def _callAI(self, prompt: str, context: str) -> str:
        """Call AI service with prompt (legacy method - now uses the circuit breaker)"""
        return await self._callAIWithCircuitBreaker(prompt, context) 
    # ===== WORKFLOW FEEDBACK GENERATION =====
@ -1866,11 +2157,20 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                task_description = task_step.description
                logger.info(f"=== PROCESSING TASK {i+1}/{len(task_plan.tasks)}: {task_description} ===")
-                # Create user-friendly task start log
+                # Create user-friendly task start log with action details
                progress = 20 + (i * 60 // len(task_plan.tasks))
                # Get actions for this task to show in the log
                task_actions = await self.defineTaskActions(task_step, workflow, previous_results)
                action_details = []
                for j, action in enumerate(task_actions):
                    action_details.append(f"  {j+1}. {action.execMethod}.{action.execAction}")
                action_summary = "\n".join(action_details) if action_details else "  (Actions will be generated during execution)"
                self.chatInterface.createWorkflowLog({
                    "workflowId": workflow.id,
-                    "message": f"Executing task {i+1}/{len(task_plan.tasks)}: {task_description}",
+                    "message": f"Executing task {i+1}/{len(task_plan.tasks)}: {task_description}\nActions to be executed:\n{action_summary}",
                    "type": "info",
                    "status": "running",
                    "progress": progress,
@ -2032,6 +2332,16 @@ Please review the task requirements and try again with different input or approa
                for i, action in enumerate(actions):
                    logger.info(f"Executing action {i+1}/{len(actions)}: {action.execMethod}.{action.execAction}")
                    # Add action start log
                    self.chatInterface.createWorkflowLog({
                        "workflowId": workflow.id,
                        "message": f"Starting action {i+1}/{len(actions)}: {action.execMethod}.{action.execAction}",
                        "type": "info",
                        "status": "running",
                        "progress": 0,
                        "agentName": "System"
                    })
                    # Execute action with validation
                    result = await self.executeActionWithValidation(action, workflow, context)
@ -2039,7 +2349,37 @@ Please review the task requirements and try again with different input or approa
                        state.addSuccessfulAction(result)
                        logger.info(f"Action {i+1} completed successfully")
                        # Add action completion message with result documents
                        documents_info = ""
                        if result.documents and len(result.documents) > 0:
                            doc_names = [doc.filename if hasattr(doc, 'filename') else f"Document {j+1}" 
                                       for j, doc in enumerate(result.documents)]
                            documents_info = f"\n📄 Generated documents: {', '.join(doc_names)}"
                        # Create completion message
                        completion_message = f"✅ Action {i+1}/{len(actions)} completed: {action.execMethod}.{action.execAction}{documents_info}"
                        # Add as log entry instead of message
                        self.chatInterface.createWorkflowLog({
                            "workflowId": workflow.id,
                            "message": completion_message,
                            "type": "success",
                            "status": "running",
                            "progress": 0,
                            "agentName": "System"
                        })
                    elif result.validation.get('status') == 'retry':
                        # Add retry log
                        self.chatInterface.createWorkflowLog({
                            "workflowId": workflow.id,
                            "message": f"🔄 Action {i+1}/{len(actions)} needs retry: {action.execMethod}.{action.execAction}",
                            "type": "warning",
                            "status": "running",
                            "progress": 0,
                            "agentName": "System"
                        })
                        # Retry individual action
                        improvements = result.validation.get('improvements', [])
                        retry_result = await self.retryActionWithImprovements(action, result, improvements)
@ -2047,15 +2387,51 @@ Please review the task requirements and try again with different input or approa
                        if retry_result.validation.get('status') == 'success':
                            state.addSuccessfulAction(retry_result)
                            logger.info(f"Action {i+1} retry successful")
                            # Add retry success log
                            retry_documents_info = ""
                            if retry_result.documents and len(retry_result.documents) > 0:
                                doc_names = [doc.filename if hasattr(doc, 'filename') else f"Document {j+1}" 
                                           for j, doc in enumerate(retry_result.documents)]
                                retry_documents_info = f"\n📄 Generated documents: {', '.join(doc_names)}"
                            self.chatInterface.createWorkflowLog({
                                "workflowId": workflow.id,
                                "message": f"✅ Action {i+1}/{len(actions)} retry successful: {action.execMethod}.{action.execAction}{retry_documents_info}",
                                "type": "success",
                                "status": "running",
                                "progress": 0,
                                "agentName": "System"
                            })
                        else:
                            state.addFailedAction(retry_result)
                            logger.error(f"Action {i+1} retry failed")
                            # Add retry failure log
                            self.chatInterface.createWorkflowLog({
                                "workflowId": workflow.id,
                                "message": f"❌ Action {i+1}/{len(actions)} retry failed: {action.execMethod}.{action.execAction}",
                                "type": "error",
                                "status": "running",
                                "progress": 0,
                                "agentName": "System"
                            })
                            # Action failed after retry - stop task execution and regenerate
                            break
                    else:  # fail
                        state.addFailedAction(result)
                        logger.error(f"Action {i+1} failed validation - stopping task execution")
                        # Add failure log
                        self.chatInterface.createWorkflowLog({
                            "workflowId": workflow.id,
                            "message": f"❌ Action {i+1}/{len(actions)} failed: {action.execMethod}.{action.execAction}",
                            "type": "error",
                            "status": "running",
                            "progress": 0,
                            "agentName": "System"
                        })
                        # Action failed - stop task execution and regenerate
                        break
@ -2412,12 +2788,47 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
            if validation['status'] == 'success':
                action.setSuccess()
                logger.info(f"Action {action.execMethod}.{action.execAction} validated successfully")
                # Only create action message if documents were produced
                if result.documents and len(result.documents) > 0:
                    await self._createActionMessage(action, result, workflow, action.execResultLabel)
                else:
                    # Add validation success log instead of message
                    self.chatInterface.createWorkflowLog({
                        "workflowId": workflow.id,
                        "message": f"✅ Action validation successful: {action.execMethod}.{action.execAction}",
                        "type": "success",
                        "status": "running",
                        "progress": 0,
                        "agentName": "System"
                    })
            elif validation['status'] == 'retry':
                action.status = TaskStatus.PENDING  # Keep pending for retry
                logger.warning(f"Action {action.execMethod}.{action.execAction} needs retry: {validation.get('reason', 'No reason')}")
                # Add validation retry log
                self.chatInterface.createWorkflowLog({
                    "workflowId": workflow.id,
                    "message": f"🔄 Action validation requires retry: {action.execMethod}.{action.execAction} - {validation.get('reason', 'No reason')}",
                    "type": "warning",
                    "status": "running",
                    "progress": 0,
                    "agentName": "System"
                })
            else:  # fail
                action.setError(validation.get('reason', 'Action failed validation'))
                logger.error(f"Action {action.execMethod}.{action.execAction} failed validation: {validation.get('reason', 'No reason')}")
                # Add validation failure log
                self.chatInterface.createWorkflowLog({
                    "workflowId": workflow.id,
                    "message": f"❌ Action validation failed: {action.execMethod}.{action.execAction} - {validation.get('reason', 'No reason')}",
                    "type": "error",
                    "status": "running",
                    "progress": 0,
                    "agentName": "System"
                })
            return action_result
--- a/modules/connectors/connectorAiAnthropic.py
+++ b/modules/connectors/connectorAiAnthropic.py
@ -76,8 +76,22 @@ class AiAnthropic:
            )
            if response.status_code != 200:
-                logger.error(f"Anthropic API error: {response.status_code} - {response.text}")
+                error_detail = f"Anthropic API error: {response.status_code} - {response.text}"
-                raise HTTPException(status_code=500, detail="Error communicating with Anthropic API")
+                logger.error(error_detail)
                # Provide more specific error messages based on status code
                if response.status_code == 529:
                    error_message = "Anthropic API is currently overloaded. Please try again in a few minutes."
                elif response.status_code == 429:
                    error_message = "Rate limit exceeded. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key. Please check your Anthropic API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to Anthropic API: {response.text}"
                else:
                    error_message = f"Anthropic API error ({response.status_code}): {response.text}"
                raise HTTPException(status_code=500, detail=error_message)
            # Parse response
            anthropicResponse = response.json()
--- a/modules/interfaces/interfaceChatObjects.py
+++ b/modules/interfaces/interfaceChatObjects.py
@ -269,6 +269,10 @@ class ChatObjects:
        # Get messages for this workflow
        messages = self.db.getRecordset("workflowMessages", recordFilter={"workflowId": workflowId})
        # Sort messages by publishedAt timestamp to ensure chronological order
        messages.sort(key=lambda x: x.get("publishedAt", x.get("timestamp", "0")))
        return [ChatMessage(**msg) for msg in messages]
    def createWorkflowMessage(self, messageData: Dict[str, Any]) -> ChatMessage:
@ -545,7 +549,12 @@ class ChatObjects:
            return []
        # Get logs for this workflow
-        return [ChatLog(**log) for log in self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})]
+        logs = self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})
        # Sort logs by timestamp (Unix timestamps)
        logs.sort(key=lambda x: float(x.get("timestamp", 0)))
        return [ChatLog(**log) for log in logs]
    def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool:
        """Updates workflow statistics during execution with incremental values."""
@ -867,12 +876,25 @@ class ChatObjects:
                    raise ValueError(f"Workflow {workflowId} not found")
-                # Update workflow
+                # Update workflow - set status back to running for resumed workflows
                self.updateWorkflow(workflowId, {
                    "status": "running",  # Set status back to running for resumed workflows
                    "lastActivity": currentTime,
                    "currentRound": workflow.currentRound + 1
                })
                # Update the workflow object status as well
                workflow.status = "running"
                # Add log entry for workflow resumption
                self.createWorkflowLog({
                    "workflowId": workflowId,
                    "message": f"Workflow resumed (round {workflow.currentRound + 1})",
                    "type": "info",
                    "status": "running",
                    "progress": 0
                })
            else:
                # Create new workflow
                workflowData = {
--- a/modules/methods/methodWeb.py
+++ b/modules/methods/methodWeb.py
@ -10,6 +10,7 @@ import requests
 from bs4 import BeautifulSoup
 import time
 import uuid
 import json # Added for JSON parsing
 from modules.chat.methodBase import MethodBase, ActionResult, action
 from modules.shared.configuration import APP_CONFIG
@ -38,40 +39,105 @@ class MethodWeb(MethodBase):
        self.timeout = 30
    def _readUrl(self, url: str) -> BeautifulSoup:
-        """Read a URL and return a BeautifulSoup parser for the content"""
+        """Read a URL and return a BeautifulSoup parser for the content with enhanced error handling"""
        if not url or not url.startswith(('http://', 'https://')):
            logger.error(f"Invalid URL: {url}")
            return None
-            
+        
        # Enhanced headers to mimic real browser
        headers = {
-            'User-Agent': self.user_agent,
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.9',
+            'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Cache-Control': 'max-age=0'
        }
        try:
-            # Initial request
+            # Use session for better connection handling
-            response = requests.get(url, headers=headers, timeout=self.timeout)
+            session = requests.Session()
            session.headers.update(headers)
-            # Handling for status 202
+            # Initial request with allow_redirects
-            if response.status_code == 202:
+            response = session.get(url, timeout=self.timeout, allow_redirects=True)
-                # Retry with backoff
+            
-                backoff_times = [0.5, 1.0, 2.0, 5.0]
+            # Handle various status codes
            if response.status_code == 200:
                # Success - parse content
                logger.debug(f"Successfully read URL: {url}")
                return BeautifulSoup(response.text, 'html.parser')
            elif response.status_code == 202:
                # Accepted - retry with backoff
                logger.info(f"Status 202 for {url}, retrying with backoff...")
                backoff_times = [1.0, 2.0, 5.0, 10.0]
                for wait_time in backoff_times:
                    time.sleep(wait_time)
-                    response = requests.get(url, headers=headers, timeout=self.timeout)
+                    retry_response = session.get(url, timeout=self.timeout, allow_redirects=True)
-                    if response.status_code != 202:
+                    if retry_response.status_code == 200:
                        logger.debug(f"Successfully read URL after retry: {url}")
                        return BeautifulSoup(retry_response.text, 'html.parser')
                    elif retry_response.status_code != 202:
                        break
-            
+                
-            # Raise for error status codes
+                logger.warning(f"Failed to read URL after retries: {url}")
-            response.raise_for_status()
+                return None
-            
+                
-            # Parse HTML
+            elif response.status_code in [301, 302, 307, 308]:
-            return BeautifulSoup(response.text, 'html.parser')
+                # Redirect - should be handled by allow_redirects=True
-            
+                logger.warning(f"Unexpected redirect status {response.status_code} for {url}")
                return None
            elif response.status_code == 403:
                # Forbidden - try with different user agent
                logger.warning(f"403 Forbidden for {url}, trying with different user agent...")
                headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                session.headers.update(headers)
                retry_response = session.get(url, timeout=self.timeout, allow_redirects=True)
                if retry_response.status_code == 200:
                    logger.debug(f"Successfully read URL with different user agent: {url}")
                    return BeautifulSoup(retry_response.text, 'html.parser')
                else:
                    logger.error(f"Still getting {retry_response.status_code} for {url}")
                    return None
            elif response.status_code == 429:
                # Rate limited - wait and retry
                logger.warning(f"Rate limited for {url}, waiting 30 seconds...")
                time.sleep(30)
                retry_response = session.get(url, timeout=self.timeout, allow_redirects=True)
                if retry_response.status_code == 200:
                    logger.debug(f"Successfully read URL after rate limit: {url}")
                    return BeautifulSoup(retry_response.text, 'html.parser')
                else:
                    logger.error(f"Still getting {retry_response.status_code} after rate limit wait for {url}")
                    return None
            else:
                # Other error status codes
                logger.error(f"HTTP {response.status_code} for {url}")
                return None
        except requests.exceptions.Timeout:
            logger.error(f"Timeout reading URL: {url}")
            return None
        except requests.exceptions.ConnectionError:
            logger.error(f"Connection error reading URL: {url}")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"Request error reading URL {url}: {str(e)}")
            return None
        except Exception as e:
-            logger.error(f"Error reading URL {url}: {str(e)}")
+            logger.error(f"Unexpected error reading URL {url}: {str(e)}")
            return None
    def _extractTitle(self, soup: BeautifulSoup, url: str) -> str:
@ -91,32 +157,109 @@ class MethodWeb(MethodBase):
        return title
-    def _extractMainContent(self, soup: BeautifulSoup, max_chars: int = 10000) -> str:
+    def _extractMainContent(self, soup: BeautifulSoup, max_chars: int = 50000) -> str:
-        """Extract the main content from an HTML page"""
+        """Extract the main content from an HTML page with enhanced content detection"""
        if not soup:
            return ""
-        # Try to find main content elements in priority order
+        # Try to find main content elements in priority order with more selectors
        main_content = None
-        for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
+        content_selectors = [
            'main',
            'article',
            '#content',
            '.content',
            '#main',
            '.main',
            '.post-content',
            '.entry-content',
            '.article-content',
            '.page-content',
            '[role="main"]',
            '.container',
            '.wrapper'
        ]
        for selector in content_selectors:
            content = soup.select_one(selector)
            if content:
                main_content = content
                logger.debug(f"Found main content using selector: {selector}")
                break
        # If no main content found, use the body
        if not main_content:
            main_content = soup.find('body') or soup
            logger.debug("Using body as main content")
-        # Remove script, style, nav, footer elements that don't contribute to main content
+        # Create a copy to avoid modifying the original
-        for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
+        content_copy = main_content.copy()
            element.extract()
-        # Extract text content
+        # Remove elements that don't contribute to main content (less aggressive)
-        text_content = main_content.get_text(separator=' ', strip=True)
+        elements_to_remove = [
            'script', 'style', 'noscript',
            'nav', 'footer', 'header', 'aside',
            '.sidebar', '#sidebar', '.comments', '#comments',
            '.advertisement', '.ads', '.ad', '.banner',
            'iframe', '.social-share', '.share-buttons',
            '.breadcrumb', '.breadcrumbs', '.pagination',
            '.related-posts', '.related-articles',
            '.newsletter', '.subscribe', '.signup',
            '.cookie-notice', '.privacy-notice',
            '.popup', '.modal', '.overlay'
        ]
-        # Limit to max_chars
+        for selector in elements_to_remove:
-        return text_content[:max_chars]
+            for element in content_copy.select(selector):
                element.extract()
        # Extract text content with better formatting
        text_content = content_copy.get_text(separator='\n', strip=True)
        # Clean up the text
        lines = text_content.split('\n')
        cleaned_lines = []
        for line in lines:
            line = line.strip()
            if line and len(line) > 10:  # Only keep meaningful lines
                cleaned_lines.append(line)
        # Join lines with proper spacing
        cleaned_content = '\n\n'.join(cleaned_lines)
        # If content is too short, try alternative extraction
        if len(cleaned_content) < 500:
            logger.debug("Content too short, trying alternative extraction...")
            # Try to extract from all paragraphs
            paragraphs = soup.find_all(['p', 'div', 'section'])
            alt_content = []
            for p in paragraphs:
                text = p.get_text(strip=True)
                if text and len(text) > 20:  # Only meaningful paragraphs
                    alt_content.append(text)
            if alt_content:
                cleaned_content = '\n\n'.join(alt_content[:20])  # Limit to first 20 paragraphs
        # Limit to max_chars but preserve complete sentences
        if len(cleaned_content) > max_chars:
            # Try to cut at a sentence boundary
            sentences = cleaned_content.split('. ')
            truncated_content = ""
            for sentence in sentences:
                if len(truncated_content + sentence) < max_chars:
                    truncated_content += sentence + ". "
                else:
                    break
            cleaned_content = truncated_content.strip()
        logger.debug(f"Extracted {len(cleaned_content)} characters of content")
        return cleaned_content
    def _checkAccessibility(self, soup: BeautifulSoup) -> Dict[str, Any]:
        """Check basic accessibility features"""
@ -214,10 +357,413 @@ class MethodWeb(MethodBase):
            }
        }
    def _detectJavaScriptRendering(self, soup: BeautifulSoup) -> bool:
        """Detect if a page likely requires JavaScript rendering"""
        if not soup:
            return False
        # Check for common indicators of JavaScript-rendered content
        indicators = [
            # Angular, React, Vue indicators
            soup.find('div', {'ng-app': True}),
            soup.find('div', {'id': 'root'}),
            soup.find('div', {'id': 'app'}),
            soup.find('div', {'id': 'react-root'}),
            # SPA indicators
            soup.find('div', {'id': 'spa-root'}),
            soup.find('div', {'class': 'spa-container'}),
            # Modern framework indicators
            soup.find('div', {'data-reactroot': True}),
            soup.find('div', {'data-ng-controller': True}),
            # Empty content with scripts
            len(soup.get_text(strip=True)) < 100 and len(soup.find_all('script')) > 2
        ]
        return any(indicators)
    def _extractMetaInformation(self, soup: BeautifulSoup, url: str) -> Dict[str, Any]:
        """Extract meta information from the page"""
        meta_info = {
            "url": url,
            "title": self._extractTitle(soup, url),
            "description": "",
            "keywords": "",
            "author": "",
            "language": "",
            "robots": "",
            "viewport": "",
            "charset": "",
            "canonical": ""
        }
        # Extract meta tags
        meta_tags = soup.find_all('meta')
        for meta in meta_tags:
            name = meta.get('name', '').lower()
            property = meta.get('property', '').lower()
            content = meta.get('content', '')
            if name == 'description' or property == 'og:description':
                meta_info['description'] = content
            elif name == 'keywords':
                meta_info['keywords'] = content
            elif name == 'author':
                meta_info['author'] = content
            elif name == 'language':
                meta_info['language'] = content
            elif name == 'robots':
                meta_info['robots'] = content
            elif name == 'viewport':
                meta_info['viewport'] = content
            elif property == 'og:title':
                meta_info['title'] = content
            elif property == 'og:url':
                meta_info['canonical'] = content
        # Extract charset
        charset_meta = soup.find('meta', charset=True)
        if charset_meta:
            meta_info['charset'] = charset_meta.get('charset', '')
        # Extract canonical URL
        canonical_link = soup.find('link', rel='canonical')
        if canonical_link:
            meta_info['canonical'] = canonical_link.get('href', '')
        return meta_info
    def _getAlternativeApproaches(self, url: str, requires_js: bool, content_length: int) -> List[str]:
        """Get alternative approaches for sites that are difficult to crawl"""
        approaches = []
        if requires_js:
            approaches.extend([
                "Site requires JavaScript rendering - consider using a headless browser",
                "Try accessing the site's API endpoints directly",
                "Look for RSS feeds or sitemaps",
                "Check if the site has a mobile version that's easier to parse"
            ])
        if content_length < 100:
            approaches.extend([
                "Site may have anti-bot protection - try with different user agents",
                "Check if the site requires authentication",
                "Look for alternative URLs (www vs non-www, http vs https)",
                "Try accessing the site's robots.txt for crawling guidelines"
            ])
        # Add general suggestions
        approaches.extend([
            "Use the web.search action to find alternative sources",
            "Try the web.scrape action with specific CSS selectors",
            "Check if the site has a public API or data export"
        ])
        return approaches
    async def _tryAdvancedAIWebResearch(self, action_type: str, parameters: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Try to get web research results using advanced AI first
        Args:
            action_type: Type of action ('crawl', 'scrape', or 'search')
            parameters: Action parameters
        Returns:
            Dict with AI results if successful, None if AI call fails
        """
        try:
            # Create appropriate prompt based on action type
            if action_type == "crawl":
                prompt = self._createCrawlAIPrompt(parameters)
            elif action_type == "scrape":
                prompt = self._createScrapeAIPrompt(parameters)
            elif action_type == "search":
                prompt = self._createSearchAIPrompt(parameters)
            else:
                logger.warning(f"Unknown action type for AI research: {action_type}")
                return None
            # Try advanced AI call
            if hasattr(self.service, 'callAiTextAdvanced'):
                logger.info(f"Attempting advanced AI web research for {action_type}")
                response = await self.service.callAiTextAdvanced(prompt)
                # Parse the AI response
                parsed_result = self._parseAIWebResponse(response, action_type)
                if parsed_result:
                    logger.info(f"Advanced AI web research successful for {action_type}")
                    return parsed_result
                else:
                    logger.warning(f"Failed to parse AI response for {action_type}")
                    return None
            else:
                logger.warning("Service does not have callAiTextAdvanced method")
                return None
        except Exception as e:
            logger.warning(f"Advanced AI web research failed for {action_type}: {str(e)}")
            return None
    def _createCrawlAIPrompt(self, parameters: Dict[str, Any]) -> str:
        """Create AI prompt for web crawling"""
        urls = parameters.get("urls", [])
        maxDepth = parameters.get("maxDepth", 2)
        includeImages = parameters.get("includeImages", False)
        followLinks = parameters.get("followLinks", True)
        prompt = f"""
 You are an advanced AI research assistant with comprehensive knowledge about websites, companies, and online content. Please provide detailed information about the following URLs based on your extensive training data and knowledge.
 URLs to research: {urls}
 Max depth: {maxDepth}
 Include images: {includeImages}
 Follow links: {followLinks}
 For each URL, please provide comprehensive information including:
 1. Company/organization information and background
 2. Main business activities and services
 3. Key personnel and leadership
 4. Contact information and locations
 5. Recent news and developments
 6. Industry analysis and market position
 7. Related companies and partnerships
 8. Website structure and key pages
 9. Business model and revenue streams
 10. Regulatory compliance and certifications
 For each URL, provide:
 - url: The original URL
 - title: Company/organization name
 - content: Comprehensive description and analysis
 - content_length: Number of characters in content
 - meta_info: Business information object
 - links: Related companies and important connections
 - images: Company logos or key visuals if known
 - requires_javascript: Boolean (usually false for static info)
 - alternative_approaches: Additional research suggestions
 - timestamp: Current timestamp
 Return the results in this exact JSON format:
 {{
    "urls": {urls},
    "maxDepth": {maxDepth},
    "includeImages": {includeImages},
    "followLinks": {followLinks},
    "crawlResults": [
        {{
            "url": "url_here",
            "depth": {maxDepth},
            "followLinks": {followLinks},
            "extractContent": true,
            "title": "company_name",
            "content": "comprehensive_company_analysis",
            "content_length": 1234,
            "meta_info": {{
                "url": "url_here",
                "title": "company_name",
                "description": "business_description",
                "keywords": "industry_keywords",
                "author": "company_info",
                "language": "language_code",
                "robots": "robots_info",
                "viewport": "viewport_info",
                "charset": "charset_info",
                "canonical": "canonical_url"
            }},
            "links": [
                {{
                    "url": "related_company_url",
                    "text": "company_name"
                }}
            ],
            "images": [
                {{
                    "src": "logo_url",
                    "alt": "company_logo",
                    "title": "company_name",
                    "width": "width_value",
                    "height": "height_value"
                }}
            ],
            "requires_javascript": false,
            "alternative_approaches": ["approach1", "approach2"],
            "timestamp": "2024-01-01T00:00:00Z"
        }}
    ],
    "summary": {{
        "total_urls": {len(urls)},
        "successful_crawls": 0,
        "failed_crawls": 0,
        "total_content_chars": 0
    }},
    "timestamp": "2024-01-01T00:00:00Z"
 }}
 Please provide accurate, comprehensive information about each company/organization based on your knowledge. If you don't have specific information about a URL, provide general industry analysis and suggest alternative research approaches.
 """
        return prompt
    def _createScrapeAIPrompt(self, parameters: Dict[str, Any]) -> str:
        """Create AI prompt for web scraping"""
        url = parameters.get("url")
        selectors = parameters.get("selectors", {})
        format = parameters.get("format", "json")
        prompt = f"""
 You are an advanced AI research assistant with comprehensive knowledge about websites, companies, and online content. Please provide detailed information about the following URL and the specific data requested based on your extensive training data and knowledge.
 URL to research: {url}
 Data selectors: {selectors}
 Output format: {format}
 Please provide comprehensive information including:
 1. Company/organization background and history
 2. Business activities and services offered
 3. Key personnel and leadership information
 4. Financial information and performance data
 5. Market position and competitive analysis
 6. Recent news and developments
 7. Contact information and locations
 8. Industry trends and insights
 9. Related companies and partnerships
 10. Regulatory and compliance information
 For each data selector requested, provide relevant information in the specified format (text, html, or json).
 Return the results in this exact JSON format:
 {{
    "url": "{url}",
    "selectors": {selectors},
    "format": "{format}",
    "scrapedData": {{
        "url": "{url}",
        "selectors": {selectors},
        "format": "{format}",
        "content": {{
            "company_info": ["comprehensive_company_analysis"],
            "business_activities": ["detailed_business_description"],
            "leadership": ["key_personnel_information"],
            "financial_data": ["financial_performance_analysis"],
            "market_position": ["competitive_analysis"],
            "recent_news": ["latest_developments"],
            "contact_info": ["contact_details"],
            "industry_insights": ["market_trends"],
            "partnerships": ["related_companies"],
            "compliance": ["regulatory_information"]
        }},
        "timestamp": "2024-01-01T00:00:00Z"
    }},
    "timestamp": "2024-01-01T00:00:00Z"
 }}
 Please provide accurate, comprehensive information about the company/organization based on your knowledge. If you don't have specific information about the URL, provide general industry analysis and suggest alternative research approaches.
 """
        return prompt
    def _createSearchAIPrompt(self, parameters: Dict[str, Any]) -> str:
        """Create AI prompt for web search"""
        query = parameters.get("query")
        engine = parameters.get("engine", "google")
        maxResults = parameters.get("maxResults", 10)
        filter = parameters.get("filter")
        prompt = f"""
 You are an advanced AI research assistant with comprehensive knowledge about companies, industries, and business information. Please provide detailed information about the following search query based on your extensive training data and knowledge.
 Search query: {query}
 Search engine: {engine}
 Max results: {maxResults}
 Filter: {filter}
 Please provide comprehensive research results including:
 1. Relevant company/organization information
 2. Industry analysis and market insights
 3. Key personnel and leadership details
 4. Business activities and services
 5. Financial performance and metrics
 6. Recent news and developments
 7. Competitive landscape analysis
 8. Market trends and opportunities
 9. Regulatory and compliance information
 10. Related companies and partnerships
 For each search result, provide:
 - title: Company/organization name
 - url: Official website or primary source
 - snippet: Brief description and key highlights
 - content: Comprehensive analysis and insights
 Return the results in this exact JSON format:
 {{
    "query": "{query}",
    "engine": "{engine}",
    "maxResults": {maxResults},
    "filter": "{filter}",
    "searchResults": {{
        "query": "{query}",
        "maxResults": {maxResults},
        "results": [
            {{
                "title": "company_name",
                "url": "official_website",
                "snippet": "brief_description",
                "content": "comprehensive_analysis"
            }}
        ],
        "totalFound": 0,
        "timestamp": "2024-01-01T00:00:00Z"
    }},
    "timestamp": "2024-01-01T00:00:00Z"
 }}
 Please provide accurate, comprehensive information about the search query based on your knowledge. If you don't have specific information about the query, provide general industry analysis and suggest alternative research approaches.
 """
        return prompt
    def _parseAIWebResponse(self, response: str, action_type: str) -> Optional[Dict[str, Any]]:
        """Parse AI response into structured data"""
        try:
            # Extract JSON from response
            json_start = response.find('{')
            json_end = response.rfind('}') + 1
            if json_start == -1 or json_end == 0:
                logger.warning(f"No JSON found in AI response: {response}")
                return None
            json_str = response[json_start:json_end]
            parsed_data = json.loads(json_str)
            # Validate basic structure based on action type
            if action_type == "crawl":
                if "crawlResults" not in parsed_data:
                    logger.warning("Invalid crawl response structure")
                    return None
            elif action_type == "scrape":
                if "scrapedData" not in parsed_data:
                    logger.warning("Invalid scrape response structure")
                    return None
            elif action_type == "search":
                if "searchResults" not in parsed_data:
                    logger.warning("Invalid search response structure")
                    return None
            return parsed_data
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse AI response JSON: {str(e)}")
            return None
        except Exception as e:
            logger.warning(f"Error parsing AI response: {str(e)}")
            return None
    @action
    async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
        """
-        Crawl web pages and extract content
+        Crawl web pages and extract content with enhanced error handling and content detection
        Parameters:
            urls (List[str]): List of URLs to crawl
@ -240,23 +786,76 @@ class MethodWeb(MethodBase):
                    error="URLs are required"
                )
            # Try advanced AI research first
            ai_result = await self._tryAdvancedAIWebResearch("crawl", parameters)
            if ai_result:
                logger.info("Using advanced AI web research for crawl")
                # Reconstruct the result data from the AI response
                result_data = {
                    "urls": ai_result.get("urls", []),
                    "maxDepth": ai_result.get("maxDepth", 2),
                    "includeImages": ai_result.get("includeImages", False),
                    "followLinks": ai_result.get("followLinks", True),
                    "crawlResults": ai_result.get("crawlResults", []),
                    "summary": ai_result.get("summary", {}),
                    "timestamp": ai_result.get("timestamp", datetime.now(UTC).isoformat())
                }
                return self._createResult(
                    success=True,
                    data={
                        "documents": [
                            {
                                "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
                                "documentData": result_data,
                                "mimeType": "application/json"
                            }
                        ]
                    }
                )
            else:
                logger.info("Advanced AI web research failed, falling back to regular web crawling")
            # Crawl each URL
            crawl_results = []
            for url in urls:
                try:
-                    # Read the URL
+                    logger.info(f"Crawling URL: {url}")
                    # Read the URL with enhanced error handling
                    soup = self._readUrl(url)
                    if not soup:
                        logger.error(f"Failed to read URL: {url}")
                        crawl_results.append({
-                            "error": "Failed to read URL",
+                            "error": "Failed to read URL - check if the site is accessible and not blocking crawlers",
-                            "url": url
+                            "url": url,
                            "suggestions": [
                                "Try accessing the URL directly in a browser",
                                "Check if the site requires JavaScript",
                                "Verify the URL is correct and accessible"
                            ]
                        })
                        continue
-                    # Extract basic information
+                    # Extract comprehensive information
                    title = self._extractTitle(soup, url)
-                    content = self._extractMainContent(soup) if True else ""
+                    content = self._extractMainContent(soup)
                    meta_info = self._extractMetaInformation(soup, url)
                    # Check if content is meaningful
                    content_length = len(content)
                    if content_length < 100:
                        logger.warning(f"Very little content extracted from {url} ({content_length} chars)")
                        crawl_results.append({
                            "url": url,
                            "title": title,
                            "content": content,
                            "content_length": content_length,
                            "warning": "Very little content extracted - site may require JavaScript or have anti-bot protection",
                            "meta_info": meta_info,
                            "timestamp": datetime.now(UTC).isoformat()
                        })
                        continue
                    # Extract links if requested
                    links = []
@ -264,21 +863,32 @@ class MethodWeb(MethodBase):
                        for link in soup.find_all('a', href=True):
                            href = link.get('href')
                            if href and href.startswith(('http://', 'https://')):
-                                links.append({
+                                link_text = link.get_text(strip=True)
-                                    'url': href,
+                                if link_text:  # Only include links with text
-                                    'text': link.get_text(strip=True)[:100]
+                                    links.append({
                                        'url': href,
                                        'text': link_text[:100]
                                    })
                    # Extract images if requested
                    images = []
                    if includeImages:
                        for img in soup.find_all('img', src=True):
                            src = img.get('src')
                            if src:
                                images.append({
                                    'src': src,
                                    'alt': img.get('alt', ''),
                                    'title': img.get('title', ''),
                                    'width': img.get('width', ''),
                                    'height': img.get('height', '')
                                })
-                    # Extract images
+                    # Check for JavaScript rendering requirements
-                    images = []
+                    requires_js = self._detectJavaScriptRendering(soup)
-                    for img in soup.find_all('img', src=True):
+                    
-                        src = img.get('src')
+                    # Get alternative approaches if needed
-                        if src:
+                    alternative_approaches = self._getAlternativeApproaches(url, requires_js, content_length)
                            images.append({
                                'src': src,
                                'alt': img.get('alt', ''),
                                'title': img.get('title', '')
                            })
                    crawl_results.append({
                        "url": url,
@ -287,16 +897,27 @@ class MethodWeb(MethodBase):
                        "extractContent": True,
                        "title": title,
                        "content": content,
-                        "links": links[:10],  # Limit to first 10 links
+                        "content_length": content_length,
-                        "images": images[:10],  # Limit to first 10 images
+                        "meta_info": meta_info,
                        "links": links[:20],  # Limit to first 20 links
                        "images": images[:20],  # Limit to first 20 images
                        "requires_javascript": requires_js,
                        "alternative_approaches": alternative_approaches,
                        "timestamp": datetime.now(UTC).isoformat()
                    })
                    logger.info(f"Successfully crawled {url} - extracted {content_length} characters")
                except Exception as e:
                    logger.error(f"Error crawling web page {url}: {str(e)}")
                    crawl_results.append({
                        "error": str(e),
-                        "url": url
+                        "url": url,
                        "suggestions": [
                            "Check if the URL is accessible",
                            "Try with a different user agent",
                            "Verify the site doesn't block automated access"
                        ]
                    })
            # Create result data
@ -306,6 +927,12 @@ class MethodWeb(MethodBase):
                "includeImages": includeImages,
                "followLinks": followLinks,
                "crawlResults": crawl_results,
                "summary": {
                    "total_urls": len(urls),
                    "successful_crawls": len([r for r in crawl_results if "error" not in r]),
                    "failed_crawls": len([r for r in crawl_results if "error" in r]),
                    "total_content_chars": sum([r.get("content_length", 0) for r in crawl_results if "content_length" in r])
                },
                "timestamp": datetime.now(UTC).isoformat()
            }
@ -367,6 +994,33 @@ class MethodWeb(MethodBase):
                    error="URL and selectors are required"
                )
            # Try advanced AI research first
            ai_result = await self._tryAdvancedAIWebResearch("scrape", parameters)
            if ai_result:
                logger.info("Using advanced AI web research for scrape")
                # Reconstruct the result data from the AI response
                result_data = {
                    "url": ai_result.get("url"),
                    "selectors": ai_result.get("selectors"),
                    "format": ai_result.get("format"),
                    "scrapedData": ai_result.get("scrapedData"),
                    "timestamp": ai_result.get("timestamp", datetime.now(UTC).isoformat())
                }
                return self._createResult(
                    success=True,
                    data={
                        "documents": [
                            {
                                "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
                                "documentData": result_data,
                                "mimeType": "application/json"
                            }
                        ]
                    }
                )
            else:
                logger.info("Advanced AI web research failed, falling back to regular web scraping")
            # Read the URL
            soup = self._readUrl(url)
            if not soup:
@ -478,6 +1132,34 @@ class MethodWeb(MethodBase):
                    error="Search query is required"
                )
            # Try advanced AI research first
            ai_result = await self._tryAdvancedAIWebResearch("search", parameters)
            if ai_result:
                logger.info("Using advanced AI web research for search")
                # Reconstruct the result data from the AI response
                result_data = {
                    "query": ai_result.get("query"),
                    "engine": ai_result.get("engine"),
                    "maxResults": ai_result.get("maxResults"),
                    "filter": ai_result.get("filter"),
                    "searchResults": ai_result.get("searchResults"),
                    "timestamp": ai_result.get("timestamp", datetime.now(UTC).isoformat())
                }
                return self._createResult(
                    success=True,
                    data={
                        "documents": [
                            {
                                "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
                                "documentData": result_data,
                                "mimeType": "application/json"
                            }
                        ]
                    }
                )
            else:
                logger.info("Advanced AI web research failed, falling back to regular web search")
            # Search web content using Google search via SerpAPI
            try:
                if not self.srcApikey:
@ -601,94 +1283,3 @@ class MethodWeb(MethodBase):
                error=str(e)
            )
    @action
    async def validate(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Validate web pages for various criteria
        Parameters:
            url (str): URL to validate
            checks (List[str], optional): Types of checks to perform (default: ["accessibility", "seo", "performance"])
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            url = parameters.get("url")
            checks = parameters.get("checks", ["accessibility", "seo", "performance"])
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
            if not url:
                return self._createResult(
                    success=False,
                    data={},
                    error="URL is required"
                )
            # Read the URL
            soup = self._readUrl(url)
            if not soup:
                return self._createResult(
                    success=False,
                    data={},
                    error="Failed to read URL"
                )
            validation_results = {}
            for check in checks:
                if check == "accessibility":
                    validation_results["accessibility"] = self._checkAccessibility(soup)
                elif check == "seo":
                    validation_results["seo"] = self._checkSEO(soup)
                elif check == "performance":
                    validation_results["performance"] = self._checkPerformance(soup, url)
                else:
                    validation_results[check] = {"status": "unknown", "message": f"Unknown check type: {check}"}
            validation_result = {
                "url": url,
                "checks": checks,
                "results": validation_results,
                "timestamp": datetime.now(UTC).isoformat()
            }
            # Create result data
            result_data = {
                "url": url,
                "checks": checks,
                "validationResult": validation_result,
                "timestamp": datetime.now(UTC).isoformat()
            }
            # Determine output format based on expected formats
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default
            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".json")
                output_mime_type = expected_format.get("mimeType", "application/json")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
            else:
                logger.info("No expected format specified, using default .json format")
            return self._createResult(
                success=True,
                data={
                    "documents": [
                        {
                            "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                            "documentData": result_data,
                            "mimeType": output_mime_type
                        }
                    ]
                }
            )
        except Exception as e:
            logger.error(f"Error validating web page: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            ) 
--- a/modules/workflow/managerWorkflow.py
+++ b/modules/workflow/managerWorkflow.py
@ -170,7 +170,7 @@ class WorkflowManager:
            # Add completion log entry
            self.chatInterface.createWorkflowLog({
                "workflowId": workflow.id,
-                "message": "Workflow completed successfully",
+                "message": "Workflow completed",
                "type": "success",
                "status": "completed",
                "progress": 100