web service parallelized

ValueOn AG 2025-12-30 10:48:16 +01:00
parent 0d77263fb7
commit cb7ed7cf51
3 changed files with 285 additions and 163 deletions

View file

@@ -122,10 +122,14 @@ class AiCallLooper:
)
# Write the ACTUAL prompt sent to AI
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
else:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
# For section content generation: only write one prompt file (first iteration)
# For document generation: write prompt for each iteration
isSectionContent = "_section_" in debugPrefix
if iteration == 1 or not isSectionContent:
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
elif not isSectionContent:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
response = await self.aiService.callAi(request)
result = response.content
@@ -146,10 +150,13 @@ class AiCallLooper:
self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})")
# Write raw AI response to debug file
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
else:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
# For section content generation: only write one response file (first iteration)
# For document generation: write response for each iteration
if iteration == 1 or not isSectionContent:
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
elif not isSectionContent:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
# Emit stats for this iteration (only if workflow exists and has id)
if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id:
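Both hunks above implement the same gating rule for debug files: section-content runs (detected by "_section_" in the debug prefix) keep only the first-iteration prompt/response pair, while document runs keep one file per iteration. A minimal standalone sketch of that rule (hypothetical helper; the committed code inlines it around self.services.utils.writeDebugFile):

from typing import Optional

def debugFileName(debugPrefix: str, kind: str, iteration: int) -> Optional[str]:
    """Return the debug file name to write, or None to skip the write."""
    isSectionContent = "_section_" in debugPrefix
    if iteration == 1:
        return f"{debugPrefix}_{kind}"  # first iteration: always written
    if not isSectionContent:
        return f"{debugPrefix}_{kind}_iteration_{iteration}"  # document runs only
    return None  # section content, later iteration: skipped

# debugFileName("doc42_section_3", "prompt", 2)  -> None
# debugFileName("doc42", "response", 2)          -> "doc42_response_iteration_2"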
@@ -219,9 +226,9 @@ class AiCallLooper:
logger.info(f"Iteration {iteration}: Section content generation detected (elements found), returning JSON directly")
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
# Write final result
# Note: Debug files (_prompt and _response) are already written above for iteration 1
# No need to write _final_result as it's redundant with _response
final_json = json.dumps(parsedJsonForSection, indent=2, ensure_ascii=False) if parsedJsonForSection else (extractedJsonForSection or result)
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
return final_json
# Extract sections from response (handles both valid and broken JSON)
@@ -397,7 +404,10 @@ class AiCallLooper:
self.services.chat.progressLogUpdate(operationId, estimatedProgress, f"Pipeline: {bytesDisplay} (iteration {iteration})")
# Log merged sections for debugging
self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}")
# For section content generation: skip merged sections debug files (only one prompt/response needed)
isSectionContent = "_section_" in debugPrefix
if not isSectionContent:
self.services.utils.writeDebugFile(merged_json_str, f"{debugPrefix}_merged_sections_iteration_{iteration}")
# Check if we should continue (completion detection)
# Simple logic: JSON completeness determines continuation
@@ -465,7 +475,10 @@ class AiCallLooper:
final_result = self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
# Write final result to debug file
self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
# For section content generation: skip final_result debug file (response already written)
isSectionContent = "_section_" in debugPrefix
if not isSectionContent:
self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
return final_result

View file

@@ -537,11 +537,6 @@ class StructureFiller:
try:
self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt")
self.services.utils.writeDebugFile(
generationPrompt,
f"{chapterId}_section_{sectionId}_prompt"
)
logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt (aggregation)")
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
@@ -553,6 +548,12 @@ class StructureFiller:
logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters")
generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]
# Write debug file for IMAGE_GENERATE (direct callAi, no _callAiWithLooping)
self.services.utils.writeDebugFile(
generationPrompt,
f"{chapterId}_section_{sectionId}_prompt"
)
request = AiCallRequest(
prompt=generationPrompt,
contentParts=[],
@@ -564,6 +565,12 @@ class StructureFiller:
)
aiResponse = await self.aiService.callAi(request)
generatedElements = []
# Write debug file for IMAGE_GENERATE response (direct callAi, no _callAiWithLooping)
self.services.utils.writeDebugFile(
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
f"{chapterId}_section_{sectionId}_response"
)
else:
async def buildSectionPromptWithContinuation(
section: Dict[str, Any],
@@ -665,11 +672,7 @@ The JSON should be a fragment that can be merged with the previous response."""
generatedElements = []
self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response")
self.services.utils.writeDebugFile(
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
f"{chapterId}_section_{sectionId}_response"
)
logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response (aggregation)")
# Note: Debug files are written by _callAiWithLooping using debugPrefix
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
@@ -735,11 +738,6 @@ The JSON should be a fragment that can be merged with the previous response."""
try:
self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt")
self.services.utils.writeDebugFile(
generationPrompt,
f"{chapterId}_section_{sectionId}_prompt"
)
logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt")
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
@@ -751,6 +749,12 @@ The JSON should be a fragment that can be merged with the previous response."""
logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters")
generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]
# Write debug file for IMAGE_GENERATE (direct callAi, no _callAiWithLooping)
self.services.utils.writeDebugFile(
generationPrompt,
f"{chapterId}_section_{sectionId}_prompt"
)
request = AiCallRequest(
prompt=generationPrompt,
contentParts=[],
@@ -762,6 +766,12 @@ The JSON should be a fragment that can be merged with the previous response."""
)
aiResponse = await self.aiService.callAi(request)
generatedElements = []
# Write debug file for IMAGE_GENERATE response (direct callAi, no _callAiWithLooping)
self.services.utils.writeDebugFile(
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
f"{chapterId}_section_{sectionId}_response"
)
else:
isAggregation = False
@@ -865,11 +875,7 @@ The JSON should be a fragment that can be merged with the previous response."""
generatedElements = []
self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response")
self.services.utils.writeDebugFile(
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
f"{chapterId}_section_{sectionId}_response"
)
logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response")
# Note: Debug files are written by _callAiWithLooping using debugPrefix
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
@@ -968,11 +974,6 @@ The JSON should be a fragment that can be merged with the previous response."""
try:
self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt")
self.services.utils.writeDebugFile(
generationPrompt,
f"{chapterId}_section_{sectionId}_prompt"
)
logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt")
self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation")
@@ -984,6 +985,12 @@ The JSON should be a fragment that can be merged with the previous response."""
logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters")
generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]
# Write debug file for IMAGE_GENERATE (direct callAi, no _callAiWithLooping)
self.services.utils.writeDebugFile(
generationPrompt,
f"{chapterId}_section_{sectionId}_prompt"
)
request = AiCallRequest(
prompt=generationPrompt,
contentParts=[],
@@ -995,6 +1002,12 @@ The JSON should be a fragment that can be merged with the previous response."""
)
aiResponse = await self.aiService.callAi(request)
generatedElements = []
# Write debug file for IMAGE_GENERATE response (direct callAi, no _callAiWithLooping)
self.services.utils.writeDebugFile(
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
f"{chapterId}_section_{sectionId}_response"
)
else:
isAggregation = False
@@ -1098,11 +1111,7 @@ The JSON should be a fragment that can be merged with the previous response."""
generatedElements = []
self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response")
self.services.utils.writeDebugFile(
aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse),
f"{chapterId}_section_{sectionId}_response"
)
logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response")
# Note: Debug files are written by _callAiWithLooping using debugPrefix
self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content")
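All three IMAGE_GENERATE branches above truncate oversized DALL-E prompts with generationPrompt[:maxPromptLength].rsplit('\n', 1)[0]: cut to the limit, then back off to the last complete line so the prompt never ends mid-sentence. A self-contained sketch of that one-liner (one caveat: if no newline falls inside the limit, the hard cut is kept as-is):

def truncateAtLine(prompt: str, maxLength: int) -> str:
    """Cut prompt to maxLength, then drop the trailing partial line."""
    if len(prompt) <= maxLength:
        return prompt
    return prompt[:maxLength].rsplit('\n', 1)[0]

# A three-line prompt cut at 12 characters keeps only the first whole line:
assert truncateAtLine("line one\nline two\nline three", 12) == "line one"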

View file

@@ -8,6 +8,8 @@ Manages the two-step process: WEB_SEARCH then WEB_CRAWL.
import json
import logging
import time
import asyncio
from urllib.parse import urlparse
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptWebSearch, AiCallPromptWebCrawl
@@ -99,12 +101,18 @@ class WebService:
self.services.chat.progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
# Step 3: Filter to maxNumberPages (simple cut, no intelligent filtering)
if len(allUrls) > maxNumberPages:
allUrls = allUrls[:maxNumberPages]
# Step 3: Validate and filter URLs before crawling
validatedUrls = self._validateUrls(allUrls)
if not validatedUrls:
logger.warning(f"All {len(allUrls)} URLs failed validation")
return {"error": "No valid URLs found to crawl"}
# Filter to maxNumberPages (simple cut, no intelligent filtering)
if len(validatedUrls) > maxNumberPages:
validatedUrls = validatedUrls[:maxNumberPages]
logger.info(f"Limited URLs to {maxNumberPages}")
if not allUrls:
if not validatedUrls:
return {"error": "No URLs found to crawl"}
# Step 4: Translate researchDepth to maxDepth
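Note the ordering this hunk establishes: validation now runs before the maxNumberPages cut, so malformed URLs no longer consume crawl slots. Roughly (the list comprehension is a crude stand-in for _validateUrls, shown further down):

allUrls = ["https://a.example", "not-a-url", "https://b.example", "https://c.example"]
validatedUrls = [u for u in allUrls if u.startswith(("http://", "https://"))]  # stand-in for _validateUrls
maxNumberPages = 2
validatedUrls = validatedUrls[:maxNumberPages]
# -> ["https://a.example", "https://b.example"]; "not-a-url" never took a slot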
@@ -114,14 +122,14 @@ class WebService:
# Step 5: Crawl all URLs with hierarchical logging
if operationId:
self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
self.services.chat.progressLogUpdate(operationId, 0.6, f"Crawling {len(allUrls)} URLs")
self.services.chat.progressLogUpdate(operationId, 0.6, f"Crawling {len(validatedUrls)} URLs")
# Use parent operation ID directly (parentId should be operationId, not log entry ID)
parentOperationId = operationId # Use the parent's operationId directly
crawlResult = await self._performWebCrawl(
instruction=instruction,
urls=allUrls,
urls=validatedUrls,
maxDepth=maxDepth,
parentOperationId=parentOperationId
)
@@ -194,24 +202,24 @@ class WebService:
"max_depth": maxDepth,
"country": countryCode,
"language": languageCode,
"urls_crawled": allUrls[:20], # First 20 URLs for reference
"total_urls": len(allUrls),
"urls_crawled": validatedUrls[:20], # First 20 URLs for reference
"total_urls": len(validatedUrls),
"urls_with_content": urlsWithContent,
"total_content_length": totalContentLength,
"crawl_date": self.services.utils.timestampGetUtc() if hasattr(self.services, 'utils') else None
},
"sections": sections,
"statistics": {
"sectionCount": len(sections),
"total_urls": len(allUrls),
"sectionCount": len(sections),
"total_urls": len(validatedUrls),
"results_count": totalResults,
"urls_with_content": urlsWithContent,
"total_content_length": totalContentLength
},
# Keep original structure for backward compatibility
"instruction": instruction,
"urls_crawled": allUrls,
"total_urls": len(allUrls),
"urls_crawled": validatedUrls,
"total_urls": len(validatedUrls),
"results": crawlResult,
"total_results": totalResults
}
@@ -383,6 +391,50 @@ Return ONLY valid JSON, no additional text:
logger.error(f"Error in web search: {str(e)}")
return []
def _validateUrls(self, urls: List[str]) -> List[str]:
"""
Validate URLs before crawling - filters out invalid URLs.
Args:
urls: List of URLs to validate
Returns:
List of valid URLs
"""
validatedUrls = []
for url in urls:
if not url or not isinstance(url, str):
logger.debug(f"Skipping invalid URL (not a string): {url}")
continue
url = url.strip()
if not url:
logger.debug(f"Skipping empty URL")
continue
# Basic URL validation using urlparse
try:
parsed = urlparse(url)
# Check if URL has at least scheme and netloc
if not parsed.scheme or not parsed.netloc:
logger.debug(f"Skipping invalid URL (missing scheme or netloc): {url}")
continue
# Only allow http/https schemes
if parsed.scheme not in ['http', 'https']:
logger.debug(f"Skipping URL with unsupported scheme '{parsed.scheme}': {url}")
continue
validatedUrls.append(url)
logger.debug(f"Validated URL: {url}")
except Exception as e:
logger.warning(f"Error validating URL '{url}': {str(e)}")
continue
logger.info(f"Validated {len(validatedUrls)}/{len(urls)} URLs")
return validatedUrls
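For reference, the same three checks (non-empty after strip, scheme and netloc present, scheme restricted to http/https) can be exercised standalone with nothing but urllib.parse:

from urllib.parse import urlparse

candidates = [
    "https://example.com/page",  # kept: https scheme and host
    "ftp://example.com/file",    # dropped: unsupported scheme
    "example.com/no-scheme",     # dropped: urlparse yields empty scheme/netloc
    "   ",                       # dropped: empty after strip()
]
kept = []
for url in candidates:
    url = url.strip()
    parsed = urlparse(url)
    if url and parsed.scheme in ("http", "https") and parsed.netloc:
        kept.append(url)
print(kept)  # ['https://example.com/page']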
async def _performWebCrawl(
self,
instruction: str,
@@ -390,117 +442,165 @@ Return ONLY valid JSON, no additional text:
maxDepth: int = 2,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Perform web crawl on list of URLs - calls plugin for each URL individually."""
crawlResults = []
# Loop over each URL and crawl one at a time
"""Perform web crawl on list of URLs - crawls URLs in parallel for better performance."""
# Create tasks for parallel crawling
crawlTasks = []
for urlIndex, url in enumerate(urls):
# Create separate operation for each URL with parent reference
urlOperationId = None
if parentOperationId:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
urlOperationId = f"web_crawl_url_{workflowId}_{urlIndex}_{int(time.time())}"
self.services.chat.progressLogStart(
urlOperationId,
"Web Crawl",
f"URL {urlIndex + 1}",
url[:50] + "..." if len(url) > 50 else url,
parentOperationId=parentOperationId
)
try:
logger.info(f"Crawling URL {urlIndex + 1}/{len(urls)}: {url}")
if urlOperationId:
displayUrl = url[:50] + "..." if len(url) > 50 else url
self.services.chat.progressLogUpdate(urlOperationId, 0.2, f"Crawling: {displayUrl}")
self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
# Build crawl prompt model for single URL
crawlPromptModel = AiCallPromptWebCrawl(
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
maxWidth=5 # Default: 5 pages per level
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
# Debug: persist crawl prompt (with URL identifier in content for clarity)
debugPrompt = f"URL: {url}\n\n{crawlPrompt}"
self.services.utils.writeDebugFile(debugPrompt, "webcrawl_prompt")
# Call AI with WEB_CRAWL operation
crawlOptions = AiCallOptions(
operationType=OperationTypeEnum.WEB_CRAWL,
resultFormat="json"
)
if urlOperationId:
self.services.chat.progressLogUpdate(urlOperationId, 0.4, "Calling crawl connector")
# Use unified callAiContent method with parentOperationId for hierarchical logging
crawlResponse = await self.services.ai.callAiContent(
prompt=crawlPrompt,
options=crawlOptions,
outputFormat="json",
parentOperationId=urlOperationId # Pass URL operation ID as parent for sub-URL logging
)
if urlOperationId:
self.services.chat.progressLogUpdate(urlOperationId, 0.7, "Processing crawl results")
# Extract content from AiResponse
crawlResult = crawlResponse.content
# Debug: persist crawl response
if isinstance(crawlResult, str):
self.services.utils.writeDebugFile(crawlResult, "webcrawl_response")
else:
self.services.utils.writeDebugFile(json.dumps(crawlResult, indent=2), "webcrawl_response")
# Parse crawl result
if isinstance(crawlResult, str):
try:
# Extract JSON from response (handles markdown code blocks)
extractedJson = self.services.utils.jsonExtractString(crawlResult)
crawlData = json.loads(extractedJson) if extractedJson else json.loads(crawlResult)
except Exception:  # JSON extraction/parsing failed; fall back to raw content
crawlData = {"url": url, "content": crawlResult}
else:
crawlData = crawlResult
# Process crawl results and create hierarchical progress logging for sub-URLs
if urlOperationId:
self.services.chat.progressLogUpdate(urlOperationId, 0.8, "Processing crawl results")
# Recursively process crawl results to find nested URLs and create child operations
processedResults = self._processCrawlResultsWithHierarchy(crawlData, url, urlOperationId, maxDepth, 0)
# Count total URLs crawled (including sub-URLs) for progress message
totalUrlsCrawled = self._countUrlsInResults(processedResults)
# Ensure it's a list of results
if isinstance(processedResults, list):
crawlResults.extend(processedResults)
elif isinstance(processedResults, dict):
crawlResults.append(processedResults)
else:
crawlResults.append({"url": url, "content": str(processedResults)})
if urlOperationId:
if totalUrlsCrawled > 1:
self.services.chat.progressLogUpdate(urlOperationId, 0.9, f"Crawled {totalUrlsCrawled} URLs (including sub-URLs)")
else:
self.services.chat.progressLogUpdate(urlOperationId, 0.9, "Crawl completed")
self.services.chat.progressLogFinish(urlOperationId, True)
except Exception as e:
logger.error(f"Error crawling URL {url}: {str(e)}")
if urlOperationId:
self.services.chat.progressLogFinish(urlOperationId, False)
crawlResults.append({"url": url, "error": str(e)})
task = self._crawlSingleUrl(
url=url,
urlIndex=urlIndex,
totalUrls=len(urls),
instruction=instruction,
maxDepth=maxDepth,
parentOperationId=parentOperationId
)
crawlTasks.append(task)
return crawlResults
# Execute all crawl tasks in parallel
logger.info(f"Starting parallel crawl of {len(urls)} URLs")
crawlResults = await asyncio.gather(*crawlTasks, return_exceptions=True)
# Process results and handle exceptions
processedResults = []
for idx, result in enumerate(crawlResults):
if isinstance(result, Exception):
logger.error(f"Error crawling URL {urls[idx]}: {str(result)}")
processedResults.append({"url": urls[idx], "error": str(result)})
else:
processedResults.extend(result if isinstance(result, list) else [result])
logger.info(f"Completed parallel crawl: {len(processedResults)} results")
return processedResults
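The property the rewrite leans on: asyncio.gather with return_exceptions=True returns results in input order and hands failures back as exception objects instead of cancelling the sibling tasks, which is exactly how the loop above pairs urls[idx] with its result. A runnable miniature:

import asyncio

async def crawlOne(url: str) -> dict:
    if "bad" in url:
        raise ValueError(f"cannot reach {url}")
    await asyncio.sleep(0.01)  # stand-in for the real crawl call
    return {"url": url, "content": "..."}

async def main() -> None:
    urls = ["https://a.example", "https://bad.example", "https://c.example"]
    results = await asyncio.gather(*(crawlOne(u) for u in urls), return_exceptions=True)
    for url, result in zip(urls, results):  # results arrive in input order
        if isinstance(result, Exception):   # same partitioning as the loop above
            print({"url": url, "error": str(result)})
        else:
            print(result)

asyncio.run(main())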
async def _crawlSingleUrl(
self,
url: str,
urlIndex: int,
totalUrls: int,
instruction: str,
maxDepth: int,
parentOperationId: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Crawl a single URL - called in parallel for multiple URLs.
Args:
url: URL to crawl
urlIndex: Index of URL in the list
totalUrls: Total number of URLs being crawled
instruction: Research instruction
maxDepth: Maximum crawl depth
parentOperationId: Parent operation ID for progress tracking
Returns:
List of crawl results for this URL
"""
# Create separate operation for each URL with parent reference
urlOperationId = None
if parentOperationId:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
urlOperationId = f"web_crawl_url_{workflowId}_{urlIndex}_{int(time.time())}"
self.services.chat.progressLogStart(
urlOperationId,
"Web Crawl",
f"URL {urlIndex + 1}/{totalUrls}",
url[:50] + "..." if len(url) > 50 else url,
parentOperationId=parentOperationId
)
try:
logger.info(f"Crawling URL {urlIndex + 1}/{totalUrls}: {url}")
if urlOperationId:
displayUrl = url[:50] + "..." if len(url) > 50 else url
self.services.chat.progressLogUpdate(urlOperationId, 0.2, f"Crawling: {displayUrl}")
self.services.chat.progressLogUpdate(urlOperationId, 0.3, "Initiating crawl")
# Build crawl prompt model for single URL
crawlPromptModel = AiCallPromptWebCrawl(
instruction=instruction,
url=url, # Single URL
maxDepth=maxDepth,
maxWidth=5 # Default: 5 pages per level
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
# Debug: persist crawl prompt (with URL identifier in content for clarity)
debugPrompt = f"URL: {url}\n\n{crawlPrompt}"
self.services.utils.writeDebugFile(debugPrompt, "webcrawl_prompt")
# Call AI with WEB_CRAWL operation
crawlOptions = AiCallOptions(
operationType=OperationTypeEnum.WEB_CRAWL,
resultFormat="json"
)
if urlOperationId:
self.services.chat.progressLogUpdate(urlOperationId, 0.4, "Calling crawl connector")
# Use unified callAiContent method with parentOperationId for hierarchical logging
crawlResponse = await self.services.ai.callAiContent(
prompt=crawlPrompt,
options=crawlOptions,
outputFormat="json",
parentOperationId=urlOperationId # Pass URL operation ID as parent for sub-URL logging
)
if urlOperationId:
self.services.chat.progressLogUpdate(urlOperationId, 0.7, "Processing crawl results")
# Extract content from AiResponse
crawlResult = crawlResponse.content
# Debug: persist crawl response
if isinstance(crawlResult, str):
self.services.utils.writeDebugFile(crawlResult, "webcrawl_response")
else:
self.services.utils.writeDebugFile(json.dumps(crawlResult, indent=2), "webcrawl_response")
# Parse crawl result
if isinstance(crawlResult, str):
try:
# Extract JSON from response (handles markdown code blocks)
extractedJson = self.services.utils.jsonExtractString(crawlResult)
crawlData = json.loads(extractedJson) if extractedJson else json.loads(crawlResult)
except Exception:  # JSON extraction/parsing failed; fall back to raw content
crawlData = {"url": url, "content": crawlResult}
else:
crawlData = crawlResult
# Process crawl results and create hierarchical progress logging for sub-URLs
if urlOperationId:
self.services.chat.progressLogUpdate(urlOperationId, 0.8, "Processing crawl results")
# Recursively process crawl results to find nested URLs and create child operations
processedResults = self._processCrawlResultsWithHierarchy(crawlData, url, urlOperationId, maxDepth, 0)
# Count total URLs crawled (including sub-URLs) for progress message
totalUrlsCrawled = self._countUrlsInResults(processedResults)
# Ensure it's a list of results
if isinstance(processedResults, list):
results = processedResults
elif isinstance(processedResults, dict):
results = [processedResults]
else:
results = [{"url": url, "content": str(processedResults)}]
if urlOperationId:
if totalUrlsCrawled > 1:
self.services.chat.progressLogUpdate(urlOperationId, 0.9, f"Crawled {totalUrlsCrawled} URLs (including sub-URLs)")
else:
self.services.chat.progressLogUpdate(urlOperationId, 0.9, "Crawl completed")
self.services.chat.progressLogFinish(urlOperationId, True)
return results
except Exception as e:
logger.error(f"Error crawling URL {url}: {str(e)}")
if urlOperationId:
self.services.chat.progressLogFinish(urlOperationId, False)
return [{"url": url, "error": str(e)}]
def _processCrawlResultsWithHierarchy(
self,