#!/usr/bin/env python3

"""
AI Models Test - Tests WEB_CRAWL functionality on all models that support it.

This script tests all models that have WEB_CRAWL capability, validates that
they can crawl specific URLs and return content, and analyzes the quality of
the results.
"""

import asyncio
import base64
import json
import os
import sys
from datetime import datetime
from typing import Any, Dict, List

# Add the gateway to the path so the modules.* imports below resolve
sys.path.append(os.path.dirname(__file__))

# Import the service initialization
from modules.features.chatPlayground.mainChatPlayground import getServices
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelUam import User


class AIModelsTester:
    """Tests WEB_CRAWL-capable models and records the results on disk."""

    def __init__(self):
        # Create a minimal user context for testing
        testUser = User(
            id="test_user",
            username="test_user",
            email="test@example.com",
            fullName="Test User",
            language="en",
            mandateId="test_mandate"
        )

        # Initialize services using the existing system
        self.services = getServices(testUser, None)  # Test user, no workflow
        self.testResults = []

        # Create the logs directory if it doesn't exist
        self.logsDir = os.path.join(os.path.dirname(__file__), "..", "local", "logs")
        os.makedirs(self.logsDir, exist_ok=True)

        # Create the modeltest subdirectory
        self.modelTestDir = os.path.join(self.logsDir, "modeltest")
        os.makedirs(self.modelTestDir, exist_ok=True)
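        # Resulting layout (relative to this file): ../local/logs/ holds shared
        # test assets, and ../local/logs/modeltest/ receives the per-model .txt
        # and .json artifacts this script writes.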

        # Copy the test image to the modeltest directory if it exists
        testImageSource = os.path.join(self.logsDir, "_testdata_photo_2025-06-03_13-05-52.jpg")
        testImageDest = os.path.join(self.modelTestDir, "_testdata_photo_2025-06-03_13-05-52.jpg")
        if os.path.exists(testImageSource) and not os.path.exists(testImageDest):
            import shutil
            shutil.copy2(testImageSource, testImageDest)
            print(f"📷 Test image copied to: {testImageDest}")

    async def initialize(self):
        """Initialize the AI service."""
        # Set the logging level to DEBUG for detailed output
        import logging
        logging.getLogger().setLevel(logging.DEBUG)

        # Initialize the model registry with all connectors
        from modules.aicore.aicoreModelRegistry import modelRegistry
        from modules.aicore.aicorePluginTavily import AiTavily
        from modules.aicore.aicorePluginPerplexity import AiPerplexity

        # Register the web connectors that support WEB_CRAWL
        modelRegistry.registerConnector(AiTavily())
        modelRegistry.registerConnector(AiPerplexity())

        # The AI service needs to be recreated with proper initialization
        from modules.services.serviceAi.mainServiceAi import AiService
        self.services.ai = await AiService.create(self.services)

        # Create a minimal workflow context
        from modules.datamodels.datamodelChat import ChatWorkflow
        import uuid

        self.services.currentWorkflow = ChatWorkflow(
            id=str(uuid.uuid4()),
            name="Test Workflow",
            status="running",
            startedAt=self.services.utils.timestampGetUtc(),
            lastActivity=self.services.utils.timestampGetUtc(),
            currentRound=1,
            currentTask=0,
            currentAction=0,
            totalTasks=0,
            totalActions=0,
            mandateId="test_mandate",
            messageIds=[],
            workflowMode="React",
            maxSteps=5
        )
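        # All of the values above are placeholders: the tests only need a
        # structurally valid workflow object, not a real running workflow.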

        print("✅ AI Service initialized successfully")
        print(f"📁 Results will be saved to: {self.modelTestDir}")

    async def testModel(self, modelName: str) -> Dict[str, Any]:
        """Test a specific AI model with the WEB_CRAWL operation."""
        print(f"\n{'='*60}")
        print(f"TESTING MODEL: {modelName}")
        print("OPERATION TYPE: WEB_CRAWL")
        print(f"{'='*60}")

        # CRAWL CONFIGURATION
        # Deep and broad web crawl example:
        # - maxDepth: 3 (deep) - follows links up to 3 levels from the starting page
        #   - Level 1: starting page
        #   - Level 2: pages linked from the starting page
        #   - Level 3: pages linked from Level 2 pages
        # - maxWidth: 50 (broad) - follows up to 50 links at each depth level
        # If every level yields the full 50 links, the link tree can reach
        # 1 + 50 + 50**2 = 2,551 pages; the connector's limit settings keep the
        # actual crawl far smaller.
        #
        # Common configurations:
        # - Fast/Overview: maxDepth=1, maxWidth=5 (shallow, focused)
        # - General/Standard: maxDepth=2, maxWidth=10 (balanced)
        # - Deep and Broad: maxDepth=3, maxWidth=50 (comprehensive)

        CRAWL_DEPTH = 3   # Deep crawl: follows links 3 levels deep
        CRAWL_WIDTH = 50  # Broad crawl: up to 50 pages per level

        print("Crawl Configuration:")
        print(f" - Depth: {CRAWL_DEPTH} levels (deep)")
        print(f" - Width: {CRAWL_WIDTH} pages per level (broad)")
        print(f" - Theoretical max: {sum(CRAWL_WIDTH ** d for d in range(CRAWL_DEPTH))} pages")

        # Use the WEB_CRAWL-specific prompt format
        from modules.datamodels.datamodelAi import AiCallPromptWebCrawl

        # Test with a simple prompt like the playground example
        simplePrompt = "https://www.valueon.ch: Who works in this company?"

        # But keep the structured format for now to match our API
        testPrompt = json.dumps({
            "instruction": "Who works in this company?",
            "url": "https://www.valueon.ch",
            "maxDepth": CRAWL_DEPTH,
            "maxWidth": CRAWL_WIDTH
        }, indent=2)
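        # For reference, the serialized prompt produced above looks like:
        # {
        #   "instruction": "Who works in this company?",
        #   "url": "https://www.valueon.ch",
        #   "maxDepth": 3,
        #   "maxWidth": 50
        # }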

        print(f"Simple prompt (playground style): {simplePrompt}")

        # For Tavily models, test a direct API call for better link following
        if "tavily" in modelName.lower():
            return await self._testTavilyDirect(modelName, CRAWL_DEPTH, CRAWL_WIDTH)

        print(f"Test prompt: {testPrompt}")
        print(f"Prompt length: {len(testPrompt)} characters")

        startTime = asyncio.get_event_loop().time()

        try:
            # Create options for the WEB_CRAWL operation
            options = AiCallOptions(
                operationType=OperationTypeEnum.WEB_CRAWL,
                preferredModel=modelName
            )

            # Call the model's functionCall DIRECTLY: this tests the actual
            # model, not the document generation pipeline. Fetch the model
            # straight from the registry.
            from modules.aicore.aicoreModelRegistry import modelRegistry
            model = modelRegistry.getModel(modelName)

            if not model:
                raise Exception(f"Model {modelName} not found")

            # Create an AiModelCall and call the model's functionCall directly
            from modules.datamodels.datamodelAi import AiModelCall

            # For WEB_CRAWL models, use a normal functionCall with the structured prompt
            messages = [{"role": "user", "content": testPrompt}]
            modelCall = AiModelCall(
                messages=messages,
                model=model,
                options=options
            )
            response = await model.functionCall(modelCall)

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            # Analyze the response - functionCall returns AiModelResponse objects
            if hasattr(response, 'success'):
                # AiModelResponse object
                if response.success:
                    result = {
                        "modelName": modelName,
                        "status": "SUCCESS",
                        "processingTime": round(processingTime, 2),
                        "responseLength": len(response.content) if response.content else 0,
                        "responseType": "AiModelResponse",
                        "hasContent": bool(response.content),
                        "error": None,
                        "modelUsed": modelName,
                        "priceUsd": 0.0,  # AiModelResponse doesn't carry price info
                        "bytesSent": 0,
                        "bytesReceived": len(response.content.encode('utf-8')) if response.content else 0
                    }

                    # Extract the actual prompt sent if available in the metadata
                    if hasattr(response, 'metadata') and response.metadata:
                        result["actualPromptSent"] = response.metadata.get("actualPromptSent", "N/A")

                    # Try to parse the content as JSON
                    if response.content:
                        try:
                            json.loads(response.content)
                            result["isValidJson"] = True
                        except (json.JSONDecodeError, TypeError):
                            result["isValidJson"] = False

                        result["responsePreview"] = response.content[:200] + "..." if len(response.content) > 200 else response.content
                        result["fullResponse"] = response.content
                    else:
                        result["isValidJson"] = False
                        result["responsePreview"] = "Empty response"
                        result["fullResponse"] = ""

                    print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
                    print(f"📄 Response length: {len(response.content) if response.content else 0} characters")
                    print(f"📄 Model used: {modelName}")
                    print(f"📄 Response preview: {result['responsePreview']}")

                else:
                    error = response.error or "Unknown error"
                    result = {
                        "modelName": modelName,
                        "status": "ERROR",
                        "processingTime": round(processingTime, 2),
                        "responseLength": 0,
                        "responseType": "AiModelResponse",
                        "hasContent": False,
                        "error": error,
                        "fullResponse": str(response)
                    }

                    print(f"❌ ERROR - {error}")

            elif isinstance(response, dict):
                # Fallback for dict responses
                if response.get("success", True):
                    result = {
                        "modelName": modelName,
                        "status": "SUCCESS",
                        "processingTime": round(processingTime, 2),
                        "responseLength": len(str(response)),
                        "responseType": "dict",
                        "hasContent": True,
                        "error": None
                    }

                    # Try to serialize the dict as JSON
                    try:
                        jsonResponse = json.dumps(response, indent=2)
                        result["responsePreview"] = jsonResponse[:200] + "..." if len(jsonResponse) > 200 else jsonResponse
                        result["isValidJson"] = True
                        result["fullResponse"] = jsonResponse
                    except (TypeError, ValueError):
                        result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
                        result["isValidJson"] = False
                        result["fullResponse"] = str(response)

                    print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
                    print(f"📄 Response length: {len(str(response))} characters")
                    print(f"📄 Response preview: {result['responsePreview']}")

                else:
                    error = response.get("error", "Unknown error")
                    result = {
                        "modelName": modelName,
                        "status": "ERROR",
                        "processingTime": round(processingTime, 2),
                        "responseLength": 0,
                        "responseType": "error",
                        "hasContent": False,
                        "error": error,
                        "fullResponse": str(response)
                    }

                    print(f"❌ ERROR - {error}")

            else:
                # String response
                result = {
                    "modelName": modelName,
                    "status": "SUCCESS",
                    "processingTime": round(processingTime, 2),
                    "responseLength": len(str(response)),
                    "responseType": "string",
                    "hasContent": True,
                    "error": None
                }

                # Try to parse as JSON
                try:
                    json.loads(str(response))
                    result["isValidJson"] = True
                except (json.JSONDecodeError, TypeError):
                    result["isValidJson"] = False

                result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
                result["fullResponse"] = str(response)

                print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
                print(f"📄 Response length: {len(str(response))} characters")
                print(f"📄 Response preview: {result['responsePreview']}")

            # Add the prompt and crawl configuration to the result for logging
            result["testPrompt"] = testPrompt
            result["crawlConfig"] = {
                "depth": CRAWL_DEPTH,
                "width": CRAWL_WIDTH
            }

            # For WEB_CRAWL, also validate that content was extracted
            if result.get("status") == "SUCCESS" and result.get("fullResponse"):
                self._validateCrawlResponse(modelName, result)

        except Exception as e:
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "modelName": modelName,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "responseLength": 0,
                "responseType": "exception",
                "hasContent": False,
                "error": str(e),
                "testPrompt": testPrompt,
                "crawlConfig": {
                    "depth": CRAWL_DEPTH,
                    "width": CRAWL_WIDTH
                }
            }

            print(f"💥 EXCEPTION - {str(e)}")

        self.testResults.append(result)

        # Save the text response even for exceptions, to log the prompt
        if result.get("status") in ["SUCCESS", "EXCEPTION", "ERROR"]:
            self._saveTextResponse(modelName, result)

        # Save the individual model result immediately
        self._saveIndividualModelResult(modelName, result)

        return result
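
    # Result dicts produced by testModel() and _testTavilyDirect() share a
    # common shape: modelName, status (SUCCESS/ERROR/EXCEPTION), processingTime,
    # responseLength, responseType, hasContent and error, plus optional fields
    # such as isValidJson, responsePreview, fullResponse, pagesCrawled and
    # crawlConfig; downstream helpers read the optional fields with .get().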

    def _saveImageResponse(self, modelName: str, result: Dict[str, Any]):
        """Save a base64 image response to file.

        Best-effort extraction; not called by the WEB_CRAWL flow in this script.
        """
        try:
            fullResponse = result.get("fullResponse", "")
            base64Data = None

            # Try to extract base64 data from the response
            if isinstance(fullResponse, dict):
                # Look for base64 data in the response
                if "content" in fullResponse:
                    base64Data = fullResponse["content"]
                elif "data" in fullResponse:
                    base64Data = fullResponse["data"]
                elif "image" in fullResponse:
                    base64Data = fullResponse["image"]
            else:
                # Try to find a data URI in the string response
                import re
                base64Match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', str(fullResponse))
                if base64Match:
                    base64Data = base64Match.group(1)
                else:
                    # Fall back to any long base64-looking run of characters
                    base64Match = re.search(r'([A-Za-z0-9+/=]{100,})', str(fullResponse))
                    if base64Match:
                        base64Data = base64Match.group(1)

            if base64Data:
                # Strip the data URI prefix if present
                if base64Data.startswith('data:image/'):
                    base64Data = base64Data.split(',', 1)[1]

                # Decode and save the image
                imageData = base64.b64decode(base64Data)
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = f"{modelName}_{timestamp}.png"
                filepath = os.path.join(self.modelTestDir, filename)

                with open(filepath, 'wb') as f:
                    f.write(imageData)

                result["savedImage"] = filepath
                print(f"🖼️ Image saved: {filepath}")
            else:
                print("⚠️ No base64 image data found in response")

        except Exception as e:
            print(f"❌ Error saving image: {str(e)}")
            result["imageSaveError"] = str(e)

    def _saveTextResponse(self, modelName: str, result: Dict[str, Any]):
        """Save a text response to file."""
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{modelName}_{timestamp}.txt"
            filepath = os.path.join(self.modelTestDir, filename)

            # Prepare the content for saving
            content = result.get("fullResponse", "")
            if not content:
                content = result.get("responsePreview", "No content available")

            # If there's an error, include it in the content
            if result.get("error"):
                content = f"ERROR: {result.get('error')}\n\n{content}"

            # Get the prompt and crawl configuration for logging
            config = result.get("crawlConfig", {})
            crawlDepth = config.get("depth", "N/A")
            crawlWidth = config.get("width", "N/A")

            # Get both the original JSON prompt and the actual prompt sent
            originalPrompt = result.get("testPrompt", "N/A")
            actualPromptSent = result.get("actualPromptSent", "N/A")

            # Add a metadata header
            metadata = f"""Model: {modelName}
Test Time: {timestamp}
Status: {result.get('status', 'Unknown')}
Processing Time: {result.get('processingTime', 0):.2f}s
Response Length: {result.get('responseLength', 0)} characters
Is Valid JSON: {result.get('isValidJson', False)}
Test Method: {result.get('testMethod', 'standard')}
Pages Crawled: {result.get('pagesCrawled', 'N/A')}
Crawled URL: {result.get('crawledUrl', 'N/A')}
Has URL: {result.get('hasUrl', 'N/A')}
Has Title: {result.get('hasTitle', 'N/A')}
Has Content: {result.get('hasContent', 'N/A')}
Content Length: {result.get('contentLength', 'N/A')} characters

--- CRAWL CONFIGURATION ---
Depth: {crawlDepth}
Width: {crawlWidth}

--- ORIGINAL JSON PROMPT (input) ---
{originalPrompt}

--- ACTUAL PROMPT SENT TO API (EXACT) ---
{actualPromptSent}

--- RESPONSE CONTENT ---
{content}
"""

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(metadata)

            result["savedTextFile"] = filepath
            print(f"📄 Text response saved: {filepath}")

        except Exception as e:
            print(f"❌ Error saving text response: {str(e)}")
            result["textSaveError"] = str(e)

    def _validateCrawlResponse(self, modelName: str, result: Dict[str, Any]):
        """Validate that the WEB_CRAWL response contains crawled content."""
        try:
            content = result.get("fullResponse", "")

            # Try to parse the content as JSON
            crawledData = {}
            try:
                parsed = json.loads(content)
                if isinstance(parsed, dict):
                    crawledData = parsed
            except (json.JSONDecodeError, TypeError):
                pass

            # Check for the expected fields: url, title, content
            hasUrl = bool(crawledData.get("url"))
            hasTitle = bool(crawledData.get("title"))
            hasContent = bool(crawledData.get("content"))
            contentLength = len(crawledData.get("content", ""))

            result["hasUrl"] = hasUrl
            result["hasTitle"] = hasTitle
            result["hasContent"] = hasContent
            result["contentLength"] = contentLength
            result["crawledUrl"] = crawledData.get("url", "")

            if hasUrl and hasContent:
                print(f"✅ Successfully crawled content from URL: {crawledData.get('url', 'unknown')}")
                print(f" Content length: {contentLength} characters")
                print(f" Title: {crawledData.get('title', 'N/A')}")
            else:
                print(f"⚠️ Incomplete crawl response - URL: {hasUrl}, Content: {hasContent}")

        except Exception as e:
            print(f"❌ Error validating crawl response: {str(e)}")
            result["crawlValidationError"] = str(e)

    async def _testTavilyDirect(self, modelName: str, crawlDepth: int = 3, crawlWidth: int = 50) -> Dict[str, Any]:
        """Test the Tavily API directly via its crawl() method for better link following."""
        print(f"\n{'='*60}")
        print("TESTING TAVILY DIRECT API (crawl method)")
        print(f"{'='*60}")

        startTime = asyncio.get_event_loop().time()

        try:
            from tavily import AsyncTavilyClient
            from modules.shared.configuration import APP_CONFIG

            apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
            if not apiKey:
                raise Exception("Tavily API key not found")

            client = AsyncTavilyClient(api_key=apiKey)

            # Map our configuration to Tavily parameters:
            # maxDepth -> max_depth (link-following depth)
            # maxWidth -> limit (total number of pages Tavily will process)
            #         and max_breadth (breadth of the crawl at each level)
            tavilyLimit = crawlWidth
            tavilyMaxDepth = crawlDepth
            tavilyMaxBreadth = crawlWidth

            print("Calling Tavily API with the crawl() method...")
            print("URL: https://www.valueon.ch")
            print("Instructions: Who works in this company?")
            print(f"Limit: {tavilyLimit} pages (total crawl budget)")
            print(f"Max depth: {tavilyMaxDepth} (follows links {tavilyMaxDepth} levels deep)")
            print(f"Max breadth: {tavilyMaxBreadth} (up to {tavilyMaxBreadth} pages at each level)")
            print("Deep and Broad Crawl Configuration Active")

            response = await client.crawl(
                url="https://www.valueon.ch",
                instructions="Who works in this company?",
                limit=tavilyLimit,
                max_depth=tavilyMaxDepth,
                max_breadth=tavilyMaxBreadth
            )

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime
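
            # Shape assumption: crawl() is expected to return either a dict with
            # a "results" list of {url, title, raw_content/content} entries, or
            # a bare list of such entries; anything else is stringified below as
            # a defensive fallback.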

            # Analyze the response
            contentLength = 0
            pagesCrawled = 0
            fullContent = ""

            if isinstance(response, dict):
                # Check if it has results
                if "results" in response:
                    results = response["results"]
                    pagesCrawled = len(results)
                    contentParts = []
                    for page in results:
                        url = page.get("url", "")
                        title = page.get("title", "")
                        content = page.get("raw_content", page.get("content", ""))
                        contentParts.append(f"URL: {url}\nTitle: {title}\nContent: {content}\n{'='*60}\n")
                        contentLength += len(content)

                    fullContent = "\n".join(contentParts)
                else:
                    fullContent = json.dumps(response, indent=2)
                    contentLength = len(fullContent)
            elif isinstance(response, list):
                pagesCrawled = len(response)
                contentParts = []
                for item in response:
                    if isinstance(item, dict):
                        url = item.get("url", "")
                        title = item.get("title", "")
                        content = item.get("raw_content", item.get("content", ""))
                        contentParts.append(f"URL: {url}\nTitle: {title}\nContent: {content}\n{'='*60}\n")
                        contentLength += len(content)

                fullContent = "\n".join(contentParts)
            else:
                fullContent = str(response)
                contentLength = len(fullContent)

            result = {
                "modelName": modelName,
                "status": "SUCCESS",
                "processingTime": round(processingTime, 2),
                "responseLength": contentLength,
                "responseType": "TavilyDirectAPI",
                "hasContent": True,
                "error": None,
                "modelUsed": modelName,
                "priceUsd": 0.0,
                "bytesSent": 0,
                "bytesReceived": contentLength,
                "isValidJson": True,
                "fullResponse": fullContent,
                "pagesCrawled": pagesCrawled,
                "testMethod": "direct_api_crawl"
            }

            print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
            print(f"📄 Pages crawled: {pagesCrawled}")
            print(f"📄 Total content length: {contentLength} characters")

            # Save and validate the response
            self._saveTextResponse(modelName, result)
            self._validateCrawlResponse(modelName, result)
            self._saveIndividualModelResult(modelName, result)

            self.testResults.append(result)
            return result

        except Exception as e:
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "modelName": modelName,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "responseLength": 0,
                "responseType": "exception",
                "hasContent": False,
                "error": str(e)
            }

            print(f"💥 EXCEPTION - {str(e)}")
            self.testResults.append(result)
            return result

    def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]):
        """Save an individual model test result to file."""
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{modelName}_{timestamp}.json"
            filepath = os.path.join(self.modelTestDir, filename)

            # Prepare the individual result data
            individualData = {
                "modelName": modelName,
                "testTimestamp": timestamp,
                "testDate": datetime.now().isoformat(),
                "result": result
            }

            # Save to a JSON file
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(individualData, f, indent=2, ensure_ascii=False)

            print(f"📄 Individual result saved: {filename}")

        except Exception as e:
            print(f"❌ Error saving individual result: {str(e)}")

    def getAllAvailableModels(self) -> List[str]:
        """Get the names of all available models that support WEB_CRAWL."""
        from modules.aicore.aicoreModelRegistry import modelRegistry
        from modules.datamodels.datamodelAi import OperationTypeEnum

        # Get all models from the registry
        allModels = modelRegistry.getAvailableModels()

        # Filter to models that support WEB_CRAWL (this covers both the Tavily
        # and Perplexity connectors registered in initialize())
        webCrawlModels = []
        for model in allModels:
            if model.operationTypes and any(
                ot.operationType == OperationTypeEnum.WEB_CRAWL
                for ot in model.operationTypes
            ):
                webCrawlModels.append(model.name)

        # Restrict to only the "sonar" model for this test run
        webCrawlModels = [m for m in webCrawlModels if m == "sonar"]
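        # Drop or widen the filter above to exercise every registered
        # WEB_CRAWL-capable model instead of just "sonar".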

        print(f"Found {len(webCrawlModels)} models that support WEB_CRAWL (filtered to sonar):")
        for modelName in webCrawlModels:
            print(f" - {modelName}")

        return webCrawlModels

    def saveTestResults(self):
        """Save detailed test results to file."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        resultsFile = os.path.join(self.modelTestDir, f"modeltest_results_{timestamp}.json")

        # Prepare the results for saving
        saveData = {
            "testTimestamp": timestamp,
            "testDate": datetime.now().isoformat(),
            "totalModels": len(self.testResults),
            "successfulModels": len([r for r in self.testResults if r["status"] == "SUCCESS"]),
            "errorModels": len([r for r in self.testResults if r["status"] == "ERROR"]),
            "exceptionModels": len([r for r in self.testResults if r["status"] == "EXCEPTION"]),
            "results": self.testResults
        }

        # Calculate the success rate
        if saveData["totalModels"] > 0:
            saveData["successRate"] = (saveData["successfulModels"] / saveData["totalModels"]) * 100
        else:
            saveData["successRate"] = 0

        # Save to a JSON file
        with open(resultsFile, 'w', encoding='utf-8') as f:
            json.dump(saveData, f, indent=2, ensure_ascii=False)

        print(f"📄 Detailed results saved: {resultsFile}")
        return resultsFile

    def printTestSummary(self):
        """Print a summary of all test results."""
        print(f"\n{'='*80}")
        print("AI MODELS TEST SUMMARY")
        print(f"{'='*80}")

        totalModels = len(self.testResults)
        successfulModels = len([r for r in self.testResults if r["status"] == "SUCCESS"])
        errorModels = len([r for r in self.testResults if r["status"] == "ERROR"])
        exceptionModels = len([r for r in self.testResults if r["status"] == "EXCEPTION"])

        print(f"📊 Total models tested: {totalModels}")
        print(f"✅ Successful: {successfulModels}")
        print(f"❌ Errors: {errorModels}")
        print(f"💥 Exceptions: {exceptionModels}")
        successRate = (successfulModels / totalModels * 100) if totalModels > 0 else 0.0
        print(f"📈 Success rate: {successRate:.1f}%")

        print(f"\n{'='*80}")
        print("DETAILED RESULTS")
        print(f"{'='*80}")

        for result in self.testResults:
            status_icon = {
                "SUCCESS": "✅",
                "ERROR": "❌",
                "EXCEPTION": "💥"
            }.get(result["status"], "❓")

            print(f"\n{status_icon} {result['modelName']}")
            print(f" Status: {result['status']}")
            print(f" Processing time: {result['processingTime']}s")
            print(f" Response length: {result['responseLength']} characters")
            print(f" Response type: {result['responseType']}")

            if result.get("isValidJson") is not None:
                print(f" Valid JSON: {'Yes' if result['isValidJson'] else 'No'}")

            if result.get("crawledUrl"):
                print(f" Crawled URL: {result['crawledUrl']}")

            if result.get("contentLength") is not None:
                print(f" Content length: {result['contentLength']} characters")

            if result.get("pagesCrawled") is not None:
                print(f" Pages crawled: {result['pagesCrawled']}")

            if result.get("error"):
                print(f" Error: {result['error']}")

            if result.get("responsePreview"):
                print(f" Preview: {result['responsePreview']}")

        # Find the fastest and slowest models
        if successfulModels > 0:
            successfulResults = [r for r in self.testResults if r["status"] == "SUCCESS"]
            fastest = min(successfulResults, key=lambda x: x["processingTime"])
            slowest = max(successfulResults, key=lambda x: x["processingTime"])

            print(f"\n{'='*80}")
            print("PERFORMANCE HIGHLIGHTS")
            print(f"{'='*80}")
            print(f"🚀 Fastest model: {fastest['modelName']} ({fastest['processingTime']}s)")
            print(f"🐌 Slowest model: {slowest['modelName']} ({slowest['processingTime']}s)")

            # Find the models with the most content
            modelsWithContent = [r for r in successfulResults if r.get("contentLength", 0) > 0]
            if modelsWithContent:
                mostContent = max(modelsWithContent, key=lambda x: x.get("contentLength", 0))
                totalContent = sum(r.get("contentLength", 0) for r in modelsWithContent)
                avgContent = totalContent / len(modelsWithContent)
                print(f"📄 Model with most content: {mostContent['modelName']} ({mostContent.get('contentLength', 0)} chars)")
                print(f"📊 Average content per model: {avgContent:.0f} characters")
                print(f"📊 Total content crawled across all models: {totalContent} characters")

            # Find the models with the most pages crawled (Tavily direct API)
            modelsWithPages = [r for r in successfulResults if r.get("pagesCrawled", 0) > 0]
            if modelsWithPages:
                mostPages = max(modelsWithPages, key=lambda x: x.get("pagesCrawled", 0))
                totalPages = sum(r.get("pagesCrawled", 0) for r in modelsWithPages)
                avgPages = totalPages / len(modelsWithPages)
                print(f"🔍 Model with most pages crawled: {mostPages['modelName']} ({mostPages.get('pagesCrawled', 0)} pages)")
                print(f"📊 Average pages per model: {avgPages:.1f} pages")
                print(f"📊 Total pages crawled across all models: {totalPages} pages")


async def main():
    """Run the AI model tests for the WEB_CRAWL operation."""
    tester = AIModelsTester()

    print("Starting AI Models Testing for WEB_CRAWL...")
    print("Initializing AI service...")
    await tester.initialize()

    # Get all available models
    models = tester.getAllAvailableModels()

    print(f"\nFound {len(models)} models to test:")
    for i, model in enumerate(models, 1):
        print(f" {i}. {model}")

    print(f"\n{'='*80}")
    print("STARTING WEB_CRAWL TESTS")
    print(f"{'='*80}")
    print("Testing each model's ability to crawl URLs and return content...")
    print("Press Enter after each model test to continue to the next one...")

    # Test each model individually
    for i, modelName in enumerate(models, 1):
        print(f"\n[{i}/{len(models)}] Testing model: {modelName}")

        # Test the model
        await tester.testModel(modelName)

        # Pause for user input (except after the last model)
        if i < len(models):
            input("\nPress Enter to continue to the next model...")

    # Save the detailed results to file
    resultsFile = tester.saveTestResults()

    # Print the final summary
    tester.printTestSummary()

    print(f"\n{'='*80}")
    print("TESTING COMPLETED")
    print(f"{'='*80}")
    print(f"📄 Results saved to: {resultsFile}")
    print(f"📁 Per-model artifacts saved to: {tester.modelTestDir}")


if __name__ == "__main__":
    asyncio.run(main())