serviceCenter = DI container (resolver, registry, context) for service instantiation
serviceHub = consumer-facing aggregation (DB interfaces, runtime state, lazy service resolution via serviceCenter)
- modules/serviceHub/ created: ServiceHub, PublicService, getInterface()
- 22 consumer files migrated (routes, features, tests): imports switched from modules.services to serviceHub or serviceCenter
- resolver.py: legacy fallback to the old services/ package removed
- modules/services/ deleted entirely (83 files, incl. dead code mainAiChat.py)
- pre-extraction: progress callback propagated through the chunk pipeline; operationType DATA_EXTRACT -> DATA_ANALYSE to use a cheaper model
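
A minimal sketch of the consumer-side change (the old import form below is assumed from the deleted modules/services path; the new one is the import actually used in the script that follows):

    # before (modules/services/, deleted):
    # from modules.services import getInterface as getServices
    # after:
    from modules.serviceHub import getInterface as getServices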
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
AI Models Test - Tests ALL operation types on ALL models that support them

This script tests all available models with all their supported operation types:
- PLAN: Planning operations
- DATA_ANALYSE: Data analysis
- DATA_GENERATE: Data generation
- DATA_EXTRACT: Data extraction
- IMAGE_ANALYSE: Image analysis
- IMAGE_GENERATE: Image generation
- WEB_SEARCH_DATA: Web search
- WEB_CRAWL: Web crawling

For each model, it tests every operation type the model supports and validates
the results. Results are saved to files for analysis.
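
Output goes to local/logs/modeltest/ one level above the gateway directory
(created automatically on first run): one JSON file per model/operation test
plus a modeltest_results_<timestamp>.json summary.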
"""

import asyncio
import json
import sys
import os
import base64
from datetime import datetime
from typing import Dict, Any, List

# Add the gateway to path (go up 2 levels from tests/functional/)
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
    sys.path.insert(0, _gateway_path)

# Import the service initialization
from modules.serviceHub import getInterface as getServices
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelUam import User


class AIModelsTester:
    def __init__(self):
        # Create a minimal user context for testing
        testUser = User(
            id="test_user",
            username="test_user",
            email="test@example.com",
            fullName="Test User",
            language="en",
            mandateId="test_mandate"
        )

        # Initialize services using the existing system
        self.services = getServices(testUser, None)  # Test user, no workflow
        self.testResults = []

        # Create logs directory if it doesn't exist (one level above the gateway directory)
        _gateway_dir = os.path.dirname(_gateway_path)
        self.logsDir = os.path.join(_gateway_dir, "local", "logs")
        os.makedirs(self.logsDir, exist_ok=True)

        # Create modeltest subdirectory
        self.modelTestDir = os.path.join(self.logsDir, "modeltest")
        os.makedirs(self.modelTestDir, exist_ok=True)

    async def initialize(self):
        """Initialize the AI service."""
        # Set logging level to DEBUG for detailed output
        import logging
        logging.getLogger().setLevel(logging.DEBUG)

        # Initialize the model registry with all connectors
        from modules.aicore.aicoreModelRegistry import modelRegistry

        # Note: We don't need to register web connectors for IMAGE_ANALYSE testing
        # from modules.aicore.aicorePluginTavily import AiTavily
        # from modules.aicore.aicorePluginPerplexity import AiPerplexity
        # modelRegistry.registerConnector(AiTavily())
        # modelRegistry.registerConnector(AiPerplexity())

        # The AI service needs to be recreated with proper initialization
        from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
        self.services.ai = await AiService.create(self.services)

        # Also initialize extraction service for image processing
        from modules.serviceCenter.services.serviceExtraction.mainServiceExtraction import ExtractionService
        self.services.extraction = ExtractionService(self.services)

        # Create a minimal workflow context
        from modules.datamodels.datamodelChat import ChatWorkflow, WorkflowModeEnum
        import uuid

        self.services.currentWorkflow = ChatWorkflow(
            id=str(uuid.uuid4()),
            name="Test Workflow",
            status="running",
            startedAt=self.services.utils.timestampGetUtc(),
            lastActivity=self.services.utils.timestampGetUtc(),
            currentRound=1,
            currentTask=0,
            currentAction=0,
            totalTasks=0,
            totalActions=0,
            mandateId="test_mandate",
            messageIds=[],
            workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
            maxSteps=5
        )

        print("✅ AI Service initialized successfully")
        print(f"📁 Results will be saved to: {self.modelTestDir}")

    def _getTestPromptForOperation(self, operationType) -> str:
        """Get appropriate test prompt for each operation type."""
        prompts = {
            OperationTypeEnum.PLAN: "Create a project plan for developing a mobile app with 5 main tasks.",
            OperationTypeEnum.DATA_ANALYSE: "Analyze the pros and cons of cloud computing.",
            OperationTypeEnum.DATA_GENERATE: "Generate a list of 10 creative marketing ideas for a tech startup.",
            OperationTypeEnum.DATA_EXTRACT: "Extract key information from this text about artificial intelligence trends.",
            OperationTypeEnum.IMAGE_ANALYSE: "Describe what you see in this image.",
            OperationTypeEnum.IMAGE_GENERATE: "A futuristic cityscape with flying cars and neon lights.",
            OperationTypeEnum.WEB_SEARCH_DATA: "Who works at valueon ag in Switzerland?",  # Search query for valueon.ch
            OperationTypeEnum.WEB_CRAWL: "https://www.valueon.ch"  # URL to crawl
        }
        return prompts.get(operationType, "Test prompt for this operation type.")

    def _createTestImage(self) -> str:
        """Load test image file and convert to base64 data URL."""
        # Path to test image (relative to gateway directory)
        testImagePath = os.path.join(
            os.path.dirname(__file__),  # tests/functional/
            "..",                       # tests/
            "testdata",                 # tests/testdata/
            "Foto20250906_125903.jpg"
        )

        # Resolve absolute path
        testImagePath = os.path.abspath(testImagePath)

        if not os.path.exists(testImagePath):
            raise FileNotFoundError(f"Test image not found at: {testImagePath}")

        # Read image file and convert to base64
        with open(testImagePath, 'rb') as f:
            imageBytes = f.read()

        imageBase64 = base64.b64encode(imageBytes).decode('utf-8')
        return f"data:image/jpeg;base64,{imageBase64}"

    async def testModelOperation(self, modelName: str, operationType, model) -> Dict[str, Any]:
        """Test a specific AI model with a specific operation type."""
        print(f"\n Testing operation: {operationType.name}")

        testPrompt = self._getTestPromptForOperation(operationType)

        startTime = asyncio.get_event_loop().time()

        try:
            # Create messages - format differs for IMAGE_ANALYSE
            if operationType == OperationTypeEnum.IMAGE_ANALYSE:
                # For image analysis, content must be a list with text and image
                testImage = self._createTestImage()
                messages = [{
                    "role": "user",
                    "content": [
                        {"type": "text", "text": testPrompt},
                        {"type": "image_url", "image_url": {"url": testImage}}
                    ]
                }]
            else:
                # For other operations, simple text content
                messages = [{"role": "user", "content": testPrompt}]

            # Create model call options
            from modules.datamodels.datamodelAi import (
                AiModelCall, AiCallPromptImage,
                AiCallPromptWebSearch, AiCallPromptWebCrawl
            )

            options = AiCallOptions(operationType=operationType)

            # Format message content based on operation type
            if operationType == OperationTypeEnum.IMAGE_GENERATE:
                # Create structured prompt with image generation parameters
                imagePrompt = AiCallPromptImage(
                    prompt=testPrompt,
                    size="1024x1024",
                    quality="standard",
                    style="vivid"
                )
                # Update message content to JSON format
                messages[0]["content"] = json.dumps(imagePrompt.model_dump())
            elif operationType == OperationTypeEnum.WEB_SEARCH_DATA:
                # Create structured prompt for web search
                webSearchPrompt = AiCallPromptWebSearch(
                    instruction=testPrompt,
                    maxNumberPages=5  # Limit for testing
                )
                # Update message content to JSON format
                messages[0]["content"] = json.dumps(webSearchPrompt.model_dump())
            elif operationType == OperationTypeEnum.WEB_CRAWL:
                # Create structured prompt for web crawl
                webCrawlPrompt = AiCallPromptWebCrawl(
                    instruction="Extract the main content from this page",
                    url=testPrompt,  # testPrompt contains the URL
                    maxDepth=1,  # Limit for testing
                    maxWidth=3  # Limit for testing
                )
                # Update message content to JSON format
                messages[0]["content"] = json.dumps(webCrawlPrompt.model_dump())
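
            # The structured prompts above serialize into the message content as JSON
            # strings, e.g. for WEB_CRAWL (field names from the constructor; the exact
            # model_dump() layout is an assumption):
            # {"instruction": "Extract the main content from this page",
            #  "url": "https://www.valueon.ch", "maxDepth": 1, "maxWidth": 3}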

            modelCall = AiModelCall(
                messages=messages,
                model=model,
                options=options
            )

            # Call model directly
            modelResponse = await model.functionCall(modelCall)

            if not modelResponse.success:
                raise Exception(f"Model call failed: {modelResponse.error}")

            result = modelResponse.content

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            # Analyze result based on operation type
            analysisResult = {
                "modelName": modelName,
                "operationType": operationType.name,
                "status": "SUCCESS",
                "processingTime": round(processingTime, 2),
                "responseLength": len(str(result)) if result else 0,
                "hasContent": bool(result),
                "error": None,
                "testPrompt": testPrompt,
                "fullResponse": str(result) if result else ""
            }

            # Operation-specific analysis
            if operationType == OperationTypeEnum.IMAGE_GENERATE:
                analysisResult["responseType"] = "base64_image"
                try:
                    if isinstance(result, str) and result.startswith("data:image"):
                        base64Data = result.split(",")[1] if "," in result else result
                    else:
                        base64Data = result if isinstance(result, str) else ""
                    if base64Data:
                        imageBytes = base64.b64decode(base64Data)
                        analysisResult["isValidBase64"] = True
                        analysisResult["imageByteSize"] = len(imageBytes)
                    else:
                        analysisResult["isValidBase64"] = False
                        analysisResult["imageByteSize"] = 0
                except Exception:
                    analysisResult["isValidBase64"] = False
                    analysisResult["imageByteSize"] = 0
            elif operationType in [OperationTypeEnum.DATA_ANALYSE, OperationTypeEnum.DATA_GENERATE, OperationTypeEnum.PLAN]:
                analysisResult["responseType"] = "text"
                try:
                    json.loads(str(result))
                    analysisResult["isValidJson"] = True
                except Exception:
                    analysisResult["isValidJson"] = False
            else:
                analysisResult["responseType"] = "text"

            analysisResult["responsePreview"] = str(result)[:200] + "..." if len(str(result)) > 200 else str(result)

            print(f" ✅ SUCCESS - Processing time: {processingTime:.2f}s, Response length: {analysisResult['responseLength']} chars")

            return analysisResult

        except Exception as e:
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "modelName": modelName,
                "operationType": operationType.name,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "responseLength": 0,
                "responseType": "exception",
                "hasContent": False,
                "error": str(e),
                "testPrompt": testPrompt,
                "fullResponse": ""
            }

            print(f" 💥 EXCEPTION - {str(e)}")
            return result

    async def testModel(self, modelInfo: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Test a specific AI model with all its supported operation types."""
        modelName = modelInfo["displayName"]
        operationTypes = modelInfo["operationTypes"]

        print(f"\n{'='*60}")
        print(f"TESTING MODEL: {modelName}")
        print(f"Supported operations: {', '.join([op.name for op in operationTypes])}")
        print(f"{'='*60}")

        # Get model from registry
        from modules.aicore.aicoreModelRegistry import modelRegistry
        model = modelRegistry.getModel(modelName)

        if not model:
            errorResult = {
                "modelName": modelName,
                "operationType": "ALL",
                "status": "ERROR",
                "processingTime": 0,
                "responseLength": 0,
                "responseType": "error",
                "hasContent": False,
                "error": f"Model {modelName} not found in registry",
                "fullResponse": ""
            }
            self.testResults.append(errorResult)
            return [errorResult]

        # Test each operation type
        results = []
        for operationType in operationTypes:
            result = await self.testModelOperation(modelName, operationType, model)
            results.append(result)
            self.testResults.append(result)

            # Save individual result
            self._saveIndividualModelResult(f"{modelName}_{operationType.name}", result)

        return results

    def _saveImageResponse(self, modelName: str, result: Dict[str, Any]):
        """Save image generation response as image file."""
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

            # Save as image file
            filename = f"{modelName}_{timestamp}.png"
            filepath = os.path.join(self.modelTestDir, filename)

            # Get image data
            content = result.get("fullResponse", "")

            if not content:
                print(f"⚠️ No image data to save for {modelName}")
                return

            # Decode base64 image data
            try:
                # Extract base64 data if it's a data URL
                if content.startswith("data:image"):
                    base64Data = content.split(",")[1] if "," in content else content
                else:
                    base64Data = content

                # Decode base64 to bytes
                imageBytes = base64.b64decode(base64Data)

                # Save image file
                with open(filepath, 'wb') as f:
                    f.write(imageBytes)

                result["savedImageFile"] = filepath
                print(f"🖼️ Image saved: {filepath}")

                # Also save metadata as JSON
                metadata = {
                    "modelName": modelName,
                    "timestamp": timestamp,
                    "status": result.get('status', 'Unknown'),
                    "processingTime": result.get('processingTime', 0),
                    "responseLength": result.get('responseLength', 0),
                    "isValidBase64": result.get('isValidBase64', False),
                    "imageByteSize": len(imageBytes),
                    "size": result.get('size', 'N/A'),
                    "quality": result.get('quality', 'N/A'),
                    "style": result.get('style', 'N/A'),
                    "testPrompt": result.get('testPrompt', 'N/A'),
                    "imageFile": filename
                }

                metadataFile = os.path.join(self.modelTestDir, f"{modelName}_{timestamp}_metadata.json")
                with open(metadataFile, 'w', encoding='utf-8') as f:
                    json.dump(metadata, f, indent=2, ensure_ascii=False)

                print(f"📄 Metadata saved: {metadataFile}")

            except Exception as decodeError:
                print(f"❌ Error decoding base64 image data: {str(decodeError)}")
                # Fall back to saving as text file
                textFile = os.path.join(self.modelTestDir, f"{modelName}_{timestamp}.txt")
                with open(textFile, 'w', encoding='utf-8') as f:
                    f.write(f"Error decoding image:\n{str(decodeError)}\n\nBase64 data:\n{content[:500]}...")
                print(f"📄 Saved base64 data as text: {textFile}")

        except Exception as e:
            print(f"❌ Error saving image generation response: {str(e)}")
            result["saveError"] = str(e)

    def _saveTextResponse(self, modelName: str, result: Dict[str, Any]):
        """Save text response to file."""
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{modelName}_{timestamp}.txt"
            filepath = os.path.join(self.modelTestDir, filename)

            # Prepare content for saving
            content = result.get("fullResponse", "")
            if not content:
                content = result.get("responsePreview", "No content available")

            # If there's an error, include it in the content
            if result.get("error"):
                content = f"ERROR: {result.get('error')}\n\n{content}"

            # Get prompt and config for logging
            config = result.get("crawlConfig", {})
            crawlDepth = config.get("depth", "N/A")
            crawlWidth = config.get("width", "N/A")

            # Get both the original JSON prompt and the actual prompt sent
            originalPrompt = result.get("testPrompt", "N/A")
            actualPromptSent = result.get("actualPromptSent", "N/A")

            # Add metadata header
            metadata = f"""Model: {modelName}
Test Time: {timestamp}
Status: {result.get('status', 'Unknown')}
Processing Time: {result.get('processingTime', 0):.2f}s
Response Length: {result.get('responseLength', 0)} characters
Is Valid JSON: {result.get('isValidJson', False)}
Test Method: {result.get('testMethod', 'standard')}
Pages Crawled: {result.get('pagesCrawled', 'N/A')}
Crawled URL: {result.get('crawledUrl', 'N/A')}
Has URL: {result.get('hasUrl', 'N/A')}
Has Title: {result.get('hasTitle', 'N/A')}
Has Content: {result.get('hasContent', 'N/A')}
Content Length: {result.get('contentLength', 'N/A')} characters

--- CRAWL CONFIGURATION ---
Depth: {crawlDepth}
Width: {crawlWidth}

--- ORIGINAL JSON PROMPT (input) ---
{originalPrompt}

--- ACTUAL PROMPT SENT TO API (EXACT) ---
{actualPromptSent}

--- RESPONSE CONTENT ---
{content}
"""

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(metadata)

            result["savedTextFile"] = filepath
            print(f"📄 Text response saved: {filepath}")

        except Exception as e:
            print(f"❌ Error saving text response: {str(e)}")
            result["textSaveError"] = str(e)

    def _validateCrawlResponse(self, modelName: str, result: Dict[str, Any]):
        """Validate that the WEB_CRAWL response contains crawled content."""
        try:
            content = result.get("fullResponse", "")

            # Try to parse as JSON
            crawledData = {}
            try:
                parsed = json.loads(content)
                if isinstance(parsed, dict):
                    crawledData = parsed
            except Exception:
                pass

            # Check for expected fields: url, title, content
            hasUrl = bool(crawledData.get("url"))
            hasTitle = bool(crawledData.get("title"))
            hasContent = bool(crawledData.get("content"))
            contentLength = len(crawledData.get("content", ""))

            result["hasUrl"] = hasUrl
            result["hasTitle"] = hasTitle
            result["hasContent"] = hasContent
            result["contentLength"] = contentLength
            result["crawledUrl"] = crawledData.get("url", "")

            if hasUrl and hasContent:
                print(f"✅ Successfully crawled content from URL: {crawledData.get('url', 'unknown')}")
                print(f" Content length: {contentLength} characters")
                print(f" Title: {crawledData.get('title', 'N/A')}")
            else:
                print(f"⚠️ Incomplete crawl response - URL: {hasUrl}, Content: {hasContent}")

        except Exception as e:
            print(f"❌ Error validating crawl response: {str(e)}")
            result["crawlValidationError"] = str(e)

    def _validateImageResponse(self, modelName: str, result: Dict[str, Any]):
        """Validate that the IMAGE_GENERATE response contains a valid base64 image."""
        try:
            content = result.get("fullResponse", "")

            # Check if content is a valid base64 image
            hasContent = bool(content and len(content.strip()) > 0)
            result["hasContent"] = hasContent

            if hasContent:
                isBase64 = result.get("isValidBase64", False)
                imageSize = result.get("imageByteSize", 0)
                imageSizeKB = imageSize / 1024 if imageSize > 0 else 0

                print("✅ Successfully generated image")
                print(f" Image size: {imageSizeKB:.2f} KB ({imageSize} bytes)")
                print(f" Valid base64: {'Yes' if isBase64 else 'No'}")
            else:
                print("⚠️ Empty or invalid image generation response")

        except Exception as e:
            print(f"❌ Error validating image response: {str(e)}")
            result["validationError"] = str(e)

    async def _testTavilyDirect(self, modelName: str, crawlDepth: int = 3, crawlWidth: int = 50) -> Dict[str, Any]:
        """Test Tavily API directly using the crawl() method with better link following."""
        print(f"\n{'='*60}")
        print("TESTING TAVILY DIRECT API (crawl method)")
        print(f"{'='*60}")

        startTime = asyncio.get_event_loop().time()

        try:
            from tavily import AsyncTavilyClient
            from modules.shared.configuration import APP_CONFIG

            apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
            if not apiKey:
                raise Exception("Tavily API key not found")

            client = AsyncTavilyClient(api_key=apiKey)

            # Map our configuration to Tavily parameters:
            # maxWidth -> limit (pages per level)
            # maxDepth -> max_depth (link following depth)
            # maxWidth -> max_breadth (breadth of crawl at each level)
            tavilyLimit = crawlWidth
            tavilyMaxDepth = crawlDepth
            tavilyMaxBreadth = crawlWidth

            print("Calling Tavily API with crawl() method...")
            print("URL: https://www.valueon.ch")
            print("Instructions: Who works in this company?")
            print(f"Limit: {tavilyLimit} pages per level")
            print(f"Max depth: {tavilyMaxDepth} (follows links {tavilyMaxDepth} levels deep)")
            print(f"Max breadth: {tavilyMaxBreadth} (up to {tavilyMaxBreadth} pages at each level)")
            print("Deep and Broad Crawl Configuration Active")

            response = await client.crawl(
                url="https://www.valueon.ch",
                instructions="Who works in this company?",
                limit=tavilyLimit,
                max_depth=tavilyMaxDepth,
                max_breadth=tavilyMaxBreadth
            )

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            # Analyze response
            contentLength = 0
            pagesCrawled = 0
            fullContent = ""

            if isinstance(response, dict):
                # Check if it has results
                if "results" in response:
                    results = response["results"]
                    pagesCrawled = len(results)
                    content_parts = []
                    for page in results:
                        url = page.get("url", "")
                        title = page.get("title", "")
                        content = page.get("raw_content", page.get("content", ""))
                        content_parts.append(f"URL: {url}\nTitle: {title}\nContent: {content}\n{'='*60}\n")
                        contentLength += len(content)

                    fullContent = "\n".join(content_parts)
                else:
                    fullContent = json.dumps(response, indent=2)
                    contentLength = len(fullContent)
            elif isinstance(response, list):
                pagesCrawled = len(response)
                content_parts = []
                for item in response:
                    if isinstance(item, dict):
                        url = item.get("url", "")
                        title = item.get("title", "")
                        content = item.get("raw_content", item.get("content", ""))
                        content_parts.append(f"URL: {url}\nTitle: {title}\nContent: {content}\n{'='*60}\n")
                        contentLength += len(content)

                fullContent = "\n".join(content_parts)
            else:
                fullContent = str(response)
                contentLength = len(fullContent)

            result = {
                "modelName": modelName,
                "status": "SUCCESS",
                "processingTime": round(processingTime, 2),
                "responseLength": contentLength,
                "responseType": "TavilyDirectAPI",
                "hasContent": True,
                "error": None,
                "modelUsed": modelName,
                "priceCHF": 0.0,
                "bytesSent": 0,
                "bytesReceived": contentLength,
                "isValidJson": True,
                "fullResponse": fullContent,
                "pagesCrawled": pagesCrawled,
                "testMethod": "direct_api_crawl"
            }

            print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
            print(f"📄 Pages crawled: {pagesCrawled}")
            print(f"📄 Total content length: {contentLength} characters")

            # Save the response
            self._saveTextResponse(modelName, result)
            self._validateCrawlResponse(modelName, result)
            self._saveIndividualModelResult(modelName, result)

            self.testResults.append(result)
            return result

        except Exception as e:
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "modelName": modelName,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "responseLength": 0,
                "responseType": "exception",
                "hasContent": False,
                "error": str(e)
            }

            print(f"💥 EXCEPTION - {str(e)}")
            self.testResults.append(result)
            return result

    def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]):
        """Save individual model test result to file."""
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{modelName}_{timestamp}.json"
            filepath = os.path.join(self.modelTestDir, filename)

            # Prepare individual result data
            individualData = {
                "modelName": modelName,
                "testTimestamp": timestamp,
                "testDate": datetime.now().isoformat(),
                "result": result
            }

            # Save to JSON file
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(individualData, f, indent=2, ensure_ascii=False)

            print(f"📄 Individual result saved: {filename}")

        except Exception as e:
            print(f"❌ Error saving individual result: {str(e)}")

    def getAllAvailableModels(self) -> List[Dict[str, Any]]:
        """Get all available models with their supported operation types."""
        from modules.aicore.aicoreModelRegistry import modelRegistry

        # Get all models from registry
        allModels = modelRegistry.getAvailableModels()
        totalModels = len(allModels)

        print(f"\n📊 Total models in registry: {totalModels}")

        # Collect all models with their supported operation types
        modelsToTest = []
        for model in allModels:
            if model.operationTypes and len(model.operationTypes) > 0:
                supportedOps = [ot.operationType for ot in model.operationTypes]
                modelsToTest.append({
                    "displayName": model.displayName,
                    "name": model.name,
                    "operationTypes": supportedOps
                })

        print(f"✅ Found {len(modelsToTest)} model(s) with operation type support (will test all):")
        for i, modelInfo in enumerate(modelsToTest, 1):
            opsStr = ", ".join([op.name for op in modelInfo["operationTypes"]])
            print(f" {i}. {modelInfo['displayName']} - Operations: {opsStr}")

        if len(modelsToTest) < totalModels:
            skipped = totalModels - len(modelsToTest)
            print(f"ℹ️ {skipped} model(s) have no operation types and will be skipped.")

        return modelsToTest
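
    # The summary JSON written by saveTestResults() below has this shape (keys from
    # the code; the values here are illustrative only):
    # {
    #   "testTimestamp": "20250101_120000", "testDate": "...", "totalModels": 12,
    #   "successfulModels": 10, "errorModels": 1, "exceptionModels": 1,
    #   "successRate": 83.3, "results": [...]
    # }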
    def saveTestResults(self):
        """Save detailed test results to file."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        resultsFile = os.path.join(self.modelTestDir, f"modeltest_results_{timestamp}.json")

        # Prepare results for saving (note: testResults holds one entry per
        # model/operation pair, so these counts are per test, not per model)
        saveData = {
            "testTimestamp": timestamp,
            "testDate": datetime.now().isoformat(),
            "totalModels": len(self.testResults),
            "successfulModels": len([r for r in self.testResults if r["status"] == "SUCCESS"]),
            "errorModels": len([r for r in self.testResults if r["status"] == "ERROR"]),
            "exceptionModels": len([r for r in self.testResults if r["status"] == "EXCEPTION"]),
            "results": self.testResults
        }

        # Calculate success rate
        if saveData["totalModels"] > 0:
            saveData["successRate"] = (saveData["successfulModels"] / saveData["totalModels"]) * 100
        else:
            saveData["successRate"] = 0

        # Save to JSON file
        with open(resultsFile, 'w', encoding='utf-8') as f:
            json.dump(saveData, f, indent=2, ensure_ascii=False)

        print(f"📄 Detailed results saved: {resultsFile}")
        return resultsFile

    def printTestSummary(self):
        """Print a summary of all test results."""
        print(f"\n{'='*80}")
        print("AI MODELS TEST SUMMARY")
        print(f"{'='*80}")

        totalTests = len(self.testResults)
        successfulTests = len([r for r in self.testResults if r["status"] == "SUCCESS"])
        errorTests = len([r for r in self.testResults if r["status"] == "ERROR"])
        exceptionTests = len([r for r in self.testResults if r["status"] == "EXCEPTION"])

        # Count unique models
        uniqueModels = len(set(r["modelName"] for r in self.testResults))

        print(f"📊 Total tests executed: {totalTests}")
        print(f"📦 Unique models tested: {uniqueModels}")
        print(f"✅ Successful tests: {successfulTests}")
        print(f"❌ Error tests: {errorTests}")
        print(f"💥 Exception tests: {exceptionTests}")
        if totalTests > 0:
            print(f"📈 Success rate: {(successfulTests / totalTests * 100):.1f}%")
        else:
            print("📈 Success rate: 0%")

        print(f"\n{'='*80}")
        print("DETAILED RESULTS")
        print(f"{'='*80}")

        # Group results by model
        from collections import defaultdict
        resultsByModel = defaultdict(list)
        for result in self.testResults:
            resultsByModel[result['modelName']].append(result)

        for modelName, modelResults in resultsByModel.items():
            print(f"\n📦 {modelName}")
            for result in modelResults:
                status_icon = {
                    "SUCCESS": "✅",
                    "ERROR": "❌",
                    "EXCEPTION": "💥"
                }.get(result["status"], "❓")

                opType = result.get("operationType", "UNKNOWN")
                print(f" {status_icon} {opType}: {result['status']} - {result['processingTime']}s - {result['responseLength']} chars")

                if result.get("isValidJson") is not None:
                    print(f" Valid JSON: {'Yes' if result['isValidJson'] else 'No'}")

                if result.get("isValidBase64") is not None:
                    print(f" Valid Base64: {'Yes' if result['isValidBase64'] else 'No'}")
                    if result.get("imageByteSize"):
                        print(f" Image size: {result['imageByteSize']} bytes")

                if result.get("crawledUrl"):
                    print(f" Crawled URL: {result['crawledUrl']}")

                if result.get("contentLength") is not None:
                    print(f" Content length: {result['contentLength']} characters")

                if result.get("pagesCrawled") is not None:
                    print(f" Pages crawled: {result['pagesCrawled']}")

                if result.get("error"):
                    print(f" Error: {result['error']}")

        # Find fastest and slowest tests
        if successfulTests > 0:
            successfulResults = [r for r in self.testResults if r["status"] == "SUCCESS"]
            fastest = min(successfulResults, key=lambda x: x["processingTime"])
            slowest = max(successfulResults, key=lambda x: x["processingTime"])

            print(f"\n{'='*80}")
            print("PERFORMANCE HIGHLIGHTS")
            print(f"{'='*80}")
            print(f"🚀 Fastest test: {fastest['modelName']} - {fastest.get('operationType', 'UNKNOWN')} ({fastest['processingTime']}s)")
            print(f"🐌 Slowest test: {slowest['modelName']} - {slowest.get('operationType', 'UNKNOWN')} ({slowest['processingTime']}s)")

            # Find models with most content
            modelsWithContent = [r for r in successfulResults if r.get("contentLength", 0) > 0]
            if modelsWithContent:
                mostContent = max(modelsWithContent, key=lambda x: x.get("contentLength", 0))
                totalContent = sum(r.get("contentLength", 0) for r in modelsWithContent)
                avgContent = totalContent / len(modelsWithContent)
                print(f"📄 Model with most content: {mostContent['modelName']} ({mostContent.get('contentLength', 0)} chars)")
                print(f"📊 Average content per model: {avgContent:.0f} characters")
                print(f"📊 Total content crawled across all models: {totalContent} characters")

            # Find models with most pages crawled (for Tavily direct API)
            modelsWithPages = [r for r in successfulResults if r.get("pagesCrawled", 0) > 0]
            if modelsWithPages:
                mostPages = max(modelsWithPages, key=lambda x: x.get("pagesCrawled", 0))
                totalPages = sum(r.get("pagesCrawled", 0) for r in modelsWithPages)
                avgPages = totalPages / len(modelsWithPages)
                print(f"🔍 Model with most pages crawled: {mostPages['modelName']} ({mostPages.get('pagesCrawled', 0)} pages)")
                print(f"📊 Average pages per model: {avgPages:.1f} pages")
                print(f"📊 Total pages crawled across all models: {totalPages} pages")


async def main():
    """Run AI models testing for all operation types."""
    tester = AIModelsTester()

    print("Starting AI Models Testing for ALL Operation Types...")
    print("Initializing AI service...")
    await tester.initialize()
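
    # Optional: exercise the Tavily crawl API directly via the helper defined above
    # (not part of the standard run; the label argument is only used for file naming
    # and is an illustrative value):
    # await tester._testTavilyDirect("TavilyDirect")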

    # Get all available models with their operation types
    models = tester.getAllAvailableModels()

    if not models:
        print("\n⚠️ No models found with operation type support.")
        print(" Please check that models with operation types are registered.")
        return

    # Count total tests (models * operation types)
    totalTests = sum(len(model["operationTypes"]) for model in models)

    print(f"\n{'='*80}")
    print("STARTING COMPREHENSIVE MODEL TESTS")
    print(f"{'='*80}")
    print(f"Testing {len(models)} model(s) with {totalTests} total operation type test(s)...")
    print("All models and their supported operation types will be tested automatically.")
    print(f"{'='*80}\n")

    # Test each model with all its operation types
    testCount = 0
    for i, modelInfo in enumerate(models, 1):
        print(f"\n{'='*80}")
        print(f"[Model {i}/{len(models)}] Testing: {modelInfo['displayName']}")
        print(f"{'='*80}")

        # Test the model (tests all its operation types)
        results = await tester.testModel(modelInfo)
        testCount += len(results)

        print(f"\n✅ Completed {len(results)} test(s) for {modelInfo['displayName']}")

    # Save detailed results to file
    resultsFile = tester.saveTestResults()

    # Print final summary
    tester.printTestSummary()

    print(f"\n{'='*80}")
    print("TESTING COMPLETED")
    print(f"{'='*80}")
    print(f"📊 Total tests executed: {testCount}")
    print(f"📄 Results saved to: {resultsFile}")
    print(f"📁 Test results saved to: {tester.modelTestDir}")


if __name__ == "__main__":
    asyncio.run(main())