From e8c3052176797752b54ed04cf3cc5aed0f4ab40e Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sat, 25 Oct 2025 22:27:27 +0200 Subject: [PATCH] tuning and aligning ai models --- modules/aicore/aicorePluginAnthropic.py | 8 +- modules/aicore/aicorePluginOpenai.py | 18 +- modules/aicore/aicorePluginPerplexity.py | 150 ++--- modules/aicore/aicorePluginTavily.py | 73 ++- modules/datamodels/datamodelAi.py | 6 +- modules/interfaces/interfaceAiObjects.py | 18 +- modules/services/serviceAi/mainServiceAi.py | 5 + test_ai_behavior.py | 5 +- test_ai_models.py | 583 ++++++++++++++++++++ 9 files changed, 711 insertions(+), 155 deletions(-) create mode 100644 test_ai_models.py diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index 1ac63516..4debc7ed 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -112,7 +112,9 @@ class AiAnthropic(BaseConnectorAi): messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens # Transform OpenAI-style messages to Anthropic format: @@ -237,8 +239,8 @@ class AiAnthropic(BaseConnectorAi): model = modelCall.model options = modelCall.options prompt = messages[0]["content"] if messages else "" - imageData = options.get("imageData") - mimeType = options.get("mimeType") + imageData = getattr(options, "imageData", None) + mimeType = getattr(options, "mimeType", None) # Debug logging logger.info(f"callAiImage called with imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}") diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index f23ab7c7..4d2a0f4d 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -51,7 +51,7 
@@ class AiOpenai(BaseConnectorAi): connectorType="openai", apiUrl="https://api.openai.com/v1/chat/completions", temperature=0.2, - maxTokens=128000, + maxTokens=16384, contextLength=128000, costPer1kTokensInput=0.03, costPer1kTokensOutput=0.06, @@ -76,7 +76,7 @@ class AiOpenai(BaseConnectorAi): connectorType="openai", apiUrl="https://api.openai.com/v1/chat/completions", temperature=0.2, - maxTokens=16000, + maxTokens=4096, contextLength=16000, costPer1kTokensInput=0.0015, costPer1kTokensOutput=0.002, @@ -100,7 +100,7 @@ class AiOpenai(BaseConnectorAi): connectorType="openai", apiUrl="https://api.openai.com/v1/chat/completions", temperature=0.2, - maxTokens=128000, + maxTokens=16384, contextLength=128000, costPer1kTokensInput=0.03, costPer1kTokensOutput=0.06, @@ -158,7 +158,9 @@ class AiOpenai(BaseConnectorAi): messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens payload = { @@ -226,8 +228,8 @@ class AiOpenai(BaseConnectorAi): model = modelCall.model options = modelCall.options prompt = messages[0]["content"] if messages else "" - imageData = options.get("imageData") - mimeType = options.get("mimeType", "image/jpeg") + imageData = getattr(options, "imageData", None) + mimeType = getattr(options, "mimeType", "image/jpeg") logger.debug(f"Starting image analysis with query '{prompt}' for size {len(imageData)}B...") @@ -261,10 +263,6 @@ class AiOpenai(BaseConnectorAi): } ] - # Use a vision-capable model for image analysis - # Override the model for vision tasks - visionModel = "gpt-4o" # or "gpt-4-vision-preview" depending on availability - # Use parameters from model temperature = model.temperature # Don't set maxTokens - let the model use its full context length diff --git a/modules/aicore/aicorePluginPerplexity.py 
b/modules/aicore/aicorePluginPerplexity.py index 44a30283..01fde7df 100644 --- a/modules/aicore/aicorePluginPerplexity.py +++ b/modules/aicore/aicorePluginPerplexity.py @@ -44,27 +44,29 @@ class AiPerplexity(BaseConnectorAi): """Get all available Perplexity models.""" return [ AiModel( - name="llama-3.1-sonar-large-128k-online", - displayName="Perplexity Llama 3.1 Sonar Large 128k", + name="sonar", + displayName="Perplexity Sonar", connectorType="perplexity", apiUrl="https://api.perplexity.ai/chat/completions", temperature=0.2, - maxTokens=128000, - contextLength=128000, + maxTokens=4000, + contextLength=32000, costPer1kTokensInput=0.005, costPer1kTokensOutput=0.005, speedRating=8, qualityRating=8, # capabilities removed (not used in business logic) - functionCall=self.callAiBasic, + functionCall=self.callWebOperation, priority=PriorityEnum.BALANCED, processingMode=ProcessingModeEnum.ADVANCED, operationTypes=createOperationTypeRatings( - (OperationTypeEnum.PLAN, 7), - (OperationTypeEnum.DATA_ANALYSE, 8), - (OperationTypeEnum.DATA_GENERATE, 7) + (OperationTypeEnum.WEB_RESEARCH, 8), + (OperationTypeEnum.WEB_SEARCH, 9), + (OperationTypeEnum.WEB_CRAWL, 7), + (OperationTypeEnum.WEB_NEWS, 8), + (OperationTypeEnum.WEB_QUESTIONS, 9) ), - version="llama-3.1-sonar-large-128k-online", + version="sonar", calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005 ), AiModel( @@ -73,8 +75,8 @@ class AiPerplexity(BaseConnectorAi): connectorType="perplexity", apiUrl="https://api.perplexity.ai/chat/completions", temperature=0.2, - maxTokens=128000, - contextLength=128000, + maxTokens=4000, + contextLength=32000, costPer1kTokensInput=0.01, costPer1kTokensOutput=0.01, speedRating=6, # Slower due to AI analysis @@ -92,84 +94,6 @@ class AiPerplexity(BaseConnectorAi): ), version="sonar-pro", calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.01 + (bytesReceived / 4 / 
1000) * 0.01 - ), - AiModel( - name="mistral-7b-instruct", - displayName="Perplexity Mistral 7B Instruct", - connectorType="perplexity", - apiUrl="https://api.perplexity.ai/chat/completions", - temperature=0.2, - maxTokens=32000, - contextLength=32000, - costPer1kTokensInput=0.002, - costPer1kTokensOutput=0.002, - speedRating=9, # Fast for basic AI tasks - qualityRating=7, # Good but not premium quality - # capabilities removed (not used in business logic) - functionCall=self.callWebOperation, - priority=PriorityEnum.COST, - processingMode=ProcessingModeEnum.BASIC, - operationTypes=createOperationTypeRatings( - (OperationTypeEnum.WEB_RESEARCH, 7), - (OperationTypeEnum.WEB_SEARCH, 6), - (OperationTypeEnum.WEB_CRAWL, 5), - (OperationTypeEnum.WEB_NEWS, 5), - (OperationTypeEnum.WEB_QUESTIONS, 6) - ), - version="mistral-7b-instruct", - calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002 - ), - AiModel( - name="mistral-7b-instruct-qa", - displayName="Perplexity Mistral 7B Instruct QA", - connectorType="perplexity", - apiUrl="https://api.perplexity.ai/chat/completions", - temperature=0.2, - maxTokens=32000, - contextLength=32000, - costPer1kTokensInput=0.002, - costPer1kTokensOutput=0.002, - speedRating=9, # Fast for Q&A tasks - qualityRating=7, # Good but not premium quality - # capabilities removed (not used in business logic) - functionCall=self.callWebOperation, - priority=PriorityEnum.COST, - processingMode=ProcessingModeEnum.BASIC, - operationTypes=createOperationTypeRatings( - (OperationTypeEnum.WEB_RESEARCH, 6), - (OperationTypeEnum.WEB_SEARCH, 5), - (OperationTypeEnum.WEB_CRAWL, 4), - (OperationTypeEnum.WEB_NEWS, 4), - (OperationTypeEnum.WEB_QUESTIONS, 10) - ), - version="mistral-7b-instruct", - calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002 - ), - AiModel( - name="mistral-7b-instruct-news", - 
displayName="Perplexity Mistral 7B Instruct News", - connectorType="perplexity", - apiUrl="https://api.perplexity.ai/chat/completions", - temperature=0.2, - maxTokens=32000, - contextLength=32000, - costPer1kTokensInput=0.002, - costPer1kTokensOutput=0.002, - speedRating=9, # Fast for news tasks - qualityRating=7, # Good but not premium quality - # capabilities removed (not used in business logic) - functionCall=self.callWebOperation, - priority=PriorityEnum.COST, - processingMode=ProcessingModeEnum.BASIC, - operationTypes=createOperationTypeRatings( - (OperationTypeEnum.WEB_RESEARCH, 6), - (OperationTypeEnum.WEB_SEARCH, 5), - (OperationTypeEnum.WEB_CRAWL, 4), - (OperationTypeEnum.WEB_NEWS, 10), - (OperationTypeEnum.WEB_QUESTIONS, 4) - ), - version="mistral-7b-instruct", - calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002 ) ] @@ -191,7 +115,9 @@ class AiPerplexity(BaseConnectorAi): messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens payload = { @@ -251,7 +177,9 @@ class AiPerplexity(BaseConnectorAi): messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens # Parse unified prompt JSON format @@ -349,7 +277,9 @@ Include actual URLs in your response.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = 
model.maxTokens payload = { @@ -408,7 +338,9 @@ Include actual URLs in your response.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens payload = { @@ -467,7 +399,9 @@ Include actual URLs in your response.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens payload = { @@ -526,7 +460,9 @@ Include actual URLs in your response.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens # Parse unified prompt JSON format @@ -623,17 +559,17 @@ Extract content from each URL and provide detailed analysis.""" """ try: options = modelCall.options - operationType = options.get("operationType") + operationType = getattr(options, "operationType", None) - if operationType == "WEB_SEARCH": + if operationType == OperationTypeEnum.WEB_SEARCH: return await self.callAiWithWebSearch(modelCall) - elif operationType == "WEB_CRAWL": + elif operationType == OperationTypeEnum.WEB_CRAWL: return await self.crawl(modelCall) - elif operationType == "WEB_RESEARCH": + elif operationType == OperationTypeEnum.WEB_RESEARCH: return await self.research(modelCall) - elif operationType == "WEB_QUESTIONS": + elif operationType == OperationTypeEnum.WEB_QUESTIONS: return await self.questions(modelCall) - elif operationType == "WEB_NEWS": + elif operationType == OperationTypeEnum.WEB_NEWS: return await 
self.news(modelCall) else: # Fallback to research for unknown operation types @@ -661,7 +597,9 @@ Extract content from each URL and provide detailed analysis.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens # Parse unified prompt JSON format @@ -754,7 +692,9 @@ Provide comprehensive research with detailed analysis.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens # Parse unified prompt JSON format @@ -850,7 +790,9 @@ Provide a detailed answer with well-cited sources.""" messages = modelCall.messages model = modelCall.model options = modelCall.options - temperature = options.get("temperature", model.temperature) + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature maxTokens = model.maxTokens # Parse unified prompt JSON format diff --git a/modules/aicore/aicorePluginTavily.py b/modules/aicore/aicorePluginTavily.py index 9d6a3b6a..f2cc0b6a 100644 --- a/modules/aicore/aicorePluginTavily.py +++ b/modules/aicore/aicorePluginTavily.py @@ -64,6 +64,20 @@ class ConnectorWeb(BaseConnectorAi): # Cached web search constraints (camelCase per project style) self.webSearchMinResults: int = 1 self.webSearchMaxResults: int = 20 + # Initialize client if API key is available + self._initializeClient() + + def _initializeClient(self): + """Initialize the Tavily client if API key is available.""" + try: + api_key = APP_CONFIG.get("Connector_AiTavily_API_SECRET") + if api_key: + self.client = AsyncTavilyClient(api_key=api_key) + logger.info("Tavily client 
initialized successfully") + else: + logger.warning("Tavily API key not found, client not initialized") + except Exception as e: + logger.error(f"Failed to initialize Tavily client: {str(e)}") def getConnectorType(self) -> str: """Get the connector type identifier.""" @@ -442,13 +456,13 @@ One URL per line. """ try: options = modelCall.options - operationType = options.get("operationType") + operationType = getattr(options, "operationType", None) - if operationType == "WEB_SEARCH": + if operationType == OperationTypeEnum.WEB_SEARCH: return await self.search(modelCall) - elif operationType == "WEB_CRAWL": + elif operationType == OperationTypeEnum.WEB_CRAWL: return await self.crawl(modelCall) - elif operationType in ["WEB_RESEARCH", "WEB_QUESTIONS", "WEB_NEWS"]: + elif operationType in [OperationTypeEnum.WEB_RESEARCH, OperationTypeEnum.WEB_QUESTIONS, OperationTypeEnum.WEB_NEWS]: return await self.research(modelCall) else: # Fallback to search for unknown operation types @@ -493,8 +507,8 @@ One URL per line. time_range=optimizedParams.get("time_range", timeRange), country=optimizedParams.get("country", country), language=optimizedParams.get("language", language), - include_answer=options.get("include_answer", True), - include_raw_content=options.get("include_raw_content", True), + include_answer=getattr(options, "include_answer", True), + include_raw_content=getattr(options, "include_raw_content", True), ) # Step 3: AI-based URL selection and intelligent filtering @@ -607,14 +621,14 @@ One URL per line. 
# Extract parameters from modelCall promptContent = modelCall.messages[0]["content"] if modelCall.messages else "" options = modelCall.options - operationType = options.get("operationType") + operationType = getattr(options, "operationType", None) # Parse unified prompt JSON format import json promptData = json.loads(promptContent) # Extract parameters based on operation type - if operationType == "WEB_RESEARCH": + if operationType == OperationTypeEnum.WEB_RESEARCH: query = promptData.get("researchPrompt", promptContent) maxResults = promptData.get("maxResults", 8) searchDepth = "basic" @@ -623,7 +637,7 @@ One URL per line. language = promptData.get("language") topic = "general" - elif operationType == "WEB_QUESTIONS": + elif operationType == OperationTypeEnum.WEB_QUESTIONS: query = promptData.get("question", promptContent) maxResults = promptData.get("maxResults", 6) searchDepth = "basic" @@ -632,7 +646,7 @@ One URL per line. language = promptData.get("language") topic = "general" - elif operationType == "WEB_NEWS": + elif operationType == OperationTypeEnum.WEB_NEWS: query = promptData.get("newsPrompt", promptContent) maxResults = promptData.get("maxResults", 10) searchDepth = "basic" @@ -766,22 +780,22 @@ One URL per line. 
search_results = await self._search( query=query, - max_results=options.get("max_results", 5), - search_depth=options.get("search_depth"), - time_range=options.get("time_range"), - topic=options.get("topic"), - include_domains=options.get("include_domains"), - exclude_domains=options.get("exclude_domains"), - language=options.get("language"), - include_answer=options.get("include_answer"), - include_raw_content=options.get("include_raw_content"), + max_results=getattr(options, "max_results", 5), + search_depth=getattr(options, "search_depth", None), + time_range=getattr(options, "time_range", None), + topic=getattr(options, "topic", None), + include_domains=getattr(options, "include_domains", None), + exclude_domains=getattr(options, "exclude_domains", None), + language=getattr(options, "language", None), + include_answer=getattr(options, "include_answer", None), + include_raw_content=getattr(options, "include_raw_content", None), ) urls = [result.url for result in search_results] crawl_results = await self._crawl( urls, - extract_depth=options.get("extract_depth"), - format=options.get("format"), + extract_depth=getattr(options, "extract_depth", None), + format=getattr(options, "format", None), ) # Convert to JSON string @@ -805,8 +819,8 @@ One URL per line. success=True, metadata={ "total_count": len(crawl_results), - "search_depth": options.get("search_depth", "basic"), - "extract_depth": options.get("extract_depth", "basic") + "search_depth": getattr(options, "search_depth", "basic"), + "extract_depth": getattr(options, "extract_depth", "basic") } ) @@ -936,6 +950,13 @@ One URL per line. kwargs["include_raw_content"] = include_raw_content logger.debug(f"Tavily.search kwargs: {kwargs}") + + # Ensure client is initialized + if self.client is None: + self._initializeClient() + if self.client is None: + raise ValueError("Tavily client not initialized. 
Please check API key configuration.") + response = await self.client.search(**kwargs) return [ @@ -973,6 +994,12 @@ One URL per line. logger.debug(f"Sending request to Tavily with kwargs: {kwargs_extract}") + # Ensure client is initialized + if self.client is None: + self._initializeClient() + if self.client is None: + raise ValueError("Tavily client not initialized. Please check API key configuration.") + response = await asyncio.wait_for( self.client.extract(**kwargs_extract), timeout=timeout diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index baec853b..31387e28 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -2,8 +2,8 @@ from typing import Optional, List, Dict, Any, Callable, TYPE_CHECKING, Tuple from pydantic import BaseModel, Field from enum import Enum -if TYPE_CHECKING: - from modules.datamodels.datamodelExtraction import ContentPart +# Import ContentPart for runtime use (needed for Pydantic model rebuilding) +from modules.datamodels.datamodelExtraction import ContentPart # Operation Types class OperationTypeEnum(str, Enum): @@ -173,7 +173,7 @@ class AiModelCall(BaseModel): messages: List[Dict[str, Any]] = Field(description="Messages in OpenAI format (role, content)") model: Optional[AiModel] = Field(default=None, description="The AI model being called") - options: Dict[str, Any] = Field(default_factory=dict, description="Additional model-specific options") + options: AiCallOptions = Field(default_factory=AiCallOptions, description="Additional model-specific options") class Config: arbitraryTypesAllowed = True diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index f9196cb4..2ae97586 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -399,9 +399,9 @@ class AiObjects: inputBytes = len((prompt + context).encode('utf-8')) # Replace placeholder in prompt for this specific model - 
contextLength = model.contextLength - if contextLength > 0: - tokenLimit = str(contextLength) + # Use maxTokens for output limit, not contextLength + if model.maxTokens > 0: + tokenLimit = str(model.maxTokens) else: tokenLimit = "16000" # Default for text generation @@ -450,7 +450,7 @@ class AiObjects: outputBytes = len(content.encode("utf-8")) # Calculate price using model's own price calculation method - priceUsd = model.calculatePriceUsd(inputBytes, outputBytes) + priceUsd = model.calculatePriceUsd(processingTime, inputBytes, outputBytes) return AiCallResponse( content=content, @@ -542,7 +542,7 @@ class AiObjects: modelCall = AiModelCall( messages=[{"role": "user", "content": prompt}], model=model, - options={"imageData": imageData, "mimeType": mimeType} + options=AiCallOptions(imageData=imageData, mimeType=mimeType) ) # Call the model with standardized interface @@ -562,7 +562,7 @@ class AiObjects: outputBytes = len(content.encode("utf-8")) # Calculate price using model's own price calculation method - priceUsd = model.calculatePriceUsd(inputBytes, outputBytes) + priceUsd = model.calculatePriceUsd(processingTime, inputBytes, outputBytes) return AiCallResponse( content=content, @@ -603,7 +603,7 @@ class AiObjects: modelCall = AiModelCall( messages=[{"role": "user", "content": prompt}], model=selectedModel, - options={"size": size, "quality": quality, "style": style} + options=AiCallOptions(size=size, quality=quality, style=style) ) # Call the model with standardized interface @@ -623,13 +623,13 @@ class AiObjects: outputBytes = len(content.encode("utf-8")) # Calculate price using model's own price calculation method - priceUsd = selectedModel.calculatePriceUsd(inputBytes, outputBytes) + priceUsd = selectedModel.calculatePriceUsd(processingTime, inputBytes, outputBytes) logger.info(f"✅ Image generation successful with model: {modelName}") return AiCallResponse( success=True, content=content, - model=modelName, + modelName=modelName, processingTime=processingTime, 
priceUsd=priceUsd, bytesSent=inputBytes, diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 6441db8b..6e199678 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -8,6 +8,7 @@ from modules.interfaces.interfaceAiObjects import AiObjects from modules.services.serviceAi.subCoreAi import SubCoreAi from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration +from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent logger = logging.getLogger(__name__) @@ -142,4 +143,8 @@ class AiService: # Use "json" for document generation calls since they return JSON return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title, "json") + def sanitizePromptContent(self, content: str, contentType: str = "text") -> str: + """Sanitize prompt content to prevent injection attacks and ensure safe presentation.""" + return sanitizePromptContent(content, contentType) + diff --git a/test_ai_behavior.py b/test_ai_behavior.py index 02bf2f54..76db986b 100644 --- a/test_ai_behavior.py +++ b/test_ai_behavior.py @@ -80,12 +80,11 @@ class AIBehaviorTester: # Use the AI service directly with the user prompt - it will build the generation prompt internally try: # Use the existing AI service with JSON format - it handles looping internally - response = await self.services.ai.coreAi.callAiDocuments( + response = await self.services.ai.callAiDocuments( prompt=prompt, # Use the raw user prompt directly documents=None, outputFormat="json", - title="Prime Numbers Test", - loopInstructionFormat="json" # Use the JSON loop instructions + title="Prime Numbers Test" ) if isinstance(response, dict): diff --git a/test_ai_models.py b/test_ai_models.py new file mode 100644 index 00000000..9d841829 --- /dev/null +++ b/test_ai_models.py @@ -0,0 +1,583 @@ 
+#!/usr/bin/env python3 +""" +AI Models Test - Tests all available AI models individually +""" + +import asyncio +import json +import sys +import os +import base64 +from datetime import datetime +from typing import Dict, Any, List + +# Add the gateway to path +sys.path.append(os.path.dirname(__file__)) + +# Import the service initialization +from modules.features.chatPlayground.mainChatPlayground import getServices +from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum +from modules.datamodels.datamodelUam import User + +class AIModelsTester: + def __init__(self): + # Create a minimal user context for testing + testUser = User( + id="test_user", + username="test_user", + email="test@example.com", + fullName="Test User", + language="en", + mandateId="test_mandate" + ) + + # Initialize services using the existing system + self.services = getServices(testUser, None) # Test user, no workflow + self.testResults = [] + + # Create logs directory if it doesn't exist + self.logsDir = os.path.join(os.path.dirname(__file__), "..", "local", "logs") + os.makedirs(self.logsDir, exist_ok=True) + + # Create modeltest subdirectory + self.modelTestDir = os.path.join(self.logsDir, "modeltest") + os.makedirs(self.modelTestDir, exist_ok=True) + + # Copy test image to modeltest directory if it exists + testImageSource = os.path.join(self.logsDir, "_testdata_photo_2025-06-03_13-05-52.jpg") + testImageDest = os.path.join(self.modelTestDir, "_testdata_photo_2025-06-03_13-05-52.jpg") + if os.path.exists(testImageSource) and not os.path.exists(testImageDest): + import shutil + shutil.copy2(testImageSource, testImageDest) + print(f"📷 Test image copied to: {testImageDest}") + + async def initialize(self): + """Initialize the AI service.""" + # Set logging level to INFO to reduce noise + import logging + logging.getLogger().setLevel(logging.INFO) + + # The AI service needs to be recreated with proper initialization + from modules.services.serviceAi.mainServiceAi import 
AiService + self.services.ai = await AiService.create(self.services) + + # Create a minimal workflow context + from modules.datamodels.datamodelChat import ChatWorkflow + import uuid + + self.services.currentWorkflow = ChatWorkflow( + id=str(uuid.uuid4()), + name="Test Workflow", + status="running", + startedAt=self.services.utils.timestampGetUtc(), + lastActivity=self.services.utils.timestampGetUtc(), + currentRound=1, + currentTask=0, + currentAction=0, + totalTasks=0, + totalActions=0, + mandateId="test_mandate", + messageIds=[], + workflowMode="React", + maxSteps=5 + ) + + print("✅ AI Service initialized successfully") + print(f"📁 Results will be saved to: {self.modelTestDir}") + + async def testModel(self, modelName: str) -> Dict[str, Any]: + """Test a specific AI model with a simple prompt.""" + print(f"\n{'='*60}") + print(f"TESTING MODEL: {modelName}") + print(f"{'='*60}") + + # Choose test prompt based on model type - Web models get JSON formatted prompts + import json + + if "tavily" in modelName.lower(): + # Tavily models get web search prompt in JSON format (from methodAi.py) + testPrompt = json.dumps({ + "searchPrompt": "Search for recent news about artificial intelligence developments in 2024. Return the top 3 results as JSON with fields: title, url, snippet.", + "maxResults": 3, + "timeRange": "y", + "country": "United States", + "instructions": "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt." + }, indent=2) + elif "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower(): + # Perplexity models get web research prompt in JSON format (from methodAi.py) + testPrompt = json.dumps({ + "researchPrompt": "Research the latest trends in renewable energy technology. 
Provide a comprehensive overview with key developments, companies involved, and future prospects. Return as JSON.", + "maxResults": 5, + "timeRange": "y", + "country": "United States", + "instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights." + }, indent=2) + else: + # Fallback for other models + testPrompt = "Generate a comprehensive analysis of the current state of artificial intelligence. Return as JSON." + + print(f"Test prompt: {testPrompt}") + print(f"Prompt length: {len(testPrompt)} characters") + + startTime = asyncio.get_event_loop().time() + + try: + # Create options to force this specific model + if "internal" in modelName.lower(): + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_EXTRACT, + preferredModel=modelName + ) + else: + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + preferredModel=modelName + ) + + # Call the AI service DIRECTLY through the model's functionCall + # This tests the actual model, not the document generation pipeline + # Get the model directly from the registry using the model registry + from modules.aicore.aicoreModelRegistry import modelRegistry + model = modelRegistry.getModel(modelName) + + if not model: + raise Exception(f"Model {modelName} not found") + + # Create AiModelCall and call the model's functionCall directly + from modules.datamodels.datamodelAi import AiModelCall + import base64 + import os + + # Prepare messages and options based on model type + if "vision" in modelName.lower(): + # For vision models, skip for now since they require special handling + print(f"⚠️ Skipping vision model {modelName} - requires special image handling") + return { + "modelName": modelName, + "status": "SKIPPED", + "processingTime": 0.0, + "responseLength": 0, + "responseType": "skipped", + "hasContent": False, + "error": "Vision model 
requires special image handling", + "fullResponse": "Skipped - vision model requires special image handling" + } + else: + # For other models, use normal functionCall + messages = [{"role": "user", "content": testPrompt}] + modelCall = AiModelCall( + messages=messages, + model=model, + options=options + ) + response = await model.functionCall(modelCall) + + endTime = asyncio.get_event_loop().time() + processingTime = endTime - startTime + + # Analyze response - now we get AiModelResponse objects + if hasattr(response, 'success'): + # AiModelResponse object + if response.success: + result = { + "modelName": modelName, + "status": "SUCCESS", + "processingTime": round(processingTime, 2), + "responseLength": len(response.content) if response.content else 0, + "responseType": "AiModelResponse", + "hasContent": bool(response.content), + "error": None, + "modelUsed": modelName, + "priceUsd": 0.0, # AiModelResponse doesn't have price info + "bytesSent": 0, + "bytesReceived": len(response.content.encode('utf-8')) if response.content else 0 + } + + # Try to parse content as JSON + if response.content: + try: + json.loads(response.content) + result["isValidJson"] = True + except: + result["isValidJson"] = False + + result["responsePreview"] = response.content[:200] + "..." 
if len(response.content) > 200 else response.content + result["fullResponse"] = response.content + else: + result["isValidJson"] = False + result["responsePreview"] = "Empty response" + result["fullResponse"] = "" + + print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s") + print(f"📄 Response length: {len(response.content) if response.content else 0} characters") + print(f"📄 Model used: {modelName}") + print(f"📄 Response preview: {result['responsePreview']}") + + else: + error = response.error or "Unknown error" + result = { + "modelName": modelName, + "status": "ERROR", + "processingTime": round(processingTime, 2), + "responseLength": 0, + "responseType": "AiModelResponse", + "hasContent": False, + "error": error, + "fullResponse": str(response) + } + + print(f"❌ ERROR - {error}") + + elif isinstance(response, dict): + # Fallback for dict responses + if response.get("success", True): + result = { + "modelName": modelName, + "status": "SUCCESS", + "processingTime": round(processingTime, 2), + "responseLength": len(str(response)), + "responseType": "dict", + "hasContent": True, + "error": None + } + + # Try to parse as JSON + try: + jsonResponse = json.dumps(response, indent=2) + result["responsePreview"] = jsonResponse[:200] + "..." if len(jsonResponse) > 200 else jsonResponse + result["isValidJson"] = True + result["fullResponse"] = jsonResponse + except: + result["responsePreview"] = str(response)[:200] + "..." 
if len(str(response)) > 200 else str(response) + result["isValidJson"] = False + result["fullResponse"] = str(response) + + print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s") + print(f"📄 Response length: {len(str(response))} characters") + print(f"📄 Response preview: {result['responsePreview']}") + + else: + error = response.get("error", "Unknown error") + result = { + "modelName": modelName, + "status": "ERROR", + "processingTime": round(processingTime, 2), + "responseLength": 0, + "responseType": "error", + "hasContent": False, + "error": error, + "fullResponse": str(response) + } + + print(f"❌ ERROR - {error}") + + else: + # String response + result = { + "modelName": modelName, + "status": "SUCCESS", + "processingTime": round(processingTime, 2), + "responseLength": len(str(response)), + "responseType": "string", + "hasContent": True, + "error": None + } + + # Try to parse as JSON + try: + json.loads(str(response)) + result["isValidJson"] = True + except: + result["isValidJson"] = False + + result["responsePreview"] = str(response)[:200] + "..." 
def _saveImageResponse(self, modelName: str, result: Dict[str, Any]) -> None:
    """Extract a base64 image from ``result["fullResponse"]`` and write it as a PNG.

    The file is written to ``self.modelTestDir`` as ``<modelName>_<timestamp>.png``.
    On success the path is stored in ``result["savedImage"]``; on any failure the
    error text is stored in ``result["imageSaveError"]``. Best-effort: this method
    never raises.

    Args:
        modelName: Model name used as the file-name prefix.
        result: Test result dict; read for ``fullResponse`` and updated in place.
    """
    import re  # function-scoped, as in the original implementation

    try:
        fullResponse = result.get("fullResponse", "")
        base64Data = None

        if isinstance(fullResponse, dict):
            # The first key that is present wins, even when its value is empty.
            for key in ("content", "data", "image"):
                if key in fullResponse:
                    base64Data = fullResponse[key]
                    break
        else:
            responseText = str(fullResponse)
            # Prefer an explicit data URI; otherwise accept any long base64-looking run.
            match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', responseText)
            if match is None:
                match = re.search(r'([A-Za-z0-9+/=]{100,})', responseText)
            if match:
                base64Data = match.group(1)

        if not base64Data:
            print("⚠️ No base64 image data found in response")
            return

        # Dict values may still carry the "data:image/...;base64," header.
        if base64Data.startswith('data:image/'):
            base64Data = base64Data.split(',', 1)[1]

        imageData = base64.b64decode(base64Data)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = os.path.join(self.modelTestDir, f"{modelName}_{timestamp}.png")

        with open(filepath, 'wb') as f:
            f.write(imageData)

        result["savedImage"] = filepath
        print(f"🖼️ Image saved: {filepath}")

    except Exception as e:
        # Deliberate catch-all: a failed save must not abort the test run.
        print(f"❌ Error saving image: {str(e)}")
        result["imageSaveError"] = str(e)


def _saveTextResponse(self, modelName: str, result: Dict[str, Any]) -> None:
    """Write the model's text response, preceded by a metadata header, to a file.

    The file is written to ``self.modelTestDir`` as ``<modelName>_<timestamp>.txt``.
    On success the path is stored in ``result["savedTextFile"]``; on any failure
    the error text is stored in ``result["textSaveError"]``. Never raises.

    Args:
        modelName: Model name used as the file-name prefix and in the header.
        result: Test result dict; read for the response and updated in place.
    """
    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = os.path.join(self.modelTestDir, f"{modelName}_{timestamp}.txt")

        # Fall back to the preview when the full response is missing or empty.
        content = result.get("fullResponse", "")
        if not content:
            content = result.get("responsePreview", "No content available")

        metadata = f"""Model: {modelName}
Test Time: {timestamp}
Status: {result.get('status', 'Unknown')}
Processing Time: {result.get('processingTime', 0):.2f}s
Response Length: {result.get('responseLength', 0)} characters
Is Valid JSON: {result.get('isValidJson', False)}

--- RESPONSE CONTENT ---
{content}
"""

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(metadata)

        result["savedTextFile"] = filepath
        print(f"📄 Text response saved: {filepath}")

    except Exception as e:
        # Deliberate catch-all: a failed save must not abort the test run.
        print(f"❌ Error saving text response: {str(e)}")
        result["textSaveError"] = str(e)


def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]) -> None:
    """Write one model's test result to ``<modelName>_<timestamp>.json``.

    Failures are printed and swallowed so one unsaved result does not stop the run.

    Args:
        modelName: Model name used as the file-name prefix.
        result: Test result dict to store under the ``"result"`` key.
    """
    try:
        # One clock reading so testTimestamp and testDate describe the same instant.
        now = datetime.now()
        timestamp = now.strftime("%Y%m%d_%H%M%S")
        filename = f"{modelName}_{timestamp}.json"
        filepath = os.path.join(self.modelTestDir, filename)

        individualData = {
            "modelName": modelName,
            "testTimestamp": timestamp,
            "testDate": now.isoformat(),
            "result": result
        }

        # Serialize before opening the file: a non-serializable value then fails
        # here instead of leaving a truncated JSON file on disk.
        payload = json.dumps(individualData, indent=2, ensure_ascii=False)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)

        print(f"📄 Individual result saved: {filename}")

    except Exception as e:
        print(f"❌ Error saving individual result: {str(e)}")


def getAllAvailableModels(self) -> List[str]:
    """Return the names of the models to test.

    The list is hardcoded (same approach as test_ai_behavior.py). Entries that
    are commented out are currently skipped for the reason given next to them.
    """
    return [
        # "claude-3-5-sonnet-20241022",  # Skipped - text model, test later
        # "claude-3-5-sonnet-20241022-vision",  # Skipped - requires image input
        # "gpt-4o",  # Skipped - text model, test later
        # "gpt-3.5-turbo",  # Skipped - text model, test later
        # "gpt-4o-vision",  # Skipped - requires image input
        # "dall-e-3",  # Skipped - image generation, test later
        "sonar",  # Perplexity web model
        "sonar-pro",  # Perplexity web model
        "tavily-search",  # Tavily web model
        "tavily-extract",  # Tavily web model
        "tavily-search-extract",  # Tavily web model
        # "internal-extractor",  # Skipped - internal model, test later
        # "internal-generator",  # Skipped - internal model, test later
        # "internal-renderer"  # Skipped - internal model, test later
    ]


def saveTestResults(self) -> str:
    """Write all collected results plus aggregate counts to one JSON file.

    The file is ``modeltest_results_<timestamp>.json`` inside ``self.modelTestDir``.

    Returns:
        Path of the written results file.

    Raises:
        TypeError: If a result contains a value that ``json`` cannot serialize.
            In that case no file is created.
        OSError: If the directory or file cannot be created or written.
    """
    # One clock reading so testTimestamp and testDate describe the same instant.
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    resultsFile = os.path.join(self.modelTestDir, f"modeltest_results_{timestamp}.json")

    totalModels = len(self.testResults)
    successfulModels = sum(1 for r in self.testResults if r.get("status") == "SUCCESS")
    errorModels = sum(1 for r in self.testResults if r.get("status") == "ERROR")
    exceptionModels = sum(1 for r in self.testResults if r.get("status") == "EXCEPTION")

    saveData = {
        "testTimestamp": timestamp,
        "testDate": now.isoformat(),
        "totalModels": totalModels,
        "successfulModels": successfulModels,
        "errorModels": errorModels,
        "exceptionModels": exceptionModels,
        "results": self.testResults
    }

    # Percentage of successful models; 0 when nothing was tested.
    if totalModels > 0:
        saveData["successRate"] = (successfulModels / totalModels) * 100
    else:
        saveData["successRate"] = 0

    # Serialize before touching the file system so a serialization failure
    # cannot leave a truncated results file behind.
    payload = json.dumps(saveData, indent=2, ensure_ascii=False)

    os.makedirs(self.modelTestDir, exist_ok=True)
    with open(resultsFile, 'w', encoding='utf-8') as f:
        f.write(payload)

    print(f"📄 Detailed results saved: {resultsFile}")
    return resultsFile


def printTestSummary(self) -> None:
    """Print aggregate counts, per-model details and the fastest/slowest model.

    Reads ``self.testResults``; each entry is expected to carry ``modelName``,
    ``status``, ``processingTime``, ``responseLength`` and ``responseType``.
    ``error``, ``isValidJson`` and ``responsePreview`` are optional.
    """
    print(f"\n{'='*80}")
    print("AI MODELS TEST SUMMARY")
    print(f"{'='*80}")

    totalModels = len(self.testResults)
    successfulResults = [r for r in self.testResults if r["status"] == "SUCCESS"]
    successfulModels = len(successfulResults)
    errorModels = sum(1 for r in self.testResults if r["status"] == "ERROR")
    exceptionModels = sum(1 for r in self.testResults if r["status"] == "EXCEPTION")

    # Compute the rate first so the label is printed even when nothing was tested.
    successRate = (successfulModels / totalModels * 100) if totalModels > 0 else 0.0

    print(f"📊 Total models tested: {totalModels}")
    print(f"✅ Successful: {successfulModels}")
    print(f"❌ Errors: {errorModels}")
    print(f"💥 Exceptions: {exceptionModels}")
    print(f"📈 Success rate: {successRate:.1f}%")

    print(f"\n{'='*80}")
    print("DETAILED RESULTS")
    print(f"{'='*80}")

    statusIcons = {
        "SUCCESS": "✅",
        "ERROR": "❌",
        "EXCEPTION": "💥"
    }

    for result in self.testResults:
        status_icon = statusIcons.get(result["status"], "❓")

        print(f"\n{status_icon} {result['modelName']}")
        print(f" Status: {result['status']}")
        print(f" Processing time: {result['processingTime']}s")
        print(f" Response length: {result['responseLength']} characters")
        print(f" Response type: {result['responseType']}")

        if result.get("isValidJson") is not None:
            print(f" Valid JSON: {'Yes' if result['isValidJson'] else 'No'}")

        # .get(): a result dict without an "error" key must not abort the summary.
        if result.get("error"):
            print(f" Error: {result['error']}")

        if result.get("responsePreview"):
            print(f" Preview: {result['responsePreview']}")

    # Fastest and slowest are only meaningful among successful runs.
    if successfulResults:
        fastest = min(successfulResults, key=lambda x: x["processingTime"])
        slowest = max(successfulResults, key=lambda x: x["processingTime"])

        print(f"\n{'='*80}")
        print("PERFORMANCE HIGHLIGHTS")
        print(f"{'='*80}")
        print(f"🚀 Fastest model: {fastest['modelName']} ({fastest['processingTime']}s)")
        print(f"🐌 Slowest model: {slowest['modelName']} ({slowest['processingTime']}s)")


async def main():
    """Run every configured model test, then save and print the results.

    Between models the script waits for Enter. When stdin is closed or not
    interactive, the pause is skipped for the rest of the run instead of
    aborting before the results are saved.
    """
    tester = AIModelsTester()

    print("Starting AI Models Testing...")
    print("Initializing AI service...")
    await tester.initialize()

    models = tester.getAllAvailableModels()

    print(f"\nFound {len(models)} models to test:")
    for i, model in enumerate(models, 1):
        print(f" {i}. {model}")

    print(f"\n{'='*80}")
    print("STARTING INDIVIDUAL MODEL TESTS")
    print(f"{'='*80}")
    print("Press Enter after each model test to continue to the next one...")

    pauseBetweenModels = True
    for i, modelName in enumerate(models, 1):
        print(f"\n[{i}/{len(models)}] Testing model: {modelName}")

        await tester.testModel(modelName)

        # Pause for user input (except after the last model).
        if pauseBetweenModels and i < len(models):
            try:
                input("\nPress Enter to continue to the next model...")
            except EOFError:
                # No interactive stdin: stop prompting and run the rest unattended.
                pauseBetweenModels = False

    resultsFile = tester.saveTestResults()

    tester.printTestSummary()

    print(f"\n{'='*80}")
    print("TESTING COMPLETED")
    print(f"{'='*80}")
    print(f"📄 Results saved to: {resultsFile}")
    print(f"📁 Images saved to: {tester.modelTestDir}")


if __name__ == "__main__":
    asyncio.run(main())