# gateway/test_ai_models.py

#!/usr/bin/env python3
"""
AI Models Test - Tests all available AI models individually
"""

import asyncio
import json
import sys
import os
import base64
from datetime import datetime
from typing import Dict, Any, List

# Add the gateway to path
sys.path.append(os.path.dirname(__file__))

# Import the service initialization
from modules.features.chatPlayground.mainChatPlayground import getServices
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelUam import User

class AIModelsTester:
    def __init__(self):
        """Set up services for a synthetic user and prepare the output folders.

        Builds the service container via ``getServices`` for a placeholder
        user (no workflow attached), ensures ``<this dir>/../local/logs`` and
        its ``modeltest`` subfolder exist, and copies the sample photo from
        the logs folder into ``modeltest`` when it is present there and has
        not been copied yet.
        """
        # Placeholder identity used only for this test run
        userFields = {
            "id": "test_user",
            "username": "test_user",
            "email": "test@example.com",
            "fullName": "Test User",
            "language": "en",
            "mandateId": "test_mandate",
        }
        testUser = User(**userFields)

        # Second argument is the workflow; none exists yet at this point
        self.services = getServices(testUser, None)
        self.testResults = []

        # Output folders: logs root first, then the modeltest subfolder
        scriptDir = os.path.dirname(__file__)
        self.logsDir = os.path.join(scriptDir, "..", "local", "logs")
        os.makedirs(self.logsDir, exist_ok=True)

        self.modelTestDir = os.path.join(self.logsDir, "modeltest")
        os.makedirs(self.modelTestDir, exist_ok=True)

        # Mirror the sample photo into modeltest exactly once
        imageName = "_testdata_photo_2025-06-03_13-05-52.jpg"
        testImageSource = os.path.join(self.logsDir, imageName)
        testImageDest = os.path.join(self.modelTestDir, imageName)
        if not os.path.exists(testImageSource) or os.path.exists(testImageDest):
            return

        import shutil
        shutil.copy2(testImageSource, testImageDest)
        print(f"📷 Test image copied to: {testImageDest}")

    async def initialize(self):
        """Create the AI service and attach a minimal workflow to the services.

        Sets the root logger to INFO, replaces ``self.services.ai`` with an
        instance built by ``AiService.create`` and assigns a freshly
        generated ``ChatWorkflow`` to ``self.services.currentWorkflow``.
        """
        # Quieten logging before anything else is imported or created
        import logging
        rootLogger = logging.getLogger()
        rootLogger.setLevel(logging.INFO)

        # The AI service has to be rebuilt through its async factory
        from modules.services.serviceAi.mainServiceAi import AiService
        services = self.services
        services.ai = await AiService.create(services)

        # Minimal workflow context for the test run
        from modules.datamodels.datamodelChat import ChatWorkflow
        import uuid

        workflowFields = {
            "id": str(uuid.uuid4()),
            "name": "Test Workflow",
            "status": "running",
            # Two separate timestamp reads, one per field
            "startedAt": services.utils.timestampGetUtc(),
            "lastActivity": services.utils.timestampGetUtc(),
            "currentRound": 1,
            "currentTask": 0,
            "currentAction": 0,
            "totalTasks": 0,
            "totalActions": 0,
            "mandateId": "test_mandate",
            "messageIds": [],
            "workflowMode": "React",
            "maxSteps": 5,
        }
        services.currentWorkflow = ChatWorkflow(**workflowFields)

        print("✅ AI Service initialized successfully")
        print(f"📁 Results will be saved to: {self.modelTestDir}")

    async def testModel(self, modelName: str) -> Dict[str, Any]:
        """Test a specific AI model with a simple prompt."""
        print(f"\n{'='*60}")
        print(f"TESTING MODEL: {modelName}")
        print(f"{'='*60}")

        # Use same prompt for all web models
        import json

        if "tavily" in modelName.lower() or "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
            # All web models use the same JSON formatted prompt
            # Country format: Use full name for Tavily (Switzerland), Perplexity converts ISO codes to names
            testPrompt = json.dumps({
                "prompt": "Research, what ValueOn company in switzerland does and who works there? Return as JSON.",
                "maxResults": 5,
                "timeRange": "y",
                "country": "CH",  # ISO-2 code, Perplexity will convert to "Switzerland"
                "format": "json"
            }, indent=2)
        else:
            # Fallback for other models
            testPrompt = "Generate a comprehensive analysis of the current state of artificial intelligence. Return as JSON."

        print(f"Test prompt: {testPrompt}")
        print(f"Prompt length: {len(testPrompt)} characters")

        startTime = asyncio.get_event_loop().time()

        try:
            # Create options to force this specific model
            if "internal" in modelName.lower():
                options = AiCallOptions(
                    operationType=OperationTypeEnum.DATA_EXTRACT,
                    preferredModel=modelName
                )
            else:
                options = AiCallOptions(
                    operationType=OperationTypeEnum.DATA_GENERATE,
                    preferredModel=modelName
                )

            # Call the AI service DIRECTLY through the model's functionCall
            # This tests the actual model, not the document generation pipeline
            # Get the model directly from the registry using the model registry
            from modules.aicore.aicoreModelRegistry import modelRegistry
            model = modelRegistry.getModel(modelName)

            if not model:
                raise Exception(f"Model {modelName} not found")

            # Create AiModelCall and call the model's functionCall directly
            from modules.datamodels.datamodelAi import AiModelCall
            import base64
            import os

            # Prepare messages and options based on model type
            if "vision" in modelName.lower():
                # For vision models, skip for now since they require special handling
                print(f"⚠️  Skipping vision model {modelName} - requires special image handling")
                return {
                    "modelName": modelName,
                    "status": "SKIPPED",
                    "processingTime": 0.0,
                    "responseLength": 0,
                    "responseType": "skipped",
                    "hasContent": False,
                    "error": "Vision model requires special image handling",
                    "fullResponse": "Skipped - vision model requires special image handling"
                }
            else:
                # For other models, use normal functionCall
                messages = [{"role": "user", "content": testPrompt}]
                modelCall = AiModelCall(
                    messages=messages,
                    model=model,
                    options=options
                )
                response = await model.functionCall(modelCall)

            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            # Analyze response - now we get AiModelResponse objects
            if hasattr(response, 'success'):
                # AiModelResponse object
                if response.success:
                    result = {
                        "modelName": modelName,
                        "status": "SUCCESS",
                        "processingTime": round(processingTime, 2),
                        "responseLength": len(response.content) if response.content else 0,
                        "responseType": "AiModelResponse",
                        "hasContent": bool(response.content),
                        "error": None,
                        "modelUsed": modelName,
                        "priceUsd": 0.0,  # AiModelResponse doesn't have price info
                        "bytesSent": 0,
                        "bytesReceived": len(response.content.encode('utf-8')) if response.content else 0
                    }

                    # Try to parse content as JSON
                    if response.content:
                        try:
                            json.loads(response.content)
                            result["isValidJson"] = True
                        except:
                            result["isValidJson"] = False

                        result["responsePreview"] = response.content[:200] + "..." if len(response.content) > 200 else response.content
                        result["fullResponse"] = response.content
                    else:
                        result["isValidJson"] = False
                        result["responsePreview"] = "Empty response"
                        result["fullResponse"] = ""

                    print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
                    print(f"📄 Response length: {len(response.content) if response.content else 0} characters")
                    print(f"📄 Model used: {modelName}")
                    print(f"📄 Response preview: {result['responsePreview']}")

                else:
                    error = response.error or "Unknown error"
                    result = {
                        "modelName": modelName,
                        "status": "ERROR",
                        "processingTime": round(processingTime, 2),
                        "responseLength": 0,
                        "responseType": "AiModelResponse",
                        "hasContent": False,
                        "error": error,
                        "fullResponse": str(response)
                    }

                    print(f"❌ ERROR - {error}")

            elif isinstance(response, dict):
                # Fallback for dict responses
                if response.get("success", True):
                    result = {
                        "modelName": modelName,
                        "status": "SUCCESS",
                        "processingTime": round(processingTime, 2),
                        "responseLength": len(str(response)),
                        "responseType": "dict",
                        "hasContent": True,
                        "error": None
                    }

                    # Try to parse as JSON
                    try:
                        jsonResponse = json.dumps(response, indent=2)
                        result["responsePreview"] = jsonResponse[:200] + "..." if len(jsonResponse) > 200 else jsonResponse
                        result["isValidJson"] = True
                        result["fullResponse"] = jsonResponse
                    except:
                        result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
                        result["isValidJson"] = False
                        result["fullResponse"] = str(response)

                    print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
                    print(f"📄 Response length: {len(str(response))} characters")
                    print(f"📄 Response preview: {result['responsePreview']}")

                else:
                    error = response.get("error", "Unknown error")
                    result = {
                        "modelName": modelName,
                        "status": "ERROR",
                        "processingTime": round(processingTime, 2),
                        "responseLength": 0,
                        "responseType": "error",
                        "hasContent": False,
                        "error": error,
                        "fullResponse": str(response)
                    }

                    print(f"❌ ERROR - {error}")

            else:
                # String response
                result = {
                    "modelName": modelName,
                    "status": "SUCCESS",
                    "processingTime": round(processingTime, 2),
                    "responseLength": len(str(response)),
                    "responseType": "string",
                    "hasContent": True,
                    "error": None
                }

                # Try to parse as JSON
                try:
                    json.loads(str(response))
                    result["isValidJson"] = True
                except:
                    result["isValidJson"] = False

                result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
                result["fullResponse"] = str(response)

                print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
                print(f"📄 Response length: {len(str(response))} characters")
                print(f"📄 Response preview: {result['responsePreview']}")

            # Save text response for all models
            if result.get("status") == "SUCCESS":
                self._saveTextResponse(modelName, result)

        except Exception as e:
            endTime = asyncio.get_event_loop().time()
            processingTime = endTime - startTime

            result = {
                "modelName": modelName,
                "status": "EXCEPTION",
                "processingTime": round(processingTime, 2),
                "responseLength": 0,
                "responseType": "exception",
                "hasContent": False,
                "error": str(e)
            }

            print(f"💥 EXCEPTION - {str(e)}")

        self.testResults.append(result)

        # Save individual model result immediately
        self._saveIndividualModelResult(modelName, result)

        return result

    def _saveImageResponse(self, modelName: str, result: Dict[str, Any]):
        """Extract base64 image data from ``result["fullResponse"]`` and write it as a PNG file.

        For a dict response the first present key of "content", "data",
        "image" is used; for any other response its string form is searched
        for a ``data:image/...;base64,`` URL and then for a run of at least
        100 base64 characters. On success the file path is stored in
        ``result["savedImage"]``; on failure the message is stored in
        ``result["imageSaveError"]``. Nothing is raised.
        """
        try:
            payload = result.get("fullResponse", "")
            encoded = None

            if isinstance(payload, dict):
                # First matching key wins, even when its value is empty
                for key in ("content", "data", "image"):
                    if key in payload:
                        encoded = payload[key]
                        break
            else:
                import re
                haystack = str(payload)
                # Data URL first, then any long base64-looking run
                found = (
                    re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', haystack)
                    or re.search(r'([A-Za-z0-9+/=]{100,})', haystack)
                )
                if found:
                    encoded = found.group(1)

            if not encoded:
                print("⚠️  No base64 image data found in response")
                return

            # Dict values may still carry the data URL prefix
            if encoded.startswith('data:image/'):
                encoded = encoded.split(',', 1)[1]

            rawBytes = base64.b64decode(encoded)
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.modelTestDir, f"{modelName}_{stamp}.png")

            with open(filepath, 'wb') as imageFile:
                imageFile.write(rawBytes)

            result["savedImage"] = filepath
            print(f"🖼️  Image saved: {filepath}")

        except Exception as e:
            print(f"❌ Error saving image: {str(e)}")
            result["imageSaveError"] = str(e)

    def _saveTextResponse(self, modelName: str, result: Dict[str, Any]):
        """Write the response text plus a short metadata header to a ``.txt`` file.

        The body is ``result["fullResponse"]``, falling back to
        ``result["responsePreview"]`` and finally to a fixed placeholder.
        The file path is stored in ``result["savedTextFile"]``; on failure the
        message is stored in ``result["textSaveError"]``. Nothing is raised.
        """
        try:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.modelTestDir, f"{modelName}_{stamp}.txt")

            # Full response if present, otherwise the preview or a placeholder
            body = result.get("fullResponse", "") or result.get("responsePreview", "No content available")

            # Header lines, a blank separator, the marker, the body, final newline
            documentLines = [
                f"Model: {modelName}",
                f"Test Time: {stamp}",
                f"Status: {result.get('status', 'Unknown')}",
                f"Processing Time: {result.get('processingTime', 0):.2f}s",
                f"Response Length: {result.get('responseLength', 0)} characters",
                f"Is Valid JSON: {result.get('isValidJson', False)}",
                "",
                "--- RESPONSE CONTENT ---",
                f"{body}",
                "",
            ]
            document = "\n".join(documentLines)

            with open(filepath, 'w', encoding='utf-8') as textFile:
                textFile.write(document)

            result["savedTextFile"] = filepath
            print(f"📄 Text response saved: {filepath}")

        except Exception as e:
            print(f"❌ Error saving text response: {str(e)}")
            result["textSaveError"] = str(e)

    def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]):
        """Write one model's result, wrapped with name and timestamps, to a JSON file.

        The file is named ``<modelName>_<timestamp>.json`` inside
        ``self.modelTestDir``. Failures are printed and otherwise ignored.
        """
        try:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{modelName}_{stamp}.json"

            # Envelope around the raw result
            record = {
                "modelName": modelName,
                "testTimestamp": stamp,
                "testDate": datetime.now().isoformat(),
                "result": result,
            }

            target = os.path.join(self.modelTestDir, filename)
            with open(target, 'w', encoding='utf-8') as jsonFile:
                json.dump(record, jsonFile, indent=2, ensure_ascii=False)

            print(f"📄 Individual result saved: {filename}")

        except Exception as e:
            print(f"❌ Error saving individual result: {str(e)}")

    def getAllAvailableModels(self) -> List[str]:
        """Return the names of the models this run should test.

        The list is hardcoded (same approach as test_ai_behavior.py).
        Currently disabled entries, kept here for reference:
          - "claude-3-5-sonnet-20241022"         text model, test later
          - "claude-3-5-sonnet-20241022-vision"  requires image input
          - "gpt-4o"                             text model, test later
          - "gpt-3.5-turbo"                      text model, test later
          - "gpt-4o-vision"                      requires image input
          - "dall-e-3"                           image generation, test later
          - "internal-extractor"                 internal model, test later
          - "internal-generator"                 internal model, test later
          - "internal-renderer"                  internal model, test later
        """
        enabledModels = (
            "sonar",          # Perplexity web model
            "sonar-pro",      # Perplexity web model
            "tavily-search",  # Tavily web model (unified research)
        )
        # Hand out a fresh list on every call
        return list(enabledModels)

    def saveTestResults(self):
        """Write all collected results plus per-status counts to one JSON file.

        Returns:
            Path of the written ``modeltest_results_<timestamp>.json`` file.
        """
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        resultsFile = os.path.join(self.modelTestDir, f"modeltest_results_{stamp}.json")
        testDate = datetime.now().isoformat()

        # Tally outcomes by status
        statuses = [entry["status"] for entry in self.testResults]
        total = len(self.testResults)
        succeeded = statuses.count("SUCCESS")

        saveData = {
            "testTimestamp": stamp,
            "testDate": testDate,
            "totalModels": total,
            "successfulModels": succeeded,
            "errorModels": statuses.count("ERROR"),
            "exceptionModels": statuses.count("EXCEPTION"),
            "results": self.testResults,
        }

        # Percentage of successful models; 0 when nothing was tested
        saveData["successRate"] = (succeeded / total) * 100 if total > 0 else 0

        with open(resultsFile, 'w', encoding='utf-8') as jsonFile:
            json.dump(saveData, jsonFile, indent=2, ensure_ascii=False)

        print(f"📄 Detailed results saved: {resultsFile}")
        return resultsFile

    def printTestSummary(self):
        """Print totals, per-model details and timing highlights for all results.

        Reads ``self.testResults``. Each entry must provide ``modelName``,
        ``status``, ``processingTime``, ``responseLength`` and
        ``responseType``; ``error``, ``isValidJson`` and ``responsePreview``
        are printed when present. The fastest and slowest models are reported
        only when at least one result has status "SUCCESS".
        """
        print(f"\n{'='*80}")
        print("AI MODELS TEST SUMMARY")
        print(f"{'='*80}")

        totalModels = len(self.testResults)
        successfulResults = [r for r in self.testResults if r["status"] == "SUCCESS"]
        successfulModels = len(successfulResults)
        errorModels = len([r for r in self.testResults if r["status"] == "ERROR"])
        exceptionModels = len([r for r in self.testResults if r["status"] == "EXCEPTION"])

        print(f"📊 Total models tested: {totalModels}")
        print(f"✅ Successful: {successfulModels}")
        print(f"❌ Errors: {errorModels}")
        print(f"💥 Exceptions: {exceptionModels}")
        # Keep the label in both cases; with no results there is no rate to compute
        if totalModels > 0:
            print(f"📈 Success rate: {(successfulModels/totalModels*100):.1f}%")
        else:
            print("📈 Success rate: 0%")

        print(f"\n{'='*80}")
        print("DETAILED RESULTS")
        print(f"{'='*80}")

        statusIcons = {
            "SUCCESS": "✅",
            "ERROR": "❌",
            "EXCEPTION": "💥"
        }

        for result in self.testResults:
            # Unknown statuses get a question mark
            status_icon = statusIcons.get(result["status"], "❓")

            print(f"\n{status_icon} {result['modelName']}")
            print(f"   Status: {result['status']}")
            print(f"   Processing time: {result['processingTime']}s")
            print(f"   Response length: {result['responseLength']} characters")
            print(f"   Response type: {result['responseType']}")

            if result.get("isValidJson") is not None:
                print(f"   Valid JSON: {'Yes' if result['isValidJson'] else 'No'}")

            # Tolerate results that carry no "error" key at all
            if result.get("error"):
                print(f"   Error: {result['error']}")

            if result.get("responsePreview"):
                print(f"   Preview: {result['responsePreview']}")

        # Find fastest and slowest models among the successful ones
        if successfulResults:
            fastest = min(successfulResults, key=lambda x: x["processingTime"])
            slowest = max(successfulResults, key=lambda x: x["processingTime"])

            print(f"\n{'='*80}")
            print("PERFORMANCE HIGHLIGHTS")
            print(f"{'='*80}")
            print(f"🚀 Fastest model: {fastest['modelName']} ({fastest['processingTime']}s)")
            print(f"🐌 Slowest model: {slowest['modelName']} ({slowest['processingTime']}s)")

async def main():
    """Run the model tests one by one, then save and print the results.

    Between two models the run pauses until Enter is pressed. When standard
    input is not available (``input`` raises ``EOFError``, e.g. in a
    non-interactive run), the remaining models are tested without pausing so
    the results file and the summary are still produced.
    """
    tester = AIModelsTester()

    print("Starting AI Models Testing...")
    print("Initializing AI service...")
    await tester.initialize()

    # Get all available models
    models = tester.getAllAvailableModels()

    print(f"\nFound {len(models)} models to test:")
    for i, model in enumerate(models, 1):
        print(f"  {i}. {model}")

    print(f"\n{'='*80}")
    print("STARTING INDIVIDUAL MODEL TESTS")
    print(f"{'='*80}")
    print("Press Enter after each model test to continue to the next one...")

    # Cleared as soon as stdin turns out to be unavailable
    interactive = True

    # Test each model individually
    for i, modelName in enumerate(models, 1):
        print(f"\n[{i}/{len(models)}] Testing model: {modelName}")

        # Test the model
        await tester.testModel(modelName)

        # Pause for user input (except for the last model)
        if interactive and i < len(models):
            try:
                input("\nPress Enter to continue to the next model...")
            except EOFError:
                # No stdin to read from: stop asking and finish the run
                interactive = False
                print("\nNo interactive input available - continuing without pauses.")

    # Save detailed results to file
    resultsFile = tester.saveTestResults()

    # Print final summary
    tester.printTestSummary()

    print(f"\n{'='*80}")
    print("TESTING COMPLETED")
    print(f"{'='*80}")
    print(f"📄 Results saved to: {resultsFile}")
    print(f"📁 Images saved to: {tester.modelTestDir}")

# Script entry point: run the test session only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())