tuning and aligning ai models

2025-10-25 22:27:27 +02:00 · 2025-10-25 22:27:27 +02:00 · e8c3052176
commit e8c3052176
parent 8d25ed6fc3
9 changed files with 711 additions and 155 deletions
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@ -112,7 +112,9 @@ class AiAnthropic(BaseConnectorAi):
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            # Transform OpenAI-style messages to Anthropic format:
@ -237,8 +239,8 @@ class AiAnthropic(BaseConnectorAi):
            model = modelCall.model
            options = modelCall.options
            prompt = messages[0]["content"] if messages else ""
-            imageData = options.get("imageData")
-            mimeType = options.get("mimeType")
+            imageData = getattr(options, "imageData", None)
+            mimeType = getattr(options, "mimeType", None)
            
            # Debug logging
            logger.info(f"callAiImage called with imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@ -51,7 +51,7 @@ class AiOpenai(BaseConnectorAi):
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
-                maxTokens=128000,
+                maxTokens=16384,
                contextLength=128000,
                costPer1kTokensInput=0.03,
                costPer1kTokensOutput=0.06,
@ -76,7 +76,7 @@ class AiOpenai(BaseConnectorAi):
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
-                maxTokens=16000,
+                maxTokens=4096,
                contextLength=16000,
                costPer1kTokensInput=0.0015,
                costPer1kTokensOutput=0.002,
@ -100,7 +100,7 @@ class AiOpenai(BaseConnectorAi):
                connectorType="openai",
                apiUrl="https://api.openai.com/v1/chat/completions",
                temperature=0.2,
-                maxTokens=128000,
+                maxTokens=16384,
                contextLength=128000,
                costPer1kTokensInput=0.03,
                costPer1kTokensOutput=0.06,
@ -158,7 +158,9 @@ class AiOpenai(BaseConnectorAi):
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            payload = {
@ -226,8 +228,8 @@ class AiOpenai(BaseConnectorAi):
            model = modelCall.model
            options = modelCall.options
            prompt = messages[0]["content"] if messages else ""
-            imageData = options.get("imageData")
-            mimeType = options.get("mimeType", "image/jpeg")
+            imageData = getattr(options, "imageData", None)
+            mimeType = getattr(options, "mimeType", "image/jpeg")
            
            logger.debug(f"Starting image analysis with query '{prompt}' for size {len(imageData)}B...")
            
@ -261,10 +263,6 @@ class AiOpenai(BaseConnectorAi):
                }
            ]
            
-            # Use a vision-capable model for image analysis
-            # Override the model for vision tasks
-            visionModel = "gpt-4o"  # or "gpt-4-vision-preview" depending on availability
-            
            # Use parameters from model
            temperature = model.temperature
            # Don't set maxTokens - let the model use its full context length
--- a/modules/aicore/aicorePluginPerplexity.py
+++ b/modules/aicore/aicorePluginPerplexity.py
@ -44,27 +44,29 @@ class AiPerplexity(BaseConnectorAi):
        """Get all available Perplexity models."""
        return [
            AiModel(
-                name="llama-3.1-sonar-large-128k-online",
-                displayName="Perplexity Llama 3.1 Sonar Large 128k",
+                name="sonar",
+                displayName="Perplexity Sonar",
                connectorType="perplexity",
                apiUrl="https://api.perplexity.ai/chat/completions",
                temperature=0.2,
-                maxTokens=128000,
-                contextLength=128000,
+                maxTokens=4000,
+                contextLength=32000,
                costPer1kTokensInput=0.005,
                costPer1kTokensOutput=0.005,
                speedRating=8,
                qualityRating=8,
                # capabilities removed (not used in business logic)
-                functionCall=self.callAiBasic,
+                functionCall=self.callWebOperation,
                priority=PriorityEnum.BALANCED,
                processingMode=ProcessingModeEnum.ADVANCED,
                operationTypes=createOperationTypeRatings(
-                    (OperationTypeEnum.PLAN, 7),
-                    (OperationTypeEnum.DATA_ANALYSE, 8),
-                    (OperationTypeEnum.DATA_GENERATE, 7)
+                    (OperationTypeEnum.WEB_RESEARCH, 8),
+                    (OperationTypeEnum.WEB_SEARCH, 9),
+                    (OperationTypeEnum.WEB_CRAWL, 7),
+                    (OperationTypeEnum.WEB_NEWS, 8),
+                    (OperationTypeEnum.WEB_QUESTIONS, 9)
                ),
-                version="llama-3.1-sonar-large-128k-online",
+                version="sonar",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005
            ),
            AiModel(
@ -73,8 +75,8 @@ class AiPerplexity(BaseConnectorAi):
                connectorType="perplexity",
                apiUrl="https://api.perplexity.ai/chat/completions",
                temperature=0.2,
-                maxTokens=128000,
-                contextLength=128000,
+                maxTokens=4000,
+                contextLength=32000,
                costPer1kTokensInput=0.01,
                costPer1kTokensOutput=0.01,
                speedRating=6,  # Slower due to AI analysis
@ -92,84 +94,6 @@ class AiPerplexity(BaseConnectorAi):
                ),
                version="sonar-pro",
                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.01 + (bytesReceived / 4 / 1000) * 0.01
-            ),
-            AiModel(
-                name="mistral-7b-instruct",
-                displayName="Perplexity Mistral 7B Instruct",
-                connectorType="perplexity",
-                apiUrl="https://api.perplexity.ai/chat/completions",
-                temperature=0.2,
-                maxTokens=32000,
-                contextLength=32000,
-                costPer1kTokensInput=0.002,
-                costPer1kTokensOutput=0.002,
-                speedRating=9,  # Fast for basic AI tasks
-                qualityRating=7,  # Good but not premium quality
-                # capabilities removed (not used in business logic)
-                functionCall=self.callWebOperation,
-                priority=PriorityEnum.COST,
-                processingMode=ProcessingModeEnum.BASIC,
-                operationTypes=createOperationTypeRatings(
-                    (OperationTypeEnum.WEB_RESEARCH, 7),
-                    (OperationTypeEnum.WEB_SEARCH, 6),
-                    (OperationTypeEnum.WEB_CRAWL, 5),
-                    (OperationTypeEnum.WEB_NEWS, 5),
-                    (OperationTypeEnum.WEB_QUESTIONS, 6)
-                ),
-                version="mistral-7b-instruct",
-                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002
-            ),
-            AiModel(
-                name="mistral-7b-instruct-qa",
-                displayName="Perplexity Mistral 7B Instruct QA",
-                connectorType="perplexity",
-                apiUrl="https://api.perplexity.ai/chat/completions",
-                temperature=0.2,
-                maxTokens=32000,
-                contextLength=32000,
-                costPer1kTokensInput=0.002,
-                costPer1kTokensOutput=0.002,
-                speedRating=9,  # Fast for Q&A tasks
-                qualityRating=7,  # Good but not premium quality
-                # capabilities removed (not used in business logic)
-                functionCall=self.callWebOperation,
-                priority=PriorityEnum.COST,
-                processingMode=ProcessingModeEnum.BASIC,
-                operationTypes=createOperationTypeRatings(
-                    (OperationTypeEnum.WEB_RESEARCH, 6),
-                    (OperationTypeEnum.WEB_SEARCH, 5),
-                    (OperationTypeEnum.WEB_CRAWL, 4),
-                    (OperationTypeEnum.WEB_NEWS, 4),
-                    (OperationTypeEnum.WEB_QUESTIONS, 10)
-                ),
-                version="mistral-7b-instruct",
-                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002
-            ),
-            AiModel(
-                name="mistral-7b-instruct-news",
-                displayName="Perplexity Mistral 7B Instruct News",
-                connectorType="perplexity",
-                apiUrl="https://api.perplexity.ai/chat/completions",
-                temperature=0.2,
-                maxTokens=32000,
-                contextLength=32000,
-                costPer1kTokensInput=0.002,
-                costPer1kTokensOutput=0.002,
-                speedRating=9,  # Fast for news tasks
-                qualityRating=7,  # Good but not premium quality
-                # capabilities removed (not used in business logic)
-                functionCall=self.callWebOperation,
-                priority=PriorityEnum.COST,
-                processingMode=ProcessingModeEnum.BASIC,
-                operationTypes=createOperationTypeRatings(
-                    (OperationTypeEnum.WEB_RESEARCH, 6),
-                    (OperationTypeEnum.WEB_SEARCH, 5),
-                    (OperationTypeEnum.WEB_CRAWL, 4),
-                    (OperationTypeEnum.WEB_NEWS, 10),
-                    (OperationTypeEnum.WEB_QUESTIONS, 4)
-                ),
-                version="mistral-7b-instruct",
-                calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.002 + (bytesReceived / 4 / 1000) * 0.002
            )
        ]
    
@ -191,7 +115,9 @@ class AiPerplexity(BaseConnectorAi):
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            payload = {
@ -251,7 +177,9 @@ class AiPerplexity(BaseConnectorAi):
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            # Parse unified prompt JSON format
@ -349,7 +277,9 @@ Include actual URLs in your response."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            payload = {
@ -408,7 +338,9 @@ Include actual URLs in your response."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            payload = {
@ -467,7 +399,9 @@ Include actual URLs in your response."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            payload = {
@ -526,7 +460,9 @@ Include actual URLs in your response."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            # Parse unified prompt JSON format
@ -623,17 +559,17 @@ Extract content from each URL and provide detailed analysis."""
        """
        try:
            options = modelCall.options
-            operationType = options.get("operationType")
+            operationType = getattr(options, "operationType", None)
            
-            if operationType == "WEB_SEARCH":
+            if operationType == OperationTypeEnum.WEB_SEARCH:
                return await self.callAiWithWebSearch(modelCall)
-            elif operationType == "WEB_CRAWL":
+            elif operationType == OperationTypeEnum.WEB_CRAWL:
                return await self.crawl(modelCall)
-            elif operationType == "WEB_RESEARCH":
+            elif operationType == OperationTypeEnum.WEB_RESEARCH:
                return await self.research(modelCall)
-            elif operationType == "WEB_QUESTIONS":
+            elif operationType == OperationTypeEnum.WEB_QUESTIONS:
                return await self.questions(modelCall)
-            elif operationType == "WEB_NEWS":
+            elif operationType == OperationTypeEnum.WEB_NEWS:
                return await self.news(modelCall)
            else:
                # Fallback to research for unknown operation types
@ -661,7 +597,9 @@ Extract content from each URL and provide detailed analysis."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            # Parse unified prompt JSON format
@ -754,7 +692,9 @@ Provide comprehensive research with detailed analysis."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            # Parse unified prompt JSON format
@ -850,7 +790,9 @@ Provide a detailed answer with well-cited sources."""
            messages = modelCall.messages
            model = modelCall.model
            options = modelCall.options
-            temperature = options.get("temperature", model.temperature)
+            temperature = getattr(options, "temperature", None)
+            if temperature is None:
+                temperature = model.temperature
            maxTokens = model.maxTokens
            
            # Parse unified prompt JSON format
--- a/modules/aicore/aicorePluginTavily.py
+++ b/modules/aicore/aicorePluginTavily.py
@ -64,6 +64,20 @@ class ConnectorWeb(BaseConnectorAi):
        # Cached web search constraints (camelCase per project style)
        self.webSearchMinResults: int = 1
        self.webSearchMaxResults: int = 20
+        # Initialize client if API key is available
+        self._initializeClient()
+    
+    def _initializeClient(self):
+        """Create the Tavily client when an API key is configured.
+
+        Reads ``Connector_AiTavily_API_SECRET`` from ``APP_CONFIG``. With a key,
+        ``self.client`` is set to an ``AsyncTavilyClient``; without one only a
+        warning is logged. This is best-effort: any exception is logged and
+        swallowed rather than propagated.
+
+        After this method returns, ``self.client`` always exists (``None`` when
+        no client could be built), so ``self.client is None`` checks are safe.
+        """
+        # The search/extract paths test `self.client is None`; make sure the
+        # attribute exists on the no-key and failure paths too, without
+        # clobbering a client that was already set.
+        # NOTE(review): harmless if the constructor already pre-assigns it.
+        if not hasattr(self, "client"):
+            self.client = None
+        try:
+            apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
+            if apiKey:
+                self.client = AsyncTavilyClient(api_key=apiKey)
+                logger.info("Tavily client initialized successfully")
+            else:
+                logger.warning("Tavily API key not found, client not initialized")
+        except Exception as e:
+            # Deliberately non-fatal: a missing/broken Tavily setup is reported
+            # later, when a request actually needs the client.
+            logger.error(f"Failed to initialize Tavily client: {str(e)}")
    
    def getConnectorType(self) -> str:
        """Get the connector type identifier."""
@ -442,13 +456,13 @@ One URL per line.
        """
        try:
            options = modelCall.options
-            operationType = options.get("operationType")
+            operationType = getattr(options, "operationType", None)
            
-            if operationType == "WEB_SEARCH":
+            if operationType == OperationTypeEnum.WEB_SEARCH:
                return await self.search(modelCall)
-            elif operationType == "WEB_CRAWL":
+            elif operationType == OperationTypeEnum.WEB_CRAWL:
                return await self.crawl(modelCall)
-            elif operationType in ["WEB_RESEARCH", "WEB_QUESTIONS", "WEB_NEWS"]:
+            elif operationType in [OperationTypeEnum.WEB_RESEARCH, OperationTypeEnum.WEB_QUESTIONS, OperationTypeEnum.WEB_NEWS]:
                return await self.research(modelCall)
            else:
                # Fallback to search for unknown operation types
@ -493,8 +507,8 @@ One URL per line.
                time_range=optimizedParams.get("time_range", timeRange),
                country=optimizedParams.get("country", country),
                language=optimizedParams.get("language", language),
-                include_answer=options.get("include_answer", True),
-                include_raw_content=options.get("include_raw_content", True),
+                include_answer=getattr(options, "include_answer", True),
+                include_raw_content=getattr(options, "include_raw_content", True),
            )

            # Step 3: AI-based URL selection and intelligent filtering
@ -607,14 +621,14 @@ One URL per line.
            # Extract parameters from modelCall
            promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
            options = modelCall.options
-            operationType = options.get("operationType")
+            operationType = getattr(options, "operationType", None)
            
            # Parse unified prompt JSON format
            import json
            promptData = json.loads(promptContent)
            
            # Extract parameters based on operation type
-            if operationType == "WEB_RESEARCH":
+            if operationType == OperationTypeEnum.WEB_RESEARCH:
                query = promptData.get("researchPrompt", promptContent)
                maxResults = promptData.get("maxResults", 8)
                searchDepth = "basic"
@ -623,7 +637,7 @@ One URL per line.
                language = promptData.get("language")
                topic = "general"
                
-            elif operationType == "WEB_QUESTIONS":
+            elif operationType == OperationTypeEnum.WEB_QUESTIONS:
                query = promptData.get("question", promptContent)
                maxResults = promptData.get("maxResults", 6)
                searchDepth = "basic"
@ -632,7 +646,7 @@ One URL per line.
                language = promptData.get("language")
                topic = "general"
                
-            elif operationType == "WEB_NEWS":
+            elif operationType == OperationTypeEnum.WEB_NEWS:
                query = promptData.get("newsPrompt", promptContent)
                maxResults = promptData.get("maxResults", 10)
                searchDepth = "basic"
@ -766,22 +780,22 @@ One URL per line.
            
            search_results = await self._search(
                query=query,
-                max_results=options.get("max_results", 5),
-                search_depth=options.get("search_depth"),
-                time_range=options.get("time_range"),
-                topic=options.get("topic"),
-                include_domains=options.get("include_domains"),
-                exclude_domains=options.get("exclude_domains"),
-                language=options.get("language"),
-                include_answer=options.get("include_answer"),
-                include_raw_content=options.get("include_raw_content"),
+                max_results=getattr(options, "max_results", 5),
+                search_depth=getattr(options, "search_depth", None),
+                time_range=getattr(options, "time_range", None),
+                topic=getattr(options, "topic", None),
+                include_domains=getattr(options, "include_domains", None),
+                exclude_domains=getattr(options, "exclude_domains", None),
+                language=getattr(options, "language", None),
+                include_answer=getattr(options, "include_answer", None),
+                include_raw_content=getattr(options, "include_raw_content", None),
            )

            urls = [result.url for result in search_results]
            crawl_results = await self._crawl(
                urls,
-                extract_depth=options.get("extract_depth"),
-                format=options.get("format"),
+                extract_depth=getattr(options, "extract_depth", None),
+                format=getattr(options, "format", None),
            )

            # Convert to JSON string
@ -805,8 +819,8 @@ One URL per line.
                success=True,
                metadata={
                    "total_count": len(crawl_results),
-                    "search_depth": options.get("search_depth", "basic"),
-                    "extract_depth": options.get("extract_depth", "basic")
+                    "search_depth": getattr(options, "search_depth", "basic"),
+                    "extract_depth": getattr(options, "extract_depth", "basic")
                }
            )
            
@ -936,6 +950,13 @@ One URL per line.
            kwargs["include_raw_content"] = include_raw_content

        logger.debug(f"Tavily.search kwargs: {kwargs}")
+        
+        # Ensure client is initialized
+        if self.client is None:
+            self._initializeClient()
+            if self.client is None:
+                raise ValueError("Tavily client not initialized. Please check API key configuration.")
+        
        response = await self.client.search(**kwargs)

        return [
@ -973,6 +994,12 @@ One URL per line.

                logger.debug(f"Sending request to Tavily with kwargs: {kwargs_extract}")
                
+                # Ensure client is initialized
+                if self.client is None:
+                    self._initializeClient()
+                    if self.client is None:
+                        raise ValueError("Tavily client not initialized. Please check API key configuration.")
+                
                response = await asyncio.wait_for(
                    self.client.extract(**kwargs_extract),
                    timeout=timeout
--- a/modules/datamodels/datamodelAi.py
+++ b/modules/datamodels/datamodelAi.py
@ -2,8 +2,8 @@ from typing import Optional, List, Dict, Any, Callable, TYPE_CHECKING, Tuple
 from pydantic import BaseModel, Field
 from enum import Enum

-if TYPE_CHECKING:
-    from modules.datamodels.datamodelExtraction import ContentPart
+# Import ContentPart for runtime use (needed for Pydantic model rebuilding)
+from modules.datamodels.datamodelExtraction import ContentPart

 # Operation Types
 class OperationTypeEnum(str, Enum):
@ -173,7 +173,7 @@ class AiModelCall(BaseModel):
    
    messages: List[Dict[str, Any]] = Field(description="Messages in OpenAI format (role, content)")
    model: Optional[AiModel] = Field(default=None, description="The AI model being called")
-    options: Dict[str, Any] = Field(default_factory=dict, description="Additional model-specific options")
+    options: AiCallOptions = Field(default_factory=AiCallOptions, description="Additional model-specific options")
    
    class Config:
        arbitraryTypesAllowed = True
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@ -399,9 +399,9 @@ class AiObjects:
        inputBytes = len((prompt + context).encode('utf-8'))
        
        # Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
-        contextLength = model.contextLength
-        if contextLength > 0:
-            tokenLimit = str(contextLength)
+        # Use maxTokens for output limit, not contextLength
+        if model.maxTokens > 0:
+            tokenLimit = str(model.maxTokens)
        else:
            tokenLimit = "16000"  # Default for text generation
        
@ -450,7 +450,7 @@ class AiObjects:
        outputBytes = len(content.encode("utf-8"))
        
        # Calculate price using model's own price calculation method
-        priceUsd = model.calculatePriceUsd(inputBytes, outputBytes)
+        priceUsd = model.calculatePriceUsd(processingTime, inputBytes, outputBytes)
        
        return AiCallResponse(
            content=content,
@ -542,7 +542,7 @@ class AiObjects:
        modelCall = AiModelCall(
            messages=[{"role": "user", "content": prompt}],
            model=model,
-            options={"imageData": imageData, "mimeType": mimeType}
+            options=AiCallOptions(imageData=imageData, mimeType=mimeType)
        )
        
        # Call the model with standardized interface
@ -562,7 +562,7 @@ class AiObjects:
        outputBytes = len(content.encode("utf-8"))
        
        # Calculate price using model's own price calculation method
-        priceUsd = model.calculatePriceUsd(inputBytes, outputBytes)
+        priceUsd = model.calculatePriceUsd(processingTime, inputBytes, outputBytes)
        
        return AiCallResponse(
            content=content,
@ -603,7 +603,7 @@ class AiObjects:
            modelCall = AiModelCall(
                messages=[{"role": "user", "content": prompt}],
                model=selectedModel,
-                options={"size": size, "quality": quality, "style": style}
+                options=AiCallOptions(size=size, quality=quality, style=style)
            )
            
            # Call the model with standardized interface
@ -623,13 +623,13 @@ class AiObjects:
            outputBytes = len(content.encode("utf-8"))
            
            # Calculate price using model's own price calculation method
-            priceUsd = selectedModel.calculatePriceUsd(inputBytes, outputBytes)
+            priceUsd = selectedModel.calculatePriceUsd(processingTime, inputBytes, outputBytes)
            
            logger.info(f"✅ Image generation successful with model: {modelName}")
            return AiCallResponse(
                success=True,
                content=content,
-                model=modelName,
+                modelName=modelName,
                processingTime=processingTime,
                priceUsd=priceUsd,
                bytesSent=inputBytes,
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@ -8,6 +8,7 @@ from modules.interfaces.interfaceAiObjects import AiObjects
 from modules.services.serviceAi.subCoreAi import SubCoreAi
 from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
 from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
+from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent


 logger = logging.getLogger(__name__)
@ -142,4 +143,8 @@ class AiService:
        # Use "json" for document generation calls since they return JSON
        return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title, "json")

+    def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
+        """Delegate to the shared ``sanitizePromptContent`` helper.
+
+        Args:
+            content: Prompt text to sanitize.
+            contentType: Forwarded unchanged to the helper; defaults to ``"text"``.
+
+        Returns:
+            Whatever the shared helper returns for these arguments.
+        """
+        # The bare name resolves to the module-level import, not to this method.
+        sanitized = sanitizePromptContent(content, contentType)
+        return sanitized
+

--- a/test_ai_behavior.py
+++ b/test_ai_behavior.py
@ -80,12 +80,11 @@ class AIBehaviorTester:
        # Use the AI service directly with the user prompt - it will build the generation prompt internally
        try:
            # Use the existing AI service with JSON format - it handles looping internally
-            response = await self.services.ai.coreAi.callAiDocuments(
+            response = await self.services.ai.callAiDocuments(
                prompt=prompt,  # Use the raw user prompt directly
                documents=None,
                outputFormat="json",
-                title="Prime Numbers Test",
-                loopInstructionFormat="json"  # Use the JSON loop instructions
+                title="Prime Numbers Test"
            )
            
            if isinstance(response, dict):
--- a/test_ai_models.py
+++ b/test_ai_models.py
@ -0,0 +1,583 @@
+#!/usr/bin/env python3
+"""
+AI Models Test - Tests all available AI models individually
+"""
+
+import asyncio
+import json
+import sys
+import os
+import base64
+from datetime import datetime
+from typing import Dict, Any, List
+
+# Add the gateway to path
+sys.path.append(os.path.dirname(__file__))
+
+# Import the service initialization
+from modules.features.chatPlayground.mainChatPlayground import getServices
+from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
+from modules.datamodels.datamodelUam import User
+
+class AIModelsTester:
+    def __init__(self):
+        # Create a minimal user context for testing
+        testUser = User(
+            id="test_user",
+            username="test_user",
+            email="test@example.com",
+            fullName="Test User",
+            language="en",
+            mandateId="test_mandate"
+        )
+        
+        # Initialize services using the existing system
+        self.services = getServices(testUser, None)  # Test user, no workflow
+        self.testResults = []
+        
+        # Create logs directory if it doesn't exist
+        self.logsDir = os.path.join(os.path.dirname(__file__), "..", "local", "logs")
+        os.makedirs(self.logsDir, exist_ok=True)
+        
+        # Create modeltest subdirectory
+        self.modelTestDir = os.path.join(self.logsDir, "modeltest")
+        os.makedirs(self.modelTestDir, exist_ok=True)
+        
+        # Copy test image to modeltest directory if it exists
+        testImageSource = os.path.join(self.logsDir, "_testdata_photo_2025-06-03_13-05-52.jpg")
+        testImageDest = os.path.join(self.modelTestDir, "_testdata_photo_2025-06-03_13-05-52.jpg")
+        if os.path.exists(testImageSource) and not os.path.exists(testImageDest):
+            import shutil
+            shutil.copy2(testImageSource, testImageDest)
+            print(f"📷 Test image copied to: {testImageDest}")
+    
+    async def initialize(self):
+        """Initialize the AI service."""
+        # Set logging level to INFO to reduce noise
+        import logging
+        logging.getLogger().setLevel(logging.INFO)
+        
+        # The AI service needs to be recreated with proper initialization
+        from modules.services.serviceAi.mainServiceAi import AiService
+        self.services.ai = await AiService.create(self.services)
+        
+        # Create a minimal workflow context
+        from modules.datamodels.datamodelChat import ChatWorkflow
+        import uuid
+        
+        self.services.currentWorkflow = ChatWorkflow(
+            id=str(uuid.uuid4()),
+            name="Test Workflow",
+            status="running",
+            startedAt=self.services.utils.timestampGetUtc(),
+            lastActivity=self.services.utils.timestampGetUtc(),
+            currentRound=1,
+            currentTask=0,
+            currentAction=0,
+            totalTasks=0,
+            totalActions=0,
+            mandateId="test_mandate",
+            messageIds=[],
+            workflowMode="React",
+            maxSteps=5
+        )
+        
+        print("✅ AI Service initialized successfully")
+        print(f"📁 Results will be saved to: {self.modelTestDir}")
+    
+    async def testModel(self, modelName: str) -> Dict[str, Any]:
+        """Test a specific AI model with a simple prompt."""
+        print(f"\n{'='*60}")
+        print(f"TESTING MODEL: {modelName}")
+        print(f"{'='*60}")
+        
+        # Choose test prompt based on model type - Web models get JSON formatted prompts
+        import json
+        
+        if "tavily" in modelName.lower():
+            # Tavily models get web search prompt in JSON format (from methodAi.py)
+            testPrompt = json.dumps({
+                "searchPrompt": "Search for recent news about artificial intelligence developments in 2024. Return the top 3 results as JSON with fields: title, url, snippet.",
+                "maxResults": 3,
+                "timeRange": "y",
+                "country": "United States",
+                "instructions": "Search the web and return a JSON response with a 'results' array containing objects with 'title', 'url', and optionally 'content' fields. Focus on finding relevant URLs for the search prompt."
+            }, indent=2)
+        elif "perplexity" in modelName.lower() or "llama" in modelName.lower() or "sonar" in modelName.lower() or "mistral" in modelName.lower():
+            # Perplexity models get web research prompt in JSON format (from methodAi.py)
+            testPrompt = json.dumps({
+                "researchPrompt": "Research the latest trends in renewable energy technology. Provide a comprehensive overview with key developments, companies involved, and future prospects. Return as JSON.",
+                "maxResults": 5,
+                "timeRange": "y",
+                "country": "United States",
+                "instructions": "Conduct comprehensive web research and return a JSON response with 'results' array containing objects with 'title', 'url', 'content', and 'analysis' fields. Provide detailed analysis and insights."
+            }, indent=2)
+        else:
+            # Fallback for other models
+            testPrompt = "Generate a comprehensive analysis of the current state of artificial intelligence. Return as JSON."
+        
+        print(f"Test prompt: {testPrompt}")
+        print(f"Prompt length: {len(testPrompt)} characters")
+        
+        startTime = asyncio.get_event_loop().time()
+        
+        try:
+            # Create options to force this specific model
+            if "internal" in modelName.lower():
+                options = AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_EXTRACT,
+                    preferredModel=modelName
+                )
+            else:
+                options = AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_GENERATE,
+                    preferredModel=modelName
+                )
+            
+            # Call the AI service DIRECTLY through the model's functionCall
+            # This tests the actual model, not the document generation pipeline
+            # Get the model directly from the registry using the model registry
+            from modules.aicore.aicoreModelRegistry import modelRegistry
+            model = modelRegistry.getModel(modelName)
+            
+            if not model:
+                raise Exception(f"Model {modelName} not found")
+            
+            # Create AiModelCall and call the model's functionCall directly
+            from modules.datamodels.datamodelAi import AiModelCall
+            import base64
+            import os
+            
+            # Prepare messages and options based on model type
+            if "vision" in modelName.lower():
+                # For vision models, skip for now since they require special handling
+                print(f"⚠️  Skipping vision model {modelName} - requires special image handling")
+                return {
+                    "modelName": modelName,
+                    "status": "SKIPPED",
+                    "processingTime": 0.0,
+                    "responseLength": 0,
+                    "responseType": "skipped",
+                    "hasContent": False,
+                    "error": "Vision model requires special image handling",
+                    "fullResponse": "Skipped - vision model requires special image handling"
+                }
+            else:
+                # For other models, use normal functionCall
+                messages = [{"role": "user", "content": testPrompt}]
+                modelCall = AiModelCall(
+                    messages=messages,
+                    model=model,
+                    options=options
+                )
+                response = await model.functionCall(modelCall)
+            
+            endTime = asyncio.get_event_loop().time()
+            processingTime = endTime - startTime
+            
+            # Analyze response - now we get AiModelResponse objects
+            if hasattr(response, 'success'):
+                # AiModelResponse object
+                if response.success:
+                    result = {
+                        "modelName": modelName,
+                        "status": "SUCCESS",
+                        "processingTime": round(processingTime, 2),
+                        "responseLength": len(response.content) if response.content else 0,
+                        "responseType": "AiModelResponse",
+                        "hasContent": bool(response.content),
+                        "error": None,
+                        "modelUsed": modelName,
+                        "priceUsd": 0.0,  # AiModelResponse doesn't have price info
+                        "bytesSent": 0,
+                        "bytesReceived": len(response.content.encode('utf-8')) if response.content else 0
+                    }
+                    
+                    # Try to parse content as JSON
+                    if response.content:
+                        try:
+                            json.loads(response.content)
+                            result["isValidJson"] = True
+                        except:
+                            result["isValidJson"] = False
+                        
+                        result["responsePreview"] = response.content[:200] + "..." if len(response.content) > 200 else response.content
+                        result["fullResponse"] = response.content
+                    else:
+                        result["isValidJson"] = False
+                        result["responsePreview"] = "Empty response"
+                        result["fullResponse"] = ""
+                    
+                    print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
+                    print(f"📄 Response length: {len(response.content) if response.content else 0} characters")
+                    print(f"📄 Model used: {modelName}")
+                    print(f"📄 Response preview: {result['responsePreview']}")
+                    
+                else:
+                    error = response.error or "Unknown error"
+                    result = {
+                        "modelName": modelName,
+                        "status": "ERROR",
+                        "processingTime": round(processingTime, 2),
+                        "responseLength": 0,
+                        "responseType": "AiModelResponse",
+                        "hasContent": False,
+                        "error": error,
+                        "fullResponse": str(response)
+                    }
+                    
+                    print(f"❌ ERROR - {error}")
+                    
+            elif isinstance(response, dict):
+                # Fallback for dict responses
+                if response.get("success", True):
+                    result = {
+                        "modelName": modelName,
+                        "status": "SUCCESS",
+                        "processingTime": round(processingTime, 2),
+                        "responseLength": len(str(response)),
+                        "responseType": "dict",
+                        "hasContent": True,
+                        "error": None
+                    }
+                    
+                    # Try to parse as JSON
+                    try:
+                        jsonResponse = json.dumps(response, indent=2)
+                        result["responsePreview"] = jsonResponse[:200] + "..." if len(jsonResponse) > 200 else jsonResponse
+                        result["isValidJson"] = True
+                        result["fullResponse"] = jsonResponse
+                    except:
+                        result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
+                        result["isValidJson"] = False
+                        result["fullResponse"] = str(response)
+                    
+                    print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
+                    print(f"📄 Response length: {len(str(response))} characters")
+                    print(f"📄 Response preview: {result['responsePreview']}")
+                    
+                else:
+                    error = response.get("error", "Unknown error")
+                    result = {
+                        "modelName": modelName,
+                        "status": "ERROR",
+                        "processingTime": round(processingTime, 2),
+                        "responseLength": 0,
+                        "responseType": "error",
+                        "hasContent": False,
+                        "error": error,
+                        "fullResponse": str(response)
+                    }
+                    
+                    print(f"❌ ERROR - {error}")
+                    
+            else:
+                # String response
+                result = {
+                    "modelName": modelName,
+                    "status": "SUCCESS",
+                    "processingTime": round(processingTime, 2),
+                    "responseLength": len(str(response)),
+                    "responseType": "string",
+                    "hasContent": True,
+                    "error": None
+                }
+                
+                # Try to parse as JSON
+                try:
+                    json.loads(str(response))
+                    result["isValidJson"] = True
+                except:
+                    result["isValidJson"] = False
+                
+                result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
+                result["fullResponse"] = str(response)
+                
+                print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
+                print(f"📄 Response length: {len(str(response))} characters")
+                print(f"📄 Response preview: {result['responsePreview']}")
+            
+            # Save text response for all models
+            if result.get("status") == "SUCCESS":
+                self._saveTextResponse(modelName, result)
+            
+        except Exception as e:
+            endTime = asyncio.get_event_loop().time()
+            processingTime = endTime - startTime
+            
+            result = {
+                "modelName": modelName,
+                "status": "EXCEPTION",
+                "processingTime": round(processingTime, 2),
+                "responseLength": 0,
+                "responseType": "exception",
+                "hasContent": False,
+                "error": str(e)
+            }
+            
+            print(f"💥 EXCEPTION - {str(e)}")
+        
+        self.testResults.append(result)
+        
+        # Save individual model result immediately
+        self._saveIndividualModelResult(modelName, result)
+        
+        return result
+    
+    def _saveImageResponse(self, modelName: str, result: Dict[str, Any]):
+        """Save base64 image response to file."""
+        try:
+            fullResponse = result.get("fullResponse", "")
+            base64Data = None
+            
+            # Try to extract base64 data from response
+            if isinstance(fullResponse, dict):
+                # Look for base64 data in the response
+                if "content" in fullResponse:
+                    base64Data = fullResponse["content"]
+                elif "data" in fullResponse:
+                    base64Data = fullResponse["data"]
+                elif "image" in fullResponse:
+                    base64Data = fullResponse["image"]
+            else:
+                # Try to find base64 data in string response
+                import re
+                base64Match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', str(fullResponse))
+                if base64Match:
+                    base64Data = base64Match.group(1)
+                else:
+                    # Try to find pure base64 string
+                    base64Match = re.search(r'([A-Za-z0-9+/=]{100,})', str(fullResponse))
+                    if base64Match:
+                        base64Data = base64Match.group(1)
+            
+            if base64Data:
+                # Clean base64 data
+                if base64Data.startswith('data:image/'):
+                    base64Data = base64Data.split(',', 1)[1]
+                
+                # Decode and save image
+                imageData = base64.b64decode(base64Data)
+                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                filename = f"{modelName}_{timestamp}.png"
+                filepath = os.path.join(self.modelTestDir, filename)
+                
+                with open(filepath, 'wb') as f:
+                    f.write(imageData)
+                
+                result["savedImage"] = filepath
+                print(f"🖼️  Image saved: {filepath}")
+            else:
+                print(f"⚠️  No base64 image data found in response")
+                
+        except Exception as e:
+            print(f"❌ Error saving image: {str(e)}")
+            result["imageSaveError"] = str(e)
+    
+    def _saveTextResponse(self, modelName: str, result: Dict[str, Any]):
+        """Save text response to file."""
+        try:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"{modelName}_{timestamp}.txt"
+            filepath = os.path.join(self.modelTestDir, filename)
+            
+            # Prepare content for saving
+            content = result.get("fullResponse", "")
+            if not content:
+                content = result.get("responsePreview", "No content available")
+            
+            # Add metadata header
+            metadata = f"""Model: {modelName}
+Test Time: {timestamp}
+Status: {result.get('status', 'Unknown')}
+Processing Time: {result.get('processingTime', 0):.2f}s
+Response Length: {result.get('responseLength', 0)} characters
+Is Valid JSON: {result.get('isValidJson', False)}
+
+--- RESPONSE CONTENT ---
+{content}
+"""
+            
+            with open(filepath, 'w', encoding='utf-8') as f:
+                f.write(metadata)
+            
+            result["savedTextFile"] = filepath
+            print(f"📄 Text response saved: {filepath}")
+                
+        except Exception as e:
+            print(f"❌ Error saving text response: {str(e)}")
+            result["textSaveError"] = str(e)
+    
+    def _saveIndividualModelResult(self, modelName: str, result: Dict[str, Any]):
+        """Save individual model test result to file."""
+        try:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"{modelName}_{timestamp}.json"
+            filepath = os.path.join(self.modelTestDir, filename)
+            
+            # Prepare individual result data
+            individualData = {
+                "modelName": modelName,
+                "testTimestamp": timestamp,
+                "testDate": datetime.now().isoformat(),
+                "result": result
+            }
+            
+            # Save to JSON file
+            with open(filepath, 'w', encoding='utf-8') as f:
+                json.dump(individualData, f, indent=2, ensure_ascii=False)
+            
+            print(f"📄 Individual result saved: {filename}")
+            
+        except Exception as e:
+            print(f"❌ Error saving individual result: {str(e)}")
+    
+    def getAllAvailableModels(self) -> List[str]:
+        """Get all available model names."""
+        # Hardcoded list of known models - same approach as test_ai_behavior.py
+        return [
+            # "claude-3-5-sonnet-20241022",  # Skipped - text model, test later
+            # "claude-3-5-sonnet-20241022-vision",  # Skipped - requires image input
+            # "gpt-4o",  # Skipped - text model, test later
+            # "gpt-3.5-turbo",  # Skipped - text model, test later
+            # "gpt-4o-vision",  # Skipped - requires image input
+            # "dall-e-3",  # Skipped - image generation, test later
+            "sonar",  # Perplexity web model
+            "sonar-pro",  # Perplexity web model
+            "tavily-search",  # Tavily web model
+            "tavily-extract",  # Tavily web model
+            "tavily-search-extract",  # Tavily web model
+            # "internal-extractor",  # Skipped - internal model, test later
+            # "internal-generator",  # Skipped - internal model, test later
+            # "internal-renderer"  # Skipped - internal model, test later
+        ]
+    
+    def saveTestResults(self):
+        """Save detailed test results to file."""
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        resultsFile = os.path.join(self.modelTestDir, f"modeltest_results_{timestamp}.json")
+        
+        # Prepare results for saving
+        saveData = {
+            "testTimestamp": timestamp,
+            "testDate": datetime.now().isoformat(),
+            "totalModels": len(self.testResults),
+            "successfulModels": len([r for r in self.testResults if r["status"] == "SUCCESS"]),
+            "errorModels": len([r for r in self.testResults if r["status"] == "ERROR"]),
+            "exceptionModels": len([r for r in self.testResults if r["status"] == "EXCEPTION"]),
+            "results": self.testResults
+        }
+        
+        # Calculate success rate
+        if saveData["totalModels"] > 0:
+            saveData["successRate"] = (saveData["successfulModels"] / saveData["totalModels"]) * 100
+        else:
+            saveData["successRate"] = 0
+        
+        # Save to JSON file
+        with open(resultsFile, 'w', encoding='utf-8') as f:
+            json.dump(saveData, f, indent=2, ensure_ascii=False)
+        
+        print(f"📄 Detailed results saved: {resultsFile}")
+        return resultsFile
+    
+    def printTestSummary(self):
+        """Print a summary of all test results."""
+        print(f"\n{'='*80}")
+        print("AI MODELS TEST SUMMARY")
+        print(f"{'='*80}")
+        
+        totalModels = len(self.testResults)
+        successfulModels = len([r for r in self.testResults if r["status"] == "SUCCESS"])
+        errorModels = len([r for r in self.testResults if r["status"] == "ERROR"])
+        exceptionModels = len([r for r in self.testResults if r["status"] == "EXCEPTION"])
+        
+        print(f"📊 Total models tested: {totalModels}")
+        print(f"✅ Successful: {successfulModels}")
+        print(f"❌ Errors: {errorModels}")
+        print(f"💥 Exceptions: {exceptionModels}")
+        print(f"📈 Success rate: {(successfulModels/totalModels*100):.1f}%" if totalModels > 0 else "0%")
+        
+        print(f"\n{'='*80}")
+        print("DETAILED RESULTS")
+        print(f"{'='*80}")
+        
+        for result in self.testResults:
+            status_icon = {
+                "SUCCESS": "✅",
+                "ERROR": "❌", 
+                "EXCEPTION": "💥"
+            }.get(result["status"], "❓")
+            
+            print(f"\n{status_icon} {result['modelName']}")
+            print(f"   Status: {result['status']}")
+            print(f"   Processing time: {result['processingTime']}s")
+            print(f"   Response length: {result['responseLength']} characters")
+            print(f"   Response type: {result['responseType']}")
+            
+            if result.get("isValidJson") is not None:
+                print(f"   Valid JSON: {'Yes' if result['isValidJson'] else 'No'}")
+            
+            if result["error"]:
+                print(f"   Error: {result['error']}")
+            
+            if result.get("responsePreview"):
+                print(f"   Preview: {result['responsePreview']}")
+        
+        # Find fastest and slowest models
+        if successfulModels > 0:
+            successfulResults = [r for r in self.testResults if r["status"] == "SUCCESS"]
+            fastest = min(successfulResults, key=lambda x: x["processingTime"])
+            slowest = max(successfulResults, key=lambda x: x["processingTime"])
+            
+            print(f"\n{'='*80}")
+            print("PERFORMANCE HIGHLIGHTS")
+            print(f"{'='*80}")
+            print(f"🚀 Fastest model: {fastest['modelName']} ({fastest['processingTime']}s)")
+            print(f"🐌 Slowest model: {slowest['modelName']} ({slowest['processingTime']}s)")
+
+async def main():
+    """Run AI models testing."""
+    tester = AIModelsTester()
+    
+    print("Starting AI Models Testing...")
+    print("Initializing AI service...")
+    await tester.initialize()
+    
+    # Get all available models
+    models = tester.getAllAvailableModels()
+    
+    print(f"\nFound {len(models)} models to test:")
+    for i, model in enumerate(models, 1):
+        print(f"  {i}. {model}")
+    
+    print(f"\n{'='*80}")
+    print("STARTING INDIVIDUAL MODEL TESTS")
+    print(f"{'='*80}")
+    print("Press Enter after each model test to continue to the next one...")
+    
+    # Test each model individually
+    for i, modelName in enumerate(models, 1):
+        print(f"\n[{i}/{len(models)}] Testing model: {modelName}")
+        
+        # Test the model
+        await tester.testModel(modelName)
+        
+        # Pause for user input (except for the last model)
+        if i < len(models):
+            input(f"\nPress Enter to continue to the next model...")
+    
+    # Save detailed results to file
+    resultsFile = tester.saveTestResults()
+    
+    # Print final summary
+    tester.printTestSummary()
+    
+    print(f"\n{'='*80}")
+    print("TESTING COMPLETED")
+    print(f"{'='*80}")
+    print(f"📄 Results saved to: {resultsFile}")
+    print(f"📁 Images saved to: {tester.modelTestDir}")
+
+if __name__ == "__main__":
+    asyncio.run(main())