From 5039096a10cddd964dccbc5b2f3c6dfccb4022a0 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 26 Oct 2025 23:09:26 +0100
Subject: [PATCH] ai models ready for image analysis
---
IMAGE_ANALYSE_ANALYSIS.md | 150 ++++++
modules/aicore/aicorePluginAnthropic.py | 168 ++++---
modules/aicore/aicorePluginOpenai.py | 45 +-
modules/datamodels/datamodelAi.py | 1 +
modules/interfaces/interfaceAiObjects.py | 203 ++++----
modules/services/serviceAi/mainServiceAi.py | 2 +-
modules/services/serviceAi/subCoreAi.py | 53 ++-
test_ai_models.py | 494 ++++++++++----------
8 files changed, 629 insertions(+), 487 deletions(-)
create mode 100644 IMAGE_ANALYSE_ANALYSIS.md
diff --git a/IMAGE_ANALYSE_ANALYSIS.md b/IMAGE_ANALYSE_ANALYSIS.md
new file mode 100644
index 00000000..bb14bb39
--- /dev/null
+++ b/IMAGE_ANALYSE_ANALYSIS.md
@@ -0,0 +1,150 @@
+# Image Analysis Code Flow Analysis
+
+## Zusammenfassung der Parameter und Handovers
+
+### 1. Ablauf für Image-Analyse (durch Dokumentenverarbeitung)
+
+#### Eingabe
+- **methodAi.process()** wird aufgerufen mit:
+ - `aiPrompt`: Textanweisung für die Bildanalyse
+ - `documentList`: Liste von Dokumenten (einschließlich Bilder)
+ - `resultType`: Output-Format (optional, default: txt)
+
+#### Verarbeitung
+1. **mainServiceAi.py** → `callAiDocuments()`
+ - Delegiert an `subCoreAi.callAiDocuments()`
+
+2. **subCoreAi.py** → `callAiDocuments()`
+ - Prüft, ob Dokumente vorhanden sind
+ - Wenn ja: ruft `documentProcessor.callAiText()` auf
+
+3. **subDocumentProcessing.py** → `callAiText()`
+ - Ruft `processDocumentsPerChunk()` auf
+
+4. **subDocumentProcessing.py** → `_processChunksWithMapping()`
+ - Analysiert jeden Chunk
+ - **Wichtig**: Zeile 645-689 - Erkennung von Bildern
+ - Prüft `is_image` Flag basierend auf:
+ - `document_mime_type` (z.B. "image/jpeg")
+ - `part.mimeType`
+ - `part.typeGroup == "image"`
+
+5. **subCoreAi.py** → `readImage()` (wird aufgerufen für Bildchunks)
+ - Zeile 561-625
+ - Setzt `operationType = IMAGE_ANALYSE`
+   - Ruft `aiObjects.call()` auf mit einem `AiCallRequest`, der enthält:
+     - `prompt`: Der Analyse-Prompt
+     - `contentParts`: Ein `ContentPart` mit den Bilddaten (base64-kodiert)
+     - `mimeType`: Z.B. "image/jpeg" (im `ContentPart`)
+     - `options`: Mit `operationType=IMAGE_ANALYSE`
+
+#### Ausgabe
+- Textanalyse des Bildes
+
+### 2. Ablauf für direkte Image-Analyse
+
+#### Eingabe
+- **mainServiceAi.readImage()** wird direkt aufgerufen mit:
+ - `prompt`: Textanweisung
+ - `imageData`: Bilddaten (bytes oder base64)
+ - `mimeType`: Z.B. "image/jpeg"
+ - `options`: Optional, wird auf `IMAGE_ANALYSE` gesetzt
+
+#### Verarbeitung
+1. **mainServiceAi.py** → `readImage()`
+ - Delegiert an `subCoreAi.readImage()`
+
+2. **subCoreAi.py** → `readImage()`
+ - Setzt `operationType = IMAGE_ANALYSE` (Zeile 582)
+   - Ruft `aiObjects.call()` mit `contentParts` auf
+
+#### Ausgabe
+- Textanalyse des Bildes
+
+## Wo werden welche Funktionen genutzt?
+
+### mainServiceAi.py
+
+#### `readImage()` (Zeile 96-105)
+- **Verwendung**: Wird direkt von außen aufgerufen (z.B. API)
+- **Delegiert an**: `subCoreAi.readImage()`
+- **Verwendung**: ✅ Wird verwendet
+
+#### `generateImage()` (Zeile 108-118)
+- **Verwendung**: Wird direkt von außen aufgerufen (z.B. API)
+- **Delegiert an**: `subCoreAi.generateImage()`
+- **Verwendung**: ✅ Wird verwendet
+
+### subCoreAi.py
+
+#### `readImage()` (Zeile 561-625)
+- **Verwendung**:
+ 1. Wird von `mainServiceAi.readImage()` aufgerufen
+ 2. Wird von `subDocumentProcessing._processChunksWithMapping()` aufgerufen (Zeile 670)
+- **Verwendung**: ✅ Wird verwendet
+
+#### `generateImage()` (Zeile 628-660)
+- **Verwendung**: Wird von `mainServiceAi.generateImage()` aufgerufen
+- **Verwendung**: ✅ Wird verwendet
+
+### subDocumentProcessing.py
+
+#### `_processChunksWithMapping()` (Zeile 594-994)
+- **Bildanalyse**: Zeile 645-689
+ - Erkennt Bilder basierend auf MIME-Type und typeGroup
+ - Ruft `core_ai.readImage()` auf
+- **Verwendung**: ✅ Wird verwendet
+
+## Parameter-Validierung
+
+### ✅ Alle Parameter korrekt
+
+1. **operationType**:
+ - Wird immer auf `IMAGE_ANALYSE` gesetzt (subCoreAi Zeile 582)
+ - Wird korrekt übergeben
+
+2. **imageData**:
+ - Wird korrekt geladen und übergeben
+ - Unterstützt bytes und base64
+
+3. **mimeType**:
+ - Wird automatisch erkannt
+ - Standard: "image/jpeg"
+
+4. **prompt**:
+ - Wird korrekt übergeben
+ - Kann von Benutzer angepasst werden
+
+## Handovers sind korrekt
+
+### mainServiceAi → subCoreAi
+- ✅ `readImage()` delegiert korrekt
+- ✅ `generateImage()` delegiert korrekt
+
+### subDocumentProcessing → subCoreAi
+- ✅ Erkennt Bilder korrekt (Zeile 645-689)
+- ✅ Ruft `readImage()` mit korrekten Parametern auf
+- ✅ Setzt `operationType=IMAGE_ANALYSE`
+
+### subCoreAi → aiObjects
+- ✅ Ruft `call()` mit korrektem `AiCallRequest` (inkl. `contentParts`) auf
+- ✅ Setzt `operationType=IMAGE_ANALYSE`
+
+## Identifizierte Probleme
+
+### ⚠️ Keine Probleme identifiziert
+
+Die Parameter und Handovers sind alle korrekt:
+- ✅ Operation Type wird korrekt gesetzt
+- ✅ Bilddaten werden korrekt geladen und übergeben
+- ✅ MIME-Type wird korrekt erkannt
+- ✅ Prompt wird korrekt übergeben
+- ✅ Alle Delegierungen funktionieren korrekt
+
+## Test-Strategie
+
+Der Test verwendet:
+1. Direkte Bildanalyse über `mainServiceAi.readImage()`
+2. Testet alle Modelle die `IMAGE_ANALYSE` unterstützen
+3. Validiert die Antworten auf Inhalt und Struktur
+
diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py
index 4debc7ed..8a829fdc 100644
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@@ -70,8 +70,8 @@ class AiAnthropic(BaseConnectorAi):
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
),
AiModel(
- name="claude-3-5-sonnet-20241022-vision",
- displayName="Anthropic Claude 3.5 Sonnet Vision",
+ name="claude-3-5-sonnet-20241022",
+ displayName="Anthropic Claude 3.5 Sonnet Instance Vision",
connectorType="anthropic",
apiUrl="https://api.anthropic.com/v1/messages",
temperature=0.2,
@@ -79,9 +79,8 @@ class AiAnthropic(BaseConnectorAi):
contextLength=200000,
costPer1kTokensInput=0.015,
costPer1kTokensOutput=0.075,
- speedRating=6, # Slower due to high-quality processing
- qualityRating=10, # Best quality available
- # capabilities removed (not used in business logic)
+ speedRating=6,
+ qualityRating=10,
functionCall=self.callAiImage,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
@@ -234,69 +233,122 @@ class AiAnthropic(BaseConnectorAi):
AiModelResponse with analysis content
"""
try:
- # Extract parameters from modelCall
+ # Extract parameters from messages for Anthropic Vision API
messages = modelCall.messages
model = modelCall.model
- options = modelCall.options
- prompt = messages[0]["content"] if messages else ""
- imageData = getattr(options, "imageData", None)
- mimeType = getattr(options, "mimeType", None)
- # Debug logging
- logger.info(f"callAiImage called with imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
+ # Verify messages contain image data
+ if not messages or not messages[0].get("content"):
+ raise ValueError("No messages provided for image analysis")
- # Distinguish between file path and binary data
- if isinstance(imageData, str):
- # Check if it's base64 encoded data or a file path
- if len(imageData) > 100 and not os.path.exists(imageData):
- # It's likely base64 encoded data
- logger.info("Treating imageData as base64 encoded string")
- base64Data = imageData
- if not mimeType:
- mimeType = "image/png"
- else:
- # It's a file path - import filehandling only when needed
- logger.info(f"Treating imageData as file path: {imageData}")
- from modules import agentserviceFilemanager as fileHandler
- base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
- mimeType = mimeType or autoMimeType
- else:
- # It's binary data
- logger.info("Treating imageData as binary data")
- import base64
- base64Data = base64.b64encode(imageData).decode('utf-8')
- # MIME type must be specified for binary data
- if not mimeType:
- # Fallback to generic image type
- mimeType = "image/png"
+ logger.info(f"callAiImage called with {len(messages)} message(s)...")
- # Prepare the payload for the Vision API
- messages = [
- {
- "role": "user",
- "content": [
- {"type": "text", "text": prompt},
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:{mimeType};base64,{base64Data}"
- }
+ # Extract text prompt and image data from messages
+ # Messages format: [{"role": "user", "content": [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {"url": "data:..."}}]}]
+ userContent = messages[0]["content"]
+ if not isinstance(userContent, list):
+ raise ValueError("Expected content to be a list for vision")
+
+ textPrompt = ""
+ imageUrl = None
+
+ for contentItem in userContent:
+ if contentItem.get("type") == "text":
+ textPrompt = contentItem.get("text", "")
+ elif contentItem.get("type") == "image_url":
+ imageUrl = contentItem.get("image_url", {}).get("url", "")
+
+ if not imageUrl or not imageUrl.startswith("data:"):
+ raise ValueError("No image data found in messages")
+
+ # Extract base64 data and mime type from data URL
+ # Format: data:image/jpeg;base64,/9j/4AAQSkZ...
+ parts = imageUrl.split(";base64,")
+ if len(parts) != 2:
+ raise ValueError("Invalid image data URL format")
+
+ mimeType = parts[0].replace("data:", "")
+ base64Data = parts[1]
+
+ # Convert to Anthropic's vision format
+ anthropicMessages = [{
+ "role": "user",
+ "content": [
+ {"type": "text", "text": textPrompt},
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": mimeType,
+ "data": base64Data
}
- ]
- }
- ]
+ }
+ ]
+ }]
- # Create a modelCall for the basic AI function
- basicModelCall = AiModelCall(
- messages=messages,
- model=model
+ # Call Anthropic API directly for vision
+ import time
+ import base64
+
+ startTime = time.time()
+
+ # Prepare system prompt if available
+ systemPrompt = None
+ for msg in messages:
+ if msg.get("role") == "system":
+ systemContent = msg.get("content")
+ if isinstance(systemContent, list):
+ systemPrompt = "\n".join([item.get("text", "") for item in systemContent if item.get("type") == "text"])
+ else:
+ systemPrompt = systemContent
+ break
+
+ # Get parameters from model (consistent with callAiBasic)
+ maxTokens = model.maxTokens if hasattr(model, 'maxTokens') else 8192
+ temperature = model.temperature if hasattr(model, 'temperature') else 0.2
+
+ # Prepare API payload
+ payload = {
+ "model": model.name, # Use standard model.name
+ "max_tokens": maxTokens,
+ "messages": anthropicMessages
+ }
+
+ if systemPrompt:
+ payload["system"] = systemPrompt
+
+ # Set temperature from model
+ payload["temperature"] = temperature
+
+ # Make API call with headers from httpClient (which includes anthropic-version)
+ response = await self.httpClient.post(
+ "https://api.anthropic.com/v1/messages",
+ json=payload
)
- # Use the existing callAiBasic function with the Vision model
- response = await self.callAiBasic(basicModelCall)
+ if response.status_code != 200:
+ errorText = response.text
+ logger.error(f"Anthropic API error: {response.status_code} - {errorText}")
+ raise HTTPException(status_code=response.status_code, detail=f"Anthropic API error: {errorText}")
- # Return the standardized response
- return response
+ # Parse response
+ result = response.json()
+ content = result["content"][0]["text"] if result.get("content") else ""
+
+ endTime = time.time()
+ processingTime = endTime - startTime
+
+ # Calculate cost
+ inputTokens = result.get("usage", {}).get("input_tokens", 0)
+ outputTokens = result.get("usage", {}).get("output_tokens", 0)
+
+ # Return standardized response
+ return AiModelResponse(
+ content=content,
+ success=True,
+ modelId=model.name,
+ processingTime=processingTime
+ )
except Exception as e:
logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py
index 4d2a0f4d..a2cc501c 100644
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@@ -95,8 +95,8 @@ class AiOpenai(BaseConnectorAi):
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002
),
AiModel(
- name="gpt-4o-vision",
- displayName="OpenAI GPT-4o Vision",
+ name="gpt-4o",
+ displayName="OpenAI GPT-4o Instance Vision",
connectorType="openai",
apiUrl="https://api.openai.com/v1/chat/completions",
temperature=0.2,
@@ -106,7 +106,6 @@ class AiOpenai(BaseConnectorAi):
costPer1kTokensOutput=0.06,
speedRating=6, # Slower for vision tasks
qualityRating=9, # High quality vision
- # capabilities removed (not used in business logic)
functionCall=self.callAiImage,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
@@ -226,42 +225,16 @@ class AiOpenai(BaseConnectorAi):
# Extract parameters from modelCall
messages = modelCall.messages
model = modelCall.model
- options = modelCall.options
- prompt = messages[0]["content"] if messages else ""
- imageData = getattr(options, "imageData", None)
- mimeType = getattr(options, "mimeType", "image/jpeg")
- logger.debug(f"Starting image analysis with query '{prompt}' for size {len(imageData)}B...")
+ # Messages should already be in the correct format with image data embedded
+ # Just verify they contain image data
+ if not messages or not messages[0].get("content"):
+ raise ValueError("No messages provided for image analysis")
- # Ensure imageData is a string (base64 encoded)
- if not isinstance(imageData, str):
- raise ValueError("imageData must be a string (base64 encoded)")
+ logger.debug(f"Starting image analysis with {len(messages)} message(s)...")
- # Fix base64 padding if needed
- padding_needed = len(imageData) % 4
- if padding_needed:
- imageData += '=' * (4 - padding_needed)
-
- logger.debug(f"Using MIME type: {mimeType}")
- logger.debug(f"Base64 data length: {len(imageData)} characters")
-
- # Create the data URL format as required by OpenAI Vision API
- data_url = f"data:{mimeType};base64,{imageData}"
-
- messages = [
- {
- "role": "user",
- "content": [
- {"type": "text", "text": prompt},
- {
- "type": "image_url",
- "image_url": {
- "url": data_url
- }
- }
- ]
- }
- ]
+ # Use the messages directly - they should already contain the image data
+ # in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
# Use parameters from model
temperature = model.temperature
diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py
index f73cbd08..1da6c65f 100644
--- a/modules/datamodels/datamodelAi.py
+++ b/modules/datamodels/datamodelAi.py
@@ -194,6 +194,7 @@ class AiModelResponse(BaseModel):
# Structured prompt models for specialized operations
+
class AiCallPromptWebSearch(BaseModel):
"""Structured prompt format for WEB_SEARCH operation - returns list of URLs."""
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 2ae97586..5b458925 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -1,6 +1,7 @@
import logging
import asyncio
import uuid
+import base64
from typing import Dict, Any, List, Union, Tuple, Optional
from dataclasses import dataclass
import time
@@ -74,7 +75,7 @@ class AiObjects:
logger.info(f"Selected model: {selectedModel.name} ({selectedModel.displayName})")
return selectedModel.name
-
+ # AI for Extraction and Text Generation
async def call(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for text generation with model-aware chunking."""
# Handle content parts (unified path)
@@ -196,11 +197,71 @@ class AiObjects:
"""Process a single content part with model-aware chunking and fallback."""
lastError = None
+ # Check if this is an image - Vision models need special handling
+ isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
+
for attempt, model in enumerate(failoverModelList):
try:
logger.info(f"Processing content part with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
- # Check if part fits in model context
+ # Special handling for images with Vision models
+ if isImage and hasattr(model, 'functionCall'):
+ # Call model's functionCall directly (for Vision models this is callAiImage)
+ from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions as AiCallOpts
+
+ try:
+ modelCall = AiModelCall(
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": prompt},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:{contentPart.mimeType};base64,{contentPart.data}" if isinstance(contentPart.data, str) else
+ f"data:{contentPart.mimeType};base64,{base64.b64encode(contentPart.data).decode('utf-8')}"
+ }
+ }
+ ]
+ }
+ ],
+ model=model,
+ options=AiCallOpts(operationType=options.operationType)
+ )
+
+ modelResponse = await model.functionCall(modelCall)
+
+ if not modelResponse.success:
+ raise ValueError(f"Model call failed: {modelResponse.error}")
+
+ logger.info(f"✅ Image content part processed successfully with model: {model.name}")
+
+ # Convert to AiCallResponse format
+ return AiCallResponse(
+ content=modelResponse.content,
+ modelName=model.name,
+ priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0,
+ processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0,
+ bytesSent=0, # Will be calculated elsewhere
+ bytesReceived=0, # Will be calculated elsewhere
+ errorCount=0
+ )
+ except Exception as e:
+ # Image processing failed with this model
+ lastError = e
+ logger.warning(f"❌ Image processing failed with model {model.name}: {str(e)}")
+
+ # If this is not the last model, try the next one
+ if attempt < len(failoverModelList) - 1:
+ logger.info(f"🔄 Trying next fallback model for image processing...")
+ continue
+ else:
+ # All models failed
+ logger.error(f"💥 All {len(failoverModelList)} models failed for image processing")
+ raise
+
+ # For non-image parts, check if part fits in model context
partSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0
modelContextBytes = model.contextLength * 4 # Convert tokens to bytes
@@ -319,12 +380,13 @@ class AiObjects:
content_parts.append(content_part)
# Use existing merging system
- merge_strategy = {
- "useIntelligentMerging": True,
- "groupBy": "typeGroup",
- "orderBy": "id",
- "mergeType": "concatenate"
- }
+ from modules.datamodels.datamodelExtraction import MergeStrategy
+ merge_strategy = MergeStrategy(
+ useIntelligentMerging=True,
+ groupBy="typeGroup",
+ orderBy="id",
+ mergeType="concatenate"
+ )
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
@@ -365,12 +427,13 @@ class AiObjects:
content_parts.append(content_part)
# Use existing merging system
- merge_strategy = {
- "useIntelligentMerging": True,
- "groupBy": "typeGroup",
- "orderBy": "id",
- "mergeType": "concatenate"
- }
+ from modules.datamodels.datamodelExtraction import MergeStrategy
+ merge_strategy = MergeStrategy(
+ useIntelligentMerging=True,
+ groupBy="typeGroup",
+ orderBy="id",
+ mergeType="concatenate"
+ )
from modules.services.serviceExtraction.subPipeline import _applyMerging
merged_parts = _applyMerging(content_parts, merge_strategy)
@@ -462,118 +525,8 @@ class AiObjects:
errorCount=0
)
- async def callImage(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None, options: AiCallOptions = None) -> AiCallResponse:
- """Call AI model for image analysis with fallback mechanism."""
-
- if options is None:
- options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
-
- # Get fallback models for image analysis
- availableModels = modelRegistry.getAvailableModels()
- failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
-
- if not failoverModelList:
- errorMsg = f"No suitable models found for image analysis"
- logger.error(errorMsg)
- return AiCallResponse(
- content=errorMsg,
- modelName="error",
- priceUsd=0.0,
- processingTime=0.0,
- bytesSent=0,
- bytesReceived=0,
- errorCount=1
- )
-
- # Try each model in fallback sequence
- lastError = None
- for attempt, model in enumerate(failoverModelList):
- try:
- logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
-
- # Call the model
- response = await self._callImageWithModel(model, prompt, imageData, mimeType)
-
- logger.info(f"✅ Image analysis successful with model: {model.name}")
- return response
-
- except Exception as e:
- lastError = e
- logger.warning(f"❌ Image analysis failed with model {model.name}: {str(e)}")
-
- # If this is not the last model, try the next one
- if attempt < len(failoverModelList) - 1:
- logger.info(f"🔄 Trying next fallback model for image analysis...")
- continue
- else:
- # All models failed
- logger.error(f"💥 All {len(failoverModelList)} models failed for image analysis")
- break
-
- # All fallback attempts failed - return error response
- errorMsg = f"All AI models failed for image analysis. Last error: {str(lastError)}"
- logger.error(errorMsg)
- return AiCallResponse(
- content=errorMsg,
- modelName="error",
- priceUsd=0.0,
- processingTime=0.0,
- bytesSent=0,
- bytesReceived=0,
- errorCount=1
- )
-
- async def _callImageWithModel(self, model: AiModel, prompt: str, imageData: Union[str, bytes], mimeType: str) -> AiCallResponse:
- """Call a specific model for image analysis and return the response."""
- # Calculate input bytes from prompt and image data
- promptBytes = len(prompt.encode('utf-8'))
- if isinstance(imageData, str):
- # Base64 encoded string
- imageBytes = len(imageData.encode('utf-8'))
- else:
- # Raw bytes
- imageBytes = len(imageData)
- inputBytes = promptBytes + imageBytes
-
- # Start timing
- startTime = time.time()
-
- # Create standardized call object for image analysis
- modelCall = AiModelCall(
- messages=[{"role": "user", "content": prompt}],
- model=model,
- options=AiCallOptions(imageData=imageData, mimeType=mimeType)
- )
-
- # Call the model with standardized interface
- if model.functionCall:
- modelResponse = await model.functionCall(modelCall)
-
- # Extract content from standardized response
- if not modelResponse.success:
- raise ValueError(f"Model call failed: {modelResponse.error}")
- content = modelResponse.content
- else:
- raise ValueError(f"Model {model.name} has no function call defined")
-
- # Calculate timing and output bytes
- endTime = time.time()
- processingTime = endTime - startTime
- outputBytes = len(content.encode("utf-8"))
-
- # Calculate price using model's own price calculation method
- priceUsd = model.calculatePriceUsd(processingTime, inputBytes, outputBytes)
-
- return AiCallResponse(
- content=content,
- modelName=model.name,
- priceUsd=priceUsd,
- processingTime=processingTime,
- bytesSent=inputBytes,
- bytesReceived=outputBytes,
- errorCount=0
- )
+ # AI for Image Generation
async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", options: AiCallOptions = None) -> AiCallResponse:
"""Generate an image using AI."""
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 2b876de3..27434915 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -100,7 +100,7 @@ class AiService:
mimeType: str = None,
options: Optional[AiCallOptions] = None,
) -> str:
- """Call AI for image analysis using interface.callImage()."""
+ """Call AI for image analysis using interface.call() with contentParts."""
await self._ensureAiObjectsInitialized()
return await self.coreAi.readImage(prompt, imageData, mimeType, options)
diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py
index c0f6d6a4..0952e750 100644
--- a/modules/services/serviceAi/subCoreAi.py
+++ b/modules/services/serviceAi/subCoreAi.py
@@ -565,7 +565,7 @@ CRITICAL REQUIREMENTS:
mimeType: str = None,
options: Optional[AiCallOptions] = None,
) -> str:
- """Call AI for image analysis using interface.callImage()."""
+ """Call AI for image analysis using interface.call() with contentParts."""
try:
# Check if imageData is valid
if not imageData:
@@ -584,30 +584,51 @@ CRITICAL REQUIREMENTS:
# Override the operation type to ensure image analysis
options.operationType = OperationTypeEnum.IMAGE_ANALYSE
- self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
- logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
+ # Create content parts with image data
+ from modules.datamodels.datamodelExtraction import ContentPart
+ import base64
+
+ # ContentPart.data must be a string - convert bytes to base64 if needed
+ if isinstance(imageData, bytes):
+ imageDataStr = base64.b64encode(imageData).decode('utf-8')
+ else:
+ # Already a base64 string
+ imageDataStr = imageData
+
+ imagePart = ContentPart(
+ id="image_0",
+ parentId=None,
+ label="Image",
+ typeGroup="image",
+ mimeType=mimeType or "image/jpeg",
+ data=imageDataStr, # Must be a string (base64 encoded)
+ metadata={"imageAnalysis": True}
+ )
+
+ # Create request with content parts
+ from modules.datamodels.datamodelAi import AiCallRequest
+ request = AiCallRequest(
+ prompt=prompt,
+ context="",
+ options=options,
+ contentParts=[imagePart]
+ )
+
+ self.services.utils.debugLogToFile(f"Calling aiObjects.call() with operationType: {options.operationType}", "AI_SERVICE")
+ logger.info(f"Calling aiObjects.call() with operationType: {options.operationType}")
# Write image analysis prompt to debug file
self.services.utils.writeDebugFile(prompt, "image_analysis_prompt")
- response = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
+ response = await self.aiObjects.call(request)
# Write image analysis response to debug file
- result = response.content if hasattr(response, 'content') else str(response)
+ # response is an AiCallResponse object
+ result = response.content
self.services.utils.writeDebugFile(result, "image_analysis_response")
- # Emit stats for image analysis
- self.services.workflow.storeWorkflowStat(
- self.services.currentWorkflow,
- response,
- f"ai.image.{options.operationType}"
- )
-
# Debug the result
- self.services.utils.debugLogToFile(f"Raw AI result type: {type(response)}, value: {repr(response)}", "AI_SERVICE")
-
- # Extract content from response
- result = response.content if hasattr(response, 'content') else str(response)
+ self.services.utils.debugLogToFile(f"AI image analysis result type: {type(response)}, content length: {len(result)}", "AI_SERVICE")
# Check if result is valid
if not result or (isinstance(result, str) and not result.strip()):
diff --git a/test_ai_models.py b/test_ai_models.py
index 37772ee3..07485087 100644
--- a/test_ai_models.py
+++ b/test_ai_models.py
@@ -1,9 +1,31 @@
#!/usr/bin/env python3
"""
-AI Models Test - Tests WEB_CRAWL functionality on all models that support it
+AI Models Test - Tests IMAGE_ANALYSE functionality on all models that support it
-This script tests all models that have WEB_CRAWL capability, validates that
-they can crawl specific URLs and return content, and analyzes the quality of results.
+This script tests all models that have IMAGE_ANALYSE capability, validates that
+they can analyze images and return structured content, and analyzes the quality of results.
+
+CODE FLOW ANALYSIS:
+
+1. methodAi.process() is called by AI planner with prompt and documents (images)
+2. mainServiceAi.callAiDocuments() is called
+ -> delegates to subCoreAi.callAiDocuments()
+ -> which calls subDocumentProcessing.callAiText()
+ -> which processes chunks and detects images
+ -> for image chunks, calls subCoreAi.readImage()
+      -> which calls aiObjects.call() with contentParts and operationType=IMAGE_ANALYSE
+
+OR direct call:
+- mainServiceAi.readImage() can be called directly (used in this test)
+ -> delegates to subCoreAi.readImage()
+   -> which calls aiObjects.call() with contentParts and operationType=IMAGE_ANALYSE
+
+WHERE FUNCTIONS ARE USED:
+- mainServiceAi.readImage(): Public API entry point for direct image analysis
+- mainServiceAi.generateImage(): Public API entry point for image generation
+- subCoreAi.readImage(): Internal implementation, called by document processing or directly
+- subCoreAi.generateImage(): Internal implementation, called by mainServiceAi
+- subDocumentProcessing._processChunksWithMapping(): Detects image chunks and calls readImage()
"""
import asyncio
@@ -53,6 +75,22 @@ class AIModelsTester:
import shutil
shutil.copy2(testImageSource, testImageDest)
print(f"📷 Test image copied to: {testImageDest}")
+
+ # Find test image
+ self.testImagePath = None
+ if os.path.exists(testImageDest):
+ self.testImagePath = testImageDest
+ else:
+ # Try to find any image in modeltest directory
+ for file in os.listdir(self.modelTestDir):
+ if file.lower().endswith(('.jpg', '.jpeg', '.png')):
+ self.testImagePath = os.path.join(self.modelTestDir, file)
+ break
+
+ if self.testImagePath:
+ print(f"📷 Using test image: {self.testImagePath}")
+ else:
+ print(f"⚠️ No test image found in {self.modelTestDir}")
async def initialize(self):
"""Initialize the AI service."""
@@ -65,14 +103,18 @@ class AIModelsTester:
from modules.aicore.aicorePluginTavily import AiTavily
from modules.aicore.aicorePluginPerplexity import AiPerplexity
- # Register web connectors that support WEB_CRAWL
- modelRegistry.registerConnector(AiTavily())
- modelRegistry.registerConnector(AiPerplexity())
+ # Note: We don't need to register web connectors for IMAGE_ANALYSE testing
+ # modelRegistry.registerConnector(AiTavily())
+ # modelRegistry.registerConnector(AiPerplexity())
# The AI service needs to be recreated with proper initialization
from modules.services.serviceAi.mainServiceAi import AiService
self.services.ai = await AiService.create(self.services)
+ # Also initialize extraction service for image processing
+ from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
+ self.services.extraction = ExtractionService(self.services)
+
# Create a minimal workflow context
from modules.datamodels.datamodelChat import ChatWorkflow
import uuid
@@ -98,228 +140,150 @@ class AIModelsTester:
print(f"📁 Results will be saved to: {self.modelTestDir}")
async def testModel(self, modelName: str) -> Dict[str, Any]:
- """Test a specific AI model with WEB_CRAWL operation."""
+ """Test a specific AI model with IMAGE_ANALYSE operation."""
print(f"\n{'='*60}")
print(f"TESTING MODEL: {modelName}")
- print(f"OPERATION TYPE: WEB_CRAWL")
+ print(f"OPERATION TYPE: IMAGE_ANALYSE")
print(f"{'='*60}")
- # CRAWL CONFIGURATION
- # Deep and Broad Web Crawl Example:
- # - maxDepth: 3 (deep) - follows links up to 3 levels from starting page
- # - Level 1: Starting page
- # - Level 2: Pages linked from starting page
- # - Level 3: Pages linked from Level 2 pages
- # - maxWidth: 50 (broad) - crawls up to 50 pages at each depth level
- # This results in potential maximum of ~1,250 pages (if 50 links exist at each level)
- #
- # Common configurations:
- # - Fast/Overview: maxDepth=1, maxWidth=5 (shallow, focused)
- # - General/Standard: maxDepth=2, maxWidth=10 (balanced)
- # - Deep and Broad: maxDepth=3, maxWidth=50 (comprehensive)
+ # Check if test image exists
+ if not self.testImagePath or not os.path.exists(self.testImagePath):
+ result = {
+ "modelName": modelName,
+ "status": "ERROR",
+ "processingTime": 0.0,
+ "responseLength": 0,
+ "responseType": "error",
+ "hasContent": False,
+ "error": "No test image available",
+ "fullResponse": ""
+ }
+ self.testResults.append(result)
+ return result
- CRAWL_DEPTH = 3 # Deep crawl: follows links 3 levels deep
- CRAWL_WIDTH = 50 # Broad crawl: up to 50 pages per level
-
- print(f"Crawl Configuration:")
- print(f" - Depth: {CRAWL_DEPTH} levels (deep)")
- print(f" - Width: {CRAWL_WIDTH} pages per level (broad)")
- print(f" - Theoretical max: {CRAWL_WIDTH ** min(CRAWL_DEPTH, 3)} pages")
-
- # Use WEB_CRAWL specific prompt format
- from modules.datamodels.datamodelAi import AiCallPromptWebCrawl
-
- # Test with simple prompt like playground example
- simplePrompt = f"https://www.valueon.ch: Who works in this company?"
-
- # But keep structured format for now to match our API
- testPrompt = json.dumps({
- "instruction": "Who works in this company?",
- "url": "https://www.valueon.ch",
- "maxDepth": CRAWL_DEPTH,
- "maxWidth": CRAWL_WIDTH
- }, indent=2)
-
- print(f"Simple prompt (playground style): {simplePrompt}")
-
- # For Tavily models, test direct API call for better link following
- if "tavily" in modelName.lower():
- return await self._testTavilyDirect(modelName, CRAWL_DEPTH, CRAWL_WIDTH)
+ # Test prompt for image analysis
+ testPrompt = "Analyze this image and describe what you see. Extract any text, numbers, or structured data."
+ print(f"Test image: {self.testImagePath}")
print(f"Test prompt: {testPrompt}")
- print(f"Prompt length: {len(testPrompt)} characters")
+
+ # Load image data
+ with open(self.testImagePath, 'rb') as f:
+ imageData = f.read()
+
+ print(f"Image size: {len(imageData)} bytes")
+
+ # Determine image MIME type from extension
+ if self.testImagePath.lower().endswith('.png'):
+ mimeType = "image/png"
+ elif self.testImagePath.lower().endswith(('.jpg', '.jpeg')):
+ mimeType = "image/jpeg"
+ else:
+            mimeType = "image/jpeg"  # Fallback for unrecognized file extensions
+
+ print(f"Image MIME type: {mimeType}")
startTime = asyncio.get_event_loop().time()
try:
- # Create options for WEB_CRAWL operation
- options = AiCallOptions(
- operationType=OperationTypeEnum.WEB_CRAWL,
- preferredModel=modelName
- )
-
- # Call the AI service DIRECTLY through the model's functionCall
- # This tests the actual model, not the document generation pipeline
- # Get the model directly from the registry using the model registry
+ # Get model directly from registry and test it
from modules.aicore.aicoreModelRegistry import modelRegistry
model = modelRegistry.getModel(modelName)
if not model:
raise Exception(f"Model {modelName} not found")
- # Create AiModelCall and call the model's functionCall directly
- from modules.datamodels.datamodelAi import AiModelCall
+ # Import base64 for image data conversion
import base64
- import os
- # For WEB_CRAWL models, use normal functionCall with structured prompt
- messages = [{"role": "user", "content": testPrompt}]
+ # Convert image data to base64 string
+ if isinstance(imageData, bytes):
+ imageDataStr = base64.b64encode(imageData).decode('utf-8')
+ else:
+ imageDataStr = imageData
+
+ # Create messages in vision format
+ messages = [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": testPrompt},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:{mimeType};base64,{imageDataStr}"
+ }
+ }
+ ]
+ }
+ ]
+
+ # Create model call
+ from modules.datamodels.datamodelAi import AiModelCall, AiCallOptions
modelCall = AiModelCall(
messages=messages,
model=model,
- options=options
+ options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
)
- response = await model.functionCall(modelCall)
+
+ # Call model directly
+ print(f"Calling model.functionCall() for {modelName}")
+ modelResponse = await model.functionCall(modelCall)
+
+ if not modelResponse.success:
+ raise Exception(f"Model call failed: {modelResponse.error}")
+
+ result = modelResponse.content
endTime = asyncio.get_event_loop().time()
processingTime = endTime - startTime
- # Analyze response - now we get AiModelResponse objects
- if hasattr(response, 'success'):
- # AiModelResponse object
- if response.success:
- result = {
- "modelName": modelName,
- "status": "SUCCESS",
- "processingTime": round(processingTime, 2),
- "responseLength": len(response.content) if response.content else 0,
- "responseType": "AiModelResponse",
- "hasContent": bool(response.content),
- "error": None,
- "modelUsed": modelName,
- "priceUsd": 0.0, # AiModelResponse doesn't have price info
- "bytesSent": 0,
- "bytesReceived": len(response.content.encode('utf-8')) if response.content else 0
- }
-
- # Extract actual prompt sent if available in metadata
- if hasattr(response, 'metadata') and response.metadata:
- result["actualPromptSent"] = response.metadata.get("actualPromptSent", "N/A")
-
- # Try to parse content as JSON
- if response.content:
- try:
- json.loads(response.content)
- result["isValidJson"] = True
- except:
- result["isValidJson"] = False
-
- result["responsePreview"] = response.content[:200] + "..." if len(response.content) > 200 else response.content
- result["fullResponse"] = response.content
- else:
- result["isValidJson"] = False
- result["responsePreview"] = "Empty response"
- result["fullResponse"] = ""
-
- print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
- print(f"📄 Response length: {len(response.content) if response.content else 0} characters")
- print(f"📄 Model used: {modelName}")
- print(f"📄 Response preview: {result['responsePreview']}")
-
- else:
- error = response.error or "Unknown error"
- result = {
- "modelName": modelName,
- "status": "ERROR",
- "processingTime": round(processingTime, 2),
- "responseLength": 0,
- "responseType": "AiModelResponse",
- "hasContent": False,
- "error": error,
- "fullResponse": str(response)
- }
-
- print(f"❌ ERROR - {error}")
-
- elif isinstance(response, dict):
- # Fallback for dict responses
- if response.get("success", True):
- result = {
- "modelName": modelName,
- "status": "SUCCESS",
- "processingTime": round(processingTime, 2),
- "responseLength": len(str(response)),
- "responseType": "dict",
- "hasContent": True,
- "error": None
- }
-
- # Try to parse as JSON
- try:
- jsonResponse = json.dumps(response, indent=2)
- result["responsePreview"] = jsonResponse[:200] + "..." if len(jsonResponse) > 200 else jsonResponse
- result["isValidJson"] = True
- result["fullResponse"] = jsonResponse
- except:
- result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
- result["isValidJson"] = False
- result["fullResponse"] = str(response)
-
- print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
- print(f"📄 Response length: {len(str(response))} characters")
- print(f"📄 Response preview: {result['responsePreview']}")
-
- else:
- error = response.get("error", "Unknown error")
- result = {
- "modelName": modelName,
- "status": "ERROR",
- "processingTime": round(processingTime, 2),
- "responseLength": 0,
- "responseType": "error",
- "hasContent": False,
- "error": error,
- "fullResponse": str(response)
- }
-
- print(f"❌ ERROR - {error}")
-
- else:
- # String response
- result = {
+ # Analyze result (string response from readImage)
+ if result:
+ analysisResult = {
"modelName": modelName,
"status": "SUCCESS",
"processingTime": round(processingTime, 2),
- "responseLength": len(str(response)),
+ "responseLength": len(result) if result else 0,
"responseType": "string",
"hasContent": True,
- "error": None
+ "error": None,
+ "testPrompt": testPrompt,
+ "imagePath": self.testImagePath,
+ "imageSize": len(imageData),
+ "mimeType": mimeType
}
# Try to parse as JSON
try:
- json.loads(str(response))
- result["isValidJson"] = True
+ json.loads(result)
+ analysisResult["isValidJson"] = True
except:
- result["isValidJson"] = False
+ analysisResult["isValidJson"] = False
- result["responsePreview"] = str(response)[:200] + "..." if len(str(response)) > 200 else str(response)
- result["fullResponse"] = str(response)
+ analysisResult["responsePreview"] = result[:200] + "..." if len(result) > 200 else result
+ analysisResult["fullResponse"] = result
print(f"✅ SUCCESS - Processing time: {processingTime:.2f}s")
- print(f"📄 Response length: {len(str(response))} characters")
- print(f"📄 Response preview: {result['responsePreview']}")
-
- # Add prompt to result for logging
- result["testPrompt"] = testPrompt
- result["crawlConfig"] = {
- "depth": CRAWL_DEPTH,
- "width": CRAWL_WIDTH
- }
-
- # For WEB_CRAWL, also validate that content was extracted
- if result.get("status") == "SUCCESS" and result.get("fullResponse"):
- self._validateCrawlResponse(modelName, result)
+ print(f"📄 Response length: {len(result)} characters")
+ print(f"📄 Response preview: {analysisResult['responsePreview']}")
+
+ result = analysisResult
+
+ # Validate that content was extracted
+ if result.get("status") == "SUCCESS" and result.get("fullResponse"):
+ self._validateImageResponse(modelName, result)
+ else:
+ result = {
+ "modelName": modelName,
+ "status": "ERROR",
+ "processingTime": round(processingTime, 2),
+ "responseLength": 0,
+ "responseType": "error",
+ "hasContent": False,
+ "error": "Empty response",
+ "fullResponse": ""
+ }
except Exception as e:
endTime = asyncio.get_event_loop().time()
@@ -334,10 +298,9 @@ class AIModelsTester:
"hasContent": False,
"error": str(e),
"testPrompt": testPrompt,
- "crawlConfig": {
- "depth": CRAWL_DEPTH,
- "width": CRAWL_WIDTH
- }
+ "imagePath": self.testImagePath,
+ "imageSize": len(imageData) if imageData else 0,
+ "mimeType": mimeType
}
print(f"💥 EXCEPTION - {str(e)}")
@@ -346,7 +309,7 @@ class AIModelsTester:
# Save text response even for exceptions to log the prompt
if result.get("status") in ["SUCCESS", "EXCEPTION", "ERROR"]:
- self._saveTextResponse(modelName, result)
+ self._saveImageResponse(modelName, result)
# Save individual model result immediately
self._saveIndividualModelResult(modelName, result)
@@ -354,54 +317,48 @@ class AIModelsTester:
return result
def _saveImageResponse(self, modelName: str, result: Dict[str, Any]):
- """Save base64 image response to file."""
+ """Save image analysis response to file."""
try:
- fullResponse = result.get("fullResponse", "")
- base64Data = None
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ filename = f"{modelName}_{timestamp}.txt"
+ filepath = os.path.join(self.modelTestDir, filename)
- # Try to extract base64 data from response
- if isinstance(fullResponse, dict):
- # Look for base64 data in the response
- if "content" in fullResponse:
- base64Data = fullResponse["content"]
- elif "data" in fullResponse:
- base64Data = fullResponse["data"]
- elif "image" in fullResponse:
- base64Data = fullResponse["image"]
- else:
- # Try to find base64 data in string response
- import re
- base64Match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)', str(fullResponse))
- if base64Match:
- base64Data = base64Match.group(1)
- else:
- # Try to find pure base64 string
- base64Match = re.search(r'([A-Za-z0-9+/=]{100,})', str(fullResponse))
- if base64Match:
- base64Data = base64Match.group(1)
+ # Prepare content for saving
+ content = result.get("fullResponse", "")
+ if not content:
+ content = result.get("responsePreview", "No content available")
- if base64Data:
- # Clean base64 data
- if base64Data.startswith('data:image/'):
- base64Data = base64Data.split(',', 1)[1]
-
- # Decode and save image
- imageData = base64.b64decode(base64Data)
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- filename = f"{modelName}_{timestamp}.png"
- filepath = os.path.join(self.modelTestDir, filename)
-
- with open(filepath, 'wb') as f:
- f.write(imageData)
-
- result["savedImage"] = filepath
- print(f"🖼️ Image saved: {filepath}")
- else:
- print(f"⚠️ No base64 image data found in response")
+ # If there's an error, include it in the content
+ if result.get("error"):
+ content = f"ERROR: {result.get('error')}\n\n{content}"
+
+ # Add metadata header
+ metadata = f"""Model: {modelName}
+Test Time: {timestamp}
+Status: {result.get('status', 'Unknown')}
+Processing Time: {result.get('processingTime', 0):.2f}s
+Response Length: {result.get('responseLength', 0)} characters
+Is Valid JSON: {result.get('isValidJson', False)}
+Image Path: {result.get('imagePath', 'N/A')}
+Image Size: {result.get('imageSize', 'N/A')} bytes
+MIME Type: {result.get('mimeType', 'N/A')}
+
+--- TEST PROMPT ---
+{result.get('testPrompt', 'N/A')}
+
+--- RESPONSE CONTENT ---
+{content}
+"""
+
+ with open(filepath, 'w', encoding='utf-8') as f:
+ f.write(metadata)
+
+ result["savedTextFile"] = filepath
+ print(f"📄 Analysis response saved: {filepath}")
except Exception as e:
- print(f"❌ Error saving image: {str(e)}")
- result["imageSaveError"] = str(e)
+ print(f"❌ Error saving analysis response: {str(e)}")
+ result["saveError"] = str(e)
def _saveTextResponse(self, modelName: str, result: Dict[str, Any]):
"""Save text response to file."""
@@ -504,6 +461,41 @@ Width: {crawlWidth}
print(f"❌ Error validating crawl response: {str(e)}")
result["crawlValidationError"] = str(e)
+ def _validateImageResponse(self, modelName: str, result: Dict[str, Any]):
+ """Validate that the IMAGE_ANALYSE response contains analyzed content."""
+ try:
+ content = result.get("fullResponse", "")
+
+ # Check if content is meaningful
+ hasContent = bool(content and len(content.strip()) > 0)
+ contentLength = len(content)
+
+ result["hasContent"] = hasContent
+ result["contentLength"] = contentLength
+
+ # Try to determine what kind of content was extracted
+ if hasContent:
+ # Check if it's structured data
+ isStructured = False
+ try:
+ parsed = json.loads(content)
+ if isinstance(parsed, dict):
+ isStructured = True
+ except:
+ pass
+
+ result["isStructured"] = isStructured
+
+ print(f"✅ Successfully analyzed image")
+ print(f" Content length: {contentLength} characters")
+ print(f" Is structured: {'Yes' if isStructured else 'No'}")
+ else:
+ print(f"⚠️ Empty or invalid image analysis response")
+
+ except Exception as e:
+ print(f"❌ Error validating image response: {str(e)}")
+ result["validationError"] = str(e)
+
async def _testTavilyDirect(self, modelName: str, crawlDepth: int = 3, crawlWidth: int = 50) -> Dict[str, Any]:
"""Test Tavily API directly using the crawl() method with better link following."""
print(f"\n{'='*60}")
@@ -660,30 +652,30 @@ Width: {crawlWidth}
print(f"❌ Error saving individual result: {str(e)}")
def getAllAvailableModels(self) -> List[str]:
- """Get all available model names that support WEB_CRAWL."""
+ """Get all available model names that support IMAGE_ANALYSE."""
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.datamodels.datamodelAi import OperationTypeEnum
# Get all models from registry
allModels = modelRegistry.getAvailableModels()
- # Filter models that support WEB_CRAWL
- webCrawlModels = []
+ # Filter models that support IMAGE_ANALYSE
+ imageAnalyseModels = []
for model in allModels:
if model.operationTypes and any(
- ot.operationType == OperationTypeEnum.WEB_CRAWL
+ ot.operationType == OperationTypeEnum.IMAGE_ANALYSE
for ot in model.operationTypes
- ): # Include both Tavily and Perplexity models
- webCrawlModels.append(model.name)
+ ):
+ imageAnalyseModels.append(model.name)
- # Filter to only "sonar" model for testing
- webCrawlModels = [m for m in webCrawlModels if m == "sonar"]
+        # Optional filter to restrict the run to specific models (currently disabled: all IMAGE_ANALYSE models are tested)
+ # imageAnalyseModels = [m for m in imageAnalyseModels if "gpt" in m.lower() or "claude" in m.lower()]
- print(f"Found {len(webCrawlModels)} models that support WEB_CRAWL (filtered to sonar):")
- for modelName in webCrawlModels:
+ print(f"Found {len(imageAnalyseModels)} models that support IMAGE_ANALYSE:")
+ for modelName in imageAnalyseModels:
print(f" - {modelName}")
- return webCrawlModels
+ return imageAnalyseModels
def saveTestResults(self):
"""Save detailed test results to file."""
@@ -802,7 +794,7 @@ async def main():
"""Run AI models testing for WEB_CRAWL operation."""
tester = AIModelsTester()
- print("Starting AI Models Testing for WEB_CRAWL...")
+ print("Starting AI Models Testing for IMAGE_ANALYSE...")
print("Initializing AI service...")
await tester.initialize()
@@ -814,9 +806,9 @@ async def main():
print(f" {i}. {model}")
print(f"\n{'='*80}")
- print("STARTING WEB_CRAWL TESTS")
+ print("STARTING IMAGE_ANALYSE TESTS")
print(f"{'='*80}")
- print("Testing each model's ability to crawl URLs and return content...")
+ print("Testing each model's ability to analyze images and return structured content...")
print("Press Enter after each model test to continue to the next one...")
# Test each model individually
@@ -840,7 +832,7 @@ async def main():
print("TESTING COMPLETED")
print(f"{'='*80}")
print(f"📄 Results saved to: {resultsFile}")
- print(f"📁 Images saved to: {tester.modelTestDir}")
+ print(f"📁 Test results saved to: {tester.modelTestDir}")
if __name__ == "__main__":
asyncio.run(main())