From bb10a46cd5db09bfdc648c7f68314a707b48d0f7 Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Fri, 6 Feb 2026 10:26:54 +0100
Subject: [PATCH] Integrate Private-LLM connector
---
app.py | 6 +
env_dev.env | 1 +
env_int.env | 1 +
env_prod.env | 1 +
modules/aicore/aicorePluginPrivateLlm.py | 496 +++++++++++++++++++++++
5 files changed, 505 insertions(+)
create mode 100644 modules/aicore/aicorePluginPrivateLlm.py
diff --git a/app.py b/app.py
index 609d0c07..9aa05093 100644
--- a/app.py
+++ b/app.py
@@ -404,10 +404,16 @@ def getAllowedOrigins():
return origins
+# CORS origin regex pattern for wildcard subdomain support
+# Matches all subdomains of poweron.swiss and poweron-center.net
+CORS_ORIGIN_REGEX = r"https://.*\.(poweron\.swiss|poweron-center\.net)"
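+# Example origins (Starlette fullmatches this pattern against the Origin header):
+#   https://app.poweron.swiss        -> allowed
+#   https://x.y.poweron-center.net   -> allowed
+#   https://poweron.swiss            -> NOT matched (no subdomain)
+#   http://app.poweron.swiss         -> NOT matched (wrong scheme)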
+
+
# CORS configuration using environment variables
app.add_middleware(
CORSMiddleware,
allow_origins=getAllowedOrigins(),
+ allow_origin_regex=CORS_ORIGIN_REGEX,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
allow_headers=["*"],
diff --git a/env_dev.env b/env_dev.env
index ac5349a7..5339bbaf 100644
--- a/env_dev.env
+++ b/env_dev.env
@@ -40,6 +40,7 @@ Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0=
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
+Connector_AiPrivateLlm_API_SECRET = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
# Microsoft Service Configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
diff --git a/env_int.env b/env_int.env
index 05313802..5534cbdf 100644
--- a/env_int.env
+++ b/env_int.env
@@ -40,6 +40,7 @@ Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJF
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0=
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
+Connector_AiPrivateLlm_API_SECRET = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
# Microsoft Service Configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
diff --git a/env_prod.env b/env_prod.env
index 57a4e83c..a7b4512c 100644
--- a/env_prod.env
+++ b/env_prod.env
@@ -40,6 +40,7 @@ Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3U
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm82Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
+Connector_AiPrivateLlm_API_SECRET = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
# Microsoft Service Configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py
new file mode 100644
index 00000000..3b9754d2
--- /dev/null
+++ b/modules/aicore/aicorePluginPrivateLlm.py
@@ -0,0 +1,496 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+AI Connector for PowerOn Private-LLM Service.
+
+Connects to the private-llm service running on-premise with Ollama backend.
+Provides OCR and Vision capabilities via local AI models.
+
+Models:
+- poweron-ocr-general: Text extraction and OCR (deepseek backend)
+- poweron-vision-general: General vision tasks (qwen2.5vl backend)
+- poweron-vision-deep: Deep vision analysis (granite3.2 backend)
+
+Pricing (CHF per call):
+- Text models: CHF 0.010
+- Vision models: CHF 0.100
+"""
+
+import logging
+import httpx
+import time
+from typing import List, Optional, Dict, Any
+from fastapi import HTTPException
+from modules.shared.configuration import APP_CONFIG
+from .aicoreBase import BaseConnectorAi
+from modules.datamodels.datamodelAi import (
+ AiModel,
+ PriorityEnum,
+ ProcessingModeEnum,
+ OperationTypeEnum,
+ AiModelCall,
+ AiModelResponse,
+ createOperationTypeRatings
+)
+
+# Configure logger
+logger = logging.getLogger(__name__)
+
+# Pricing constants (CHF)
+PRICE_TEXT_PER_CALL = 0.01 # CHF 0.010 per text model call
+PRICE_VISION_PER_CALL = 0.10 # CHF 0.100 per vision model call
+
+
+# Private-LLM service URL (fixed; intentionally not configurable via environment variables)
+PRIVATE_LLM_BASE_URL = "https://llm.poweron.swiss:8000"
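+
+# Wire format used by this connector (shape inferred from the call sites below):
+#   POST {PRIVATE_LLM_BASE_URL}/api/analyze
+#     request:  {"modelName": "...", "prompt": "...", "imageBase64": "<base64>" or null}
+#     response: {"success": bool, "data": {...}, "rawResponse": "...", "error": "..."}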
+
+
+def _loadConfigData():
+ """Load configuration data for Private-LLM connector."""
+ return {
+ "apiKey": APP_CONFIG.get("Connector_AiPrivateLlm_API_SECRET"),
+ "baseUrl": PRIVATE_LLM_BASE_URL,
+ }
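+
+# Example: with "Connector_AiPrivateLlm_API_SECRET = <key>" in the active env file
+# (added above for dev/int/prod), APP_CONFIG.get(...) returns the key, which is
+# sent as the "X-API-Key" header on every request (see __init__ below).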
+
+
+class AiPrivateLlm(BaseConnectorAi):
+ """Connector for communication with the PowerOn Private-LLM Service."""
+
+ def __init__(self):
+ super().__init__()
+ # Load configuration
+ self.config = _loadConfigData()
+ self.apiKey = self.config["apiKey"]
+ self.baseUrl = self.config["baseUrl"]
+
+ # HTTP client for API calls
+ # Timeout set to 3600 seconds (60 minutes) for large model processing
+ headers = {"Content-Type": "application/json"}
+ if self.apiKey:
+ headers["X-API-Key"] = self.apiKey
+
+ self.httpClient = httpx.AsyncClient(
+ timeout=3600.0,
+ headers=headers
+ )
+
+ # Cache for service availability check
+ self._serviceAvailable: Optional[bool] = None
+ self._availableOllamaModels: Optional[List[str]] = None
+ self._lastAvailabilityCheck: float = 0
+ self._availabilityCacheTtl: float = 60.0 # 60 seconds cache
+
+ logger.info(f"Private-LLM Connector initialized (URL: {self.baseUrl})")
+
+ def getConnectorType(self) -> str:
+ """Get the connector type identifier."""
+ return "privatellm"
+
+ def _checkServiceAvailability(self) -> Dict[str, Any]:
+ """
+ Check if the Private-LLM service is available and which Ollama models are installed.
+ Uses caching to avoid excessive health checks.
+
+ Returns:
+ Dict with 'serviceAvailable', 'ollamaConnected', 'availableModels'
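+            e.g. {"serviceAvailable": True, "ollamaConnected": True,
+                  "availableModels": ["deepseek-ocr", "qwen2.5vl:7b"]}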
+ """
+
+ currentTime = time.time()
+
+ # Return cached result if still valid
+ if (self._serviceAvailable is not None and
+ currentTime - self._lastAvailabilityCheck < self._availabilityCacheTtl):
+ return {
+ "serviceAvailable": self._serviceAvailable,
+ "ollamaConnected": self._serviceAvailable,
+ "availableModels": self._availableOllamaModels or []
+ }
+
+ # Perform availability check
+ try:
+            # Use a synchronous client: this check is called from synchronous code paths (e.g. getModels)
+ with httpx.Client(timeout=5.0) as client:
+ headers = {"Content-Type": "application/json"}
+ if self.apiKey:
+ headers["X-API-Key"] = self.apiKey
+
+ # Check health endpoint
+ healthResponse = client.get(
+ f"{self.baseUrl}/api/health",
+ headers=headers
+ )
+
+ if healthResponse.status_code != 200:
+ logger.warning(f"Private-LLM service not available: HTTP {healthResponse.status_code}")
+ self._serviceAvailable = False
+ self._availableOllamaModels = []
+ self._lastAvailabilityCheck = currentTime
+ return {"serviceAvailable": False, "ollamaConnected": False, "availableModels": []}
+
+ healthData = healthResponse.json()
+ ollamaConnected = healthData.get("ollamaConnected", False)
+
+ if not ollamaConnected:
+ logger.warning("Private-LLM service available but Ollama not connected")
+ self._serviceAvailable = True
+ self._availableOllamaModels = []
+ self._lastAvailabilityCheck = currentTime
+ return {"serviceAvailable": True, "ollamaConnected": False, "availableModels": []}
+
+ # Check Ollama status for available models
+ statusResponse = client.get(
+ f"{self.baseUrl}/api/ollama/status",
+ headers=headers
+ )
+
+ if statusResponse.status_code == 200:
+ statusData = statusResponse.json()
+ self._availableOllamaModels = statusData.get("models", [])
+ else:
+ self._availableOllamaModels = []
+
+ self._serviceAvailable = True
+ self._lastAvailabilityCheck = currentTime
+
+ logger.info(f"Private-LLM availability check: service=OK, ollama=OK, models={len(self._availableOllamaModels)}")
+
+ return {
+ "serviceAvailable": True,
+ "ollamaConnected": True,
+ "availableModels": self._availableOllamaModels
+ }
+
+ except httpx.ConnectError:
+ logger.warning(f"Private-LLM service not reachable at {self.baseUrl}")
+ self._serviceAvailable = False
+ self._availableOllamaModels = []
+ self._lastAvailabilityCheck = currentTime
+ return {"serviceAvailable": False, "ollamaConnected": False, "availableModels": []}
+ except Exception as e:
+ logger.warning(f"Error checking Private-LLM availability: {e}")
+ self._serviceAvailable = False
+ self._availableOllamaModels = []
+ self._lastAvailabilityCheck = currentTime
+ return {"serviceAvailable": False, "ollamaConnected": False, "availableModels": []}
+
+ def _isModelAvailableInOllama(self, ollamaModelName: str, availableModels: List[str]) -> bool:
+ """
+ Check if a model is available in Ollama.
+ Handles model name variations (with/without tags).
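+
+        Example: a requested "qwen2.5vl:7b" counts as available when Ollama lists
+        "qwen2.5vl:latest", since both reduce to the base name "qwen2.5vl".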
+ """
+ if not availableModels:
+ return False
+
+ # Direct match
+ if ollamaModelName in availableModels:
+ return True
+
+ # Check without tag (e.g., "qwen2.5vl:72b" -> "qwen2.5vl")
+ baseModelName = ollamaModelName.split(":")[0]
+ for availModel in availableModels:
+ availBase = availModel.split(":")[0]
+ if baseModelName == availBase:
+ return True
+
+ return False
+
+ def getModels(self) -> List[AiModel]:
+ """
+ Get all available Private-LLM models.
+
+ Checks service availability and returns only models that are actually available
+ in the connected Ollama instance. Returns empty list if service is not reachable.
+ """
+ # Check service availability
+ availability = self._checkServiceAvailability()
+
+ if not availability["serviceAvailable"]:
+ logger.warning("Private-LLM service not available - no models returned")
+ return []
+
+ if not availability["ollamaConnected"]:
+ logger.warning("Private-LLM service available but Ollama not connected - no models returned")
+ return []
+
+ availableOllamaModels = availability.get("availableModels", [])
+
+ # Define all models with their Ollama backend names
+ # Actual model specs (for 32GB RAM server):
+ # - deepseek-ocr: 3.34B params, 8K context, ~6.7GB RAM
+ # - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM
+ # - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM
+ modelDefinitions = [
+ # OCR Text Model (deepseek-ocr: 3.34B, 8K context)
+ {
+ "model": AiModel(
+ name="poweron-ocr-general",
+ displayName="PowerOn OCR General",
+ connectorType="privatellm",
+ apiUrl=f"{self.baseUrl}/api/analyze",
+ temperature=0.1,
+ maxTokens=4096,
+ contextLength=8192, # deepseek-ocr actual context: 8K
+ costPer1kTokensInput=0.0, # Flat rate pricing
+ costPer1kTokensOutput=0.0, # Flat rate pricing
+ speedRating=8, # Fast due to smaller model
+ qualityRating=8,
+ functionCall=self.callAiText,
+ priority=PriorityEnum.COST,
+ processingMode=ProcessingModeEnum.BASIC,
+ operationTypes=createOperationTypeRatings(
+ (OperationTypeEnum.DATA_EXTRACT, 9),
+ (OperationTypeEnum.DATA_ANALYSE, 7),
+ ),
+ version="deepseek-ocr",
+ calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_TEXT_PER_CALL
+ ),
+ "ollamaModel": "deepseek-ocr"
+ },
+ # Vision General Model (qwen2.5vl:7b: 8.29B, 125K context)
+ {
+ "model": AiModel(
+ name="poweron-vision-general",
+ displayName="PowerOn Vision General",
+ connectorType="privatellm",
+ apiUrl=f"{self.baseUrl}/api/analyze",
+ temperature=0.2,
+ maxTokens=8192,
+ contextLength=125000, # qwen2.5vl:7b actual context: 125K
+ costPer1kTokensInput=0.0, # Flat rate pricing
+ costPer1kTokensOutput=0.0, # Flat rate pricing
+ speedRating=7,
+ qualityRating=9,
+ functionCall=self.callAiVision,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.ADVANCED,
+ operationTypes=createOperationTypeRatings(
+ (OperationTypeEnum.IMAGE_ANALYSE, 9),
+ (OperationTypeEnum.DATA_EXTRACT, 8),
+ ),
+ version="qwen2.5vl:7b",
+ calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL
+ ),
+ "ollamaModel": "qwen2.5vl:7b"
+ },
+ # Vision Deep Model (granite3.2-vision: 2B, 16K context)
+ {
+ "model": AiModel(
+ name="poweron-vision-deep",
+ displayName="PowerOn Vision Deep",
+ connectorType="privatellm",
+ apiUrl=f"{self.baseUrl}/api/analyze",
+ temperature=0.1,
+ maxTokens=4096,
+ contextLength=16000, # granite3.2-vision actual context: 16K
+ costPer1kTokensInput=0.0, # Flat rate pricing
+ costPer1kTokensOutput=0.0, # Flat rate pricing
+ speedRating=9, # Fast due to small 2B model
+ qualityRating=8, # Good for document understanding
+ functionCall=self.callAiVision,
+ priority=PriorityEnum.QUALITY,
+ processingMode=ProcessingModeEnum.DETAILED,
+ operationTypes=createOperationTypeRatings(
+ (OperationTypeEnum.IMAGE_ANALYSE, 9),
+ (OperationTypeEnum.DATA_EXTRACT, 9),
+ (OperationTypeEnum.DATA_ANALYSE, 8),
+ ),
+ version="granite3.2-vision",
+ calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL
+ ),
+ "ollamaModel": "granite3.2-vision"
+ },
+ ]
+
+        # Filter models by Ollama availability
+        availableModels = []
+        unavailableOllamaModels = []
+
+        for modelDef in modelDefinitions:
+            ollamaModelName = modelDef["ollamaModel"]
+            if self._isModelAvailableInOllama(ollamaModelName, availableOllamaModels):
+                availableModels.append(modelDef["model"])
+            else:
+                unavailableOllamaModels.append(ollamaModelName)
+
+        if unavailableOllamaModels:
+            logger.warning(
+                f"Private-LLM: {len(unavailableOllamaModels)} models not available in Ollama: "
+                f"{', '.join(unavailableOllamaModels)}. Install with: ollama pull <model>"
+            )
+
+ if availableModels:
+ logger.info(f"Private-LLM: {len(availableModels)} models available")
+ else:
+ logger.warning("Private-LLM: No models available. Check Ollama installation.")
+
+ return availableModels
+
+ async def callAiText(self, modelCall: AiModelCall) -> AiModelResponse:
+ """
+ Call the Private-LLM API for text-based analysis.
+
+ Args:
+ modelCall: AiModelCall with messages
+
+ Returns:
+ AiModelResponse with content and metadata
+ """
+ try:
+ messages = modelCall.messages
+ model = modelCall.model
+
+ # Extract prompt from messages
+ prompt = ""
+ for msg in messages:
+ content = msg.get("content", "")
+ if isinstance(content, str):
+ prompt += content + "\n"
+ elif isinstance(content, list):
+ for part in content:
+ if isinstance(part, dict) and part.get("type") == "text":
+ prompt += part.get("text", "") + "\n"
+
+ payload = {
+ "modelName": model.name,
+ "prompt": prompt.strip(),
+ "imageBase64": None
+ }
+
+ logger.debug(f"Calling Private-LLM text API with model {model.name}")
+
+ response = await self.httpClient.post(
+ model.apiUrl,
+ json=payload
+ )
+
+ if response.status_code != 200:
+ errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}"
+ logger.error(errorMessage)
+ raise HTTPException(status_code=500, detail=errorMessage)
+
+ responseJson = response.json()
+
+ if not responseJson.get("success", False):
+ errorMsg = responseJson.get("error", "Unknown error")
+ logger.error(f"Private-LLM returned error: {errorMsg}")
+ return AiModelResponse(
+ content="",
+ success=False,
+ error=errorMsg
+ )
+
+ # Extract content from response
+ data = responseJson.get("data", {})
+ rawResponse = responseJson.get("rawResponse", "")
+
+ # Prefer rawResponse for full content, fall back to data
+ content = rawResponse if rawResponse else str(data.get("response", data))
+
+ return AiModelResponse(
+ content=content,
+ success=True,
+ modelId=model.name,
+ metadata={"data": data}
+ )
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error calling Private-LLM text API: {str(e)}")
+ raise HTTPException(status_code=500, detail=f"Error calling Private-LLM API: {str(e)}")
+
+ async def callAiVision(self, modelCall: AiModelCall) -> AiModelResponse:
+ """
+ Call the Private-LLM API for vision-based analysis.
+
+ Args:
+ modelCall: AiModelCall with messages containing image data
+
+ Returns:
+ AiModelResponse with analysis content
+ """
+ try:
+ messages = modelCall.messages
+ model = modelCall.model
+
+ # Extract prompt and image from messages
+ prompt = ""
+ imageBase64 = None
+
+ for msg in messages:
+ content = msg.get("content", "")
+
+ if isinstance(content, str):
+ prompt += content + "\n"
+ elif isinstance(content, list):
+ for part in content:
+ if isinstance(part, dict):
+ if part.get("type") == "text":
+ prompt += part.get("text", "") + "\n"
+ elif part.get("type") == "image_url":
+ imageUrl = part.get("image_url", {}).get("url", "")
+ # Extract base64 from data URL
+ if imageUrl.startswith("data:"):
+                                # Data URL format: data:<mime>;base64,<payload>
+ parts = imageUrl.split(",", 1)
+ if len(parts) == 2:
+ imageBase64 = parts[1]
+ else:
+ imageBase64 = imageUrl
+
+ if not imageBase64:
+ logger.warning("No image provided for vision model call")
+
+ payload = {
+ "modelName": model.name,
+ "prompt": prompt.strip(),
+ "imageBase64": imageBase64
+ }
+
+ logger.debug(f"Calling Private-LLM vision API with model {model.name}")
+
+ response = await self.httpClient.post(
+ model.apiUrl,
+ json=payload
+ )
+
+ if response.status_code != 200:
+ errorMessage = f"Private-LLM API error: {response.status_code} - {response.text}"
+ logger.error(errorMessage)
+ raise HTTPException(status_code=500, detail=errorMessage)
+
+ responseJson = response.json()
+
+ if not responseJson.get("success", False):
+ errorMsg = responseJson.get("error", "Unknown error")
+ logger.error(f"Private-LLM returned error: {errorMsg}")
+ return AiModelResponse(
+ content="",
+ success=False,
+ error=errorMsg
+ )
+
+ # Extract content from response
+ data = responseJson.get("data", {})
+ rawResponse = responseJson.get("rawResponse", "")
+
+ # Prefer rawResponse for full content
+ content = rawResponse if rawResponse else str(data.get("response", data))
+
+ return AiModelResponse(
+ content=content,
+ success=True,
+ modelId=model.name,
+ metadata={"data": data}
+ )
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error calling Private-LLM vision API: {str(e)}", exc_info=True)
+ return AiModelResponse(
+ content="",
+ success=False,
+ error=f"Error during vision analysis: {str(e)}"
+ )