From 8dfb7caf922c6bc18f8a720cd570b6d31849ea56 Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Fri, 6 Feb 2026 13:34:50 +0100
Subject: [PATCH] neues text model

---
 modules/aicore/aicorePluginPrivateLlm.py | 26 ++++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py
index 3b9754d2..84a5a6b4 100644
--- a/modules/aicore/aicorePluginPrivateLlm.py
+++ b/modules/aicore/aicorePluginPrivateLlm.py
@@ -217,35 +217,35 @@ class AiPrivateLlm(BaseConnectorAi):
 
         # Define all models with their Ollama backend names
         # Actual model specs (for 32GB RAM server):
-        # - deepseek-ocr: 3.34B params, 8K context, ~6.7GB RAM
-        # - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM
-        # - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM
+        # - qwen2.5:7b: 7.6B params, 128K context, ~4.7GB RAM (Text)
+        # - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM (Vision)
+        # - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM (Vision)
         modelDefinitions = [
-            # OCR Text Model (deepseek-ocr: 3.34B, 8K context)
+            # Text Model (qwen2.5:7b: 7.6B, 128K context)
             {
                 "model": AiModel(
-                    name="poweron-ocr-general",
-                    displayName="PowerOn OCR General",
+                    name="poweron-text-general",
+                    displayName="PowerOn Text General",
                     connectorType="privatellm",
                     apiUrl=f"{self.baseUrl}/api/analyze",
                     temperature=0.1,
-                    maxTokens=4096,
-                    contextLength=8192,  # deepseek-ocr actual context: 8K
+                    maxTokens=8192,
+                    contextLength=128000,  # qwen2.5:7b actual context: 128K
                     costPer1kTokensInput=0.0,  # Flat rate pricing
                     costPer1kTokensOutput=0.0,  # Flat rate pricing
-                    speedRating=8,  # Fast due to smaller model
-                    qualityRating=8,
+                    speedRating=8,  # Fast and efficient
+                    qualityRating=9,  # High quality text model
                     functionCall=self.callAiText,
                     priority=PriorityEnum.COST,
                     processingMode=ProcessingModeEnum.BASIC,
                     operationTypes=createOperationTypeRatings(
                         (OperationTypeEnum.DATA_EXTRACT, 9),
-                        (OperationTypeEnum.DATA_ANALYSE, 7),
+                        (OperationTypeEnum.DATA_ANALYSE, 9),
                     ),
-                    version="deepseek-ocr",
+                    version="qwen2.5:7b",
                     calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_TEXT_PER_CALL
                 ),
-                "ollamaModel": "deepseek-ocr"
+                "ollamaModel": "qwen2.5:7b"
             },
             # Vision General Model (qwen2.5vl:7b: 8.29B, 125K context)
             {