From 8dfb7caf922c6bc18f8a720cd570b6d31849ea56 Mon Sep 17 00:00:00 2001
From: patrick-motsch
Date: Fri, 6 Feb 2026 13:34:50 +0100
Subject: [PATCH] neues text model

---
 modules/aicore/aicorePluginPrivateLlm.py | 26 ++++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py
index 3b9754d2..84a5a6b4 100644
--- a/modules/aicore/aicorePluginPrivateLlm.py
+++ b/modules/aicore/aicorePluginPrivateLlm.py
@@ -217,35 +217,35 @@ class AiPrivateLlm(BaseConnectorAi):
 
         # Define all models with their Ollama backend names
         # Actual model specs (for 32GB RAM server):
-        # - deepseek-ocr: 3.34B params, 8K context, ~6.7GB RAM
-        # - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM
-        # - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM
+        # - qwen2.5:7b: 7.6B params, 128K context, ~4.7GB RAM (Text)
+        # - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM (Vision)
+        # - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM (Vision)
         modelDefinitions = [
-            # OCR Text Model (deepseek-ocr: 3.34B, 8K context)
+            # Text Model (qwen2.5:7b: 7.6B, 128K context)
             {
                 "model": AiModel(
-                    name="poweron-ocr-general",
-                    displayName="PowerOn OCR General",
+                    name="poweron-text-general",
+                    displayName="PowerOn Text General",
                     connectorType="privatellm",
                     apiUrl=f"{self.baseUrl}/api/analyze",
                     temperature=0.1,
-                    maxTokens=4096,
-                    contextLength=8192,  # deepseek-ocr actual context: 8K
+                    maxTokens=8192,
+                    contextLength=128000,  # qwen2.5:7b actual context: 128K
                     costPer1kTokensInput=0.0,  # Flat rate pricing
                     costPer1kTokensOutput=0.0,  # Flat rate pricing
-                    speedRating=8,  # Fast due to smaller model
-                    qualityRating=8,
+                    speedRating=8,  # Fast and efficient
+                    qualityRating=9,  # High quality text model
                     functionCall=self.callAiText,
                     priority=PriorityEnum.COST,
                     processingMode=ProcessingModeEnum.BASIC,
                     operationTypes=createOperationTypeRatings(
                         (OperationTypeEnum.DATA_EXTRACT, 9),
-                        (OperationTypeEnum.DATA_ANALYSE, 7),
+                        (OperationTypeEnum.DATA_ANALYSE, 9),
                     ),
-                    version="deepseek-ocr",
+                    version="qwen2.5:7b",
                     calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_TEXT_PER_CALL
                 ),
-                "ollamaModel": "deepseek-ocr"
+                "ollamaModel": "qwen2.5:7b"
             },
             # Vision General Model (qwen2.5vl:7b: 8.29B, 125K context)
             {