From 535bd431748609d2adec08dad7b3e26a127a26b3 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 11 Jun 2026 16:47:21 +0200
Subject: [PATCH] model fixes
---
app.py | 30 ++++++++++
modules/aicore/aicorePluginMistral.py | 3 +-
modules/aicore/aicorePluginOpenai.py | 24 +-------
modules/aicore/aicorePluginPrivateLlm.py | 52 ++++++++++++++--
modules/datamodels/datamodelKnowledge.py | 10 ++--
.../features/trustee/routeFeatureTrustee.py | 39 ++++++++----
modules/interfaces/interfaceDbKnowledge.py | 59 +++++++++++++++++++
modules/routes/routeDataFiles.py | 27 +++++++--
8 files changed, 196 insertions(+), 48 deletions(-)
diff --git a/app.py b/app.py
index 20ad435c..1ce31ae5 100644
--- a/app.py
+++ b/app.py
@@ -176,6 +176,20 @@ def initLogging():
pass
return True
+ # Suppress h11 LocalProtocolError ("Can't send data when our state is ERROR")
+ # from uvicorn when a client disconnects mid-response (browser abort, HMR, navigation).
+ # The asyncio event-loop handler (below) only catches event-loop-level exceptions;
+ # uvicorn logs this via the standard logging module before it reaches the event loop.
+ class ClientDisconnectFilter(logging.Filter):
+ def filter(self, record):
+ if record.exc_info:
+ excType = record.exc_info[0]
+ if excType and getattr(excType, "__name__", "") == "LocalProtocolError":
+ return False
+ if isinstance(record.msg, str) and "LocalProtocolError" in record.msg:
+ return False
+ return True
+
# Add filter to normalize problematic unicode (e.g., arrows) to ASCII for terminals like cp1252
class UnicodeArrowFilter(logging.Filter):
def filter(self, record):
@@ -204,6 +218,7 @@ def initLogging():
consoleHandler.addFilter(ChromeDevToolsFilter())
consoleHandler.addFilter(HttpcoreStarFilter())
consoleHandler.addFilter(HTTPDebugFilter())
+ consoleHandler.addFilter(ClientDisconnectFilter())
consoleHandler.addFilter(EmojiFilter())
consoleHandler.addFilter(UnicodeArrowFilter())
handlers.append(consoleHandler)
@@ -227,6 +242,7 @@ def initLogging():
fileHandler.addFilter(ChromeDevToolsFilter())
fileHandler.addFilter(HttpcoreStarFilter())
fileHandler.addFilter(HTTPDebugFilter())
+ fileHandler.addFilter(ClientDisconnectFilter())
fileHandler.addFilter(EmojiFilter())
fileHandler.addFilter(UnicodeArrowFilter())
handlers.append(fileHandler)
@@ -255,6 +271,12 @@ def initLogging():
for loggerName in noisyLoggers:
logging.getLogger(loggerName).setLevel(logging.WARNING)
+ # Apply ClientDisconnectFilter to uvicorn's own logger so the
+ # h11 LocalProtocolError is suppressed regardless of handler setup.
+ _disconnectFilter = ClientDisconnectFilter()
+ for _uvName in ("uvicorn.error", "uvicorn"):
+ logging.getLogger(_uvName).addFilter(_disconnectFilter)
+
# Log the current logging configuration
logger = logging.getLogger(__name__)
logger.info(f"Logging initialized with level {logLevelName}")
@@ -347,6 +369,14 @@ async def lifespan(app: FastAPI):
except Exception as e:
logger.warning(f"Bootstrap check failed (may already be initialized): {str(e)}")
+ # Migrate vector column dimensions (idempotent — safe on every startup)
+ try:
+ from modules.interfaces.interfaceDbKnowledge import migrateVectorDimensions
+ migrateVectorDimensions()
+ logger.info("Vector dimension migration check completed")
+ except Exception as e:
+ logger.warning(f"Vector dimension migration failed (non-critical): {e}")
+
# Register all feature definitions in RBAC catalog (for /api/features/ endpoint)
try:
from modules.security.rbacCatalog import getCatalogService
diff --git a/modules/aicore/aicorePluginMistral.py b/modules/aicore/aicorePluginMistral.py
index 8e32c67b..887bfda9 100644
--- a/modules/aicore/aicorePluginMistral.py
+++ b/modules/aicore/aicorePluginMistral.py
@@ -343,7 +343,8 @@ class AiMistral(BaseConnectorAi):
content="", success=False, error="No embeddingInput provided"
)
- payload = {"model": model.name, "input": texts}
+ from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
+ payload = {"model": model.name, "input": texts, "output_dimension": KNOWLEDGE_EMBEDDING_DIMENSIONS}
response = await self.httpClient.post(model.apiUrl, json=payload)
if response.status_code != 200:
diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py
index 3667d742..56d3c2c0 100644
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@@ -297,27 +297,6 @@ class AiOpenai(BaseConnectorAi):
version="text-embedding-3-small",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002
),
- AiModel(
- name="text-embedding-3-large",
- displayName="OpenAI Embedding Large",
- connectorType="openai",
- apiUrl="https://api.openai.com/v1/embeddings",
- temperature=0.0,
- maxTokens=0,
- contextLength=8191,
- costPer1kTokensInput=0.00013, # $0.13/M tokens
- costPer1kTokensOutput=0.0,
- speedRating=9,
- qualityRating=10,
- functionCall=self.callEmbedding,
- priority=PriorityEnum.QUALITY,
- processingMode=ProcessingModeEnum.ADVANCED,
- operationTypes=createOperationTypeRatings(
- (OperationTypeEnum.EMBEDDING, 10)
- ),
- version="text-embedding-3-large",
- calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
- ),
AiModel(
name="gpt-image-1",
displayName="OpenAI GPT Image",
@@ -547,7 +526,8 @@ class AiOpenai(BaseConnectorAi):
content="", success=False, error="No embeddingInput provided"
)
- payload = {"model": model.name, "input": texts}
+ from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
+ payload = {"model": model.name, "input": texts, "dimensions": KNOWLEDGE_EMBEDDING_DIMENSIONS}
response = await self.httpClient.post(model.apiUrl, json=payload)
if response.status_code != 200:
diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py
index b96a1c4a..598b7ffa 100644
--- a/modules/aicore/aicorePluginPrivateLlm.py
+++ b/modules/aicore/aicorePluginPrivateLlm.py
@@ -14,7 +14,7 @@ Models (current — L4 24 GB):
Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama):
- poweron-text-reasoning: Reasoning (deepseek-r1:70b); complex logic, math, planning
- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
-- poweron-embed: Embedding (nomic-embed-text); local RAG embedding
+- poweron-embed: Embedding (mxbai-embed-large); local RAG embedding (1024 dim)
Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below.
"""
@@ -377,7 +377,7 @@ class AiPrivateLlm(BaseConnectorAi):
),
"ollamaModel": "llama4:scout"
},
- # Local Embedding (nomic-embed-text — replaces OpenAI text-embedding-3-small)
+ # Local Embedding (mxbai-embed-large — nativ 1024 dim, MTEB 64.68)
{
"model": AiModel(
name="poweron-embed",
@@ -386,21 +386,21 @@ class AiPrivateLlm(BaseConnectorAi):
apiUrl=f"{self.baseUrl}/v1/embeddings",
temperature=0.0,
maxTokens=0,
- contextLength=8192,
+ contextLength=512,
costPer1kTokensInput=PRICE_EMBED_PER_1K,
costPer1kTokensOutput=0.0,
speedRating=10,
qualityRating=8,
- functionCall=self.callAiText,
+ functionCall=self.callEmbedding,
priority=PriorityEnum.COST,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.EMBEDDING, 9),
),
- version="nomic-embed-text",
+ version="mxbai-embed-large",
calculatepriceCHF=_calcPrivateEmbedPriceCHF
),
- "ollamaModel": "nomic-embed-text"
+ "ollamaModel": "mxbai-embed-large"
},
]
@@ -505,6 +505,46 @@ class AiPrivateLlm(BaseConnectorAi):
logger.error(f"Error calling Private-LLM text API: {str(e)}")
raise HTTPException(status_code=500, detail=f"Error calling Private-LLM API: {str(e)}")
+ async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
+ """Generate embeddings via the Private-LLM Embedding endpoint (OpenAI-compatible)."""
+ try:
+ model = modelCall.model
+ texts = modelCall.embeddingInput or []
+ if not texts:
+ return AiModelResponse(
+ content="", success=False, error="No embeddingInput provided"
+ )
+
+ payload = {"model": model.version, "input": texts}
+ response = await self.httpClient.post(model.apiUrl, json=payload)
+
+ if response.status_code != 200:
+ errorMessage = f"Private-LLM Embedding API error: {response.status_code} - {response.text}"
+ if response.status_code == 429:
+ raise RateLimitExceededException(errorMessage)
+ raise HTTPException(status_code=500, detail=errorMessage)
+
+ responseJson = response.json()
+ embeddings = [item["embedding"] for item in responseJson["data"]]
+ usage = responseJson.get("usage", {})
+
+ return AiModelResponse(
+ content="",
+ success=True,
+ modelId=model.name,
+ tokensUsed={
+ "input": usage.get("prompt_tokens", 0),
+ "output": 0,
+ "total": usage.get("total_tokens", 0),
+ },
+ metadata={"embeddings": embeddings},
+ )
+ except (RateLimitExceededException, HTTPException):
+ raise
+ except Exception as e:
+ logger.error(f"Error calling Private-LLM Embedding API: {str(e)}")
+ raise HTTPException(status_code=500, detail=f"Error calling Private-LLM Embedding API: {str(e)}")
+
async def callAiVision(self, modelCall: AiModelCall) -> AiModelResponse:
"""
Call the Private-LLM API for vision-based analysis.
diff --git a/modules/datamodels/datamodelKnowledge.py b/modules/datamodels/datamodelKnowledge.py
index ad837ab1..7816135a 100644
--- a/modules/datamodels/datamodelKnowledge.py
+++ b/modules/datamodels/datamodelKnowledge.py
@@ -9,7 +9,7 @@ These models support the 3-tier RAG architecture:
- Global Layer: scope=global (sysAdmin only)
- Workflow Layer: workflowId-scoped (WorkflowMemory)
-Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector.
+Vector fields use json_schema_extra with db_type=vector(KNOWLEDGE_EMBEDDING_DIMENSIONS) for pgvector.
"""
from typing import Dict, Any, List, Optional
@@ -19,6 +19,8 @@ from modules.shared.i18nRegistry import i18nModel
from modules.shared.timeUtils import getUtcTimestamp
import uuid
+KNOWLEDGE_EMBEDDING_DIMENSIONS = 1024
+
@i18nModel("Datei-Inhaltsindex")
class FileContentIndex(PowerOnModel):
@@ -163,7 +165,7 @@ class ContentChunk(PowerOnModel):
embedding: Optional[List[float]] = Field(
default=None,
description="pgvector embedding (NOT NULL for text chunks)",
- json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
+ json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
)
@@ -210,7 +212,7 @@ class RoundMemory(PowerOnModel):
embedding: Optional[List[float]] = Field(
default=None,
description="Embedding of summary for semantic retrieval",
- json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
+ json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
)
@@ -251,5 +253,5 @@ class WorkflowMemory(PowerOnModel):
embedding: Optional[List[float]] = Field(
default=None,
description="Optional embedding for semantic lookup",
- json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
+ json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
)
diff --git a/modules/features/trustee/routeFeatureTrustee.py b/modules/features/trustee/routeFeatureTrustee.py
index 8b5ba94a..dbc96013 100644
--- a/modules/features/trustee/routeFeatureTrustee.py
+++ b/modules/features/trustee/routeFeatureTrustee.py
@@ -81,6 +81,13 @@ def _parsePagination(pagination: Optional[str]) -> Optional[PaginationParams]:
return None
+def _recordField(record, field: str, default=None):
+ """Read a field from a DB record dict or model instance."""
+ if isinstance(record, dict):
+ return record.get(field, default)
+ return getattr(record, field, default)
+
+
def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
"""
Validate that the user has access to the feature instance.
@@ -365,7 +372,7 @@ def get_contract_options(
result = interface.getAllContracts(None)
items = result.items if hasattr(result, 'items') else result
- return [{"value": c.id, "label": c.label or c.name or c.id} for c in items]
+ return [{"value": _recordField(c, "id"), "label": _recordField(c, "label") or _recordField(c, "name") or _recordField(c, "id")} for c in items]
@router.get("/{instanceId}/documents/options", response_model=List[Dict[str, Any]])
@@ -381,7 +388,14 @@ def get_document_options(
result = interface.getAllDocuments(None)
items = result.items if hasattr(result, 'items') else result
# Include 'id' for FK resolution in tables
- return [{"id": d.id, "value": d.id, "label": d.documentName or d.id} for d in items]
+ return [
+ {
+ "id": _recordField(d, "id"),
+ "value": _recordField(d, "id"),
+ "label": _recordField(d, "documentName") or _recordField(d, "id"),
+ }
+ for d in items
+ ]
@router.get("/{instanceId}/positions/options", response_model=List[Dict[str, Any]])
@@ -397,18 +411,21 @@ def get_position_options(
result = interface.getAllPositions(None)
items = result.items if hasattr(result, 'items') else result
- def _makePositionLabel(p: TrusteePosition) -> str:
+ def _makePositionLabel(p) -> str:
parts = []
- if p.valuta:
- parts.append(datetime.fromtimestamp(p.valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
- if p.company:
- parts.append(p.company[:30])
- if p.desc:
- parts.append(p.desc[:30])
- return " - ".join(parts) if parts else p.id
+ valuta = _recordField(p, "valuta")
+ if valuta:
+ parts.append(datetime.fromtimestamp(valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
+ company = _recordField(p, "company")
+ if company:
+ parts.append(str(company)[:30])
+ desc = _recordField(p, "desc")
+ if desc:
+ parts.append(str(desc)[:30])
+ return " - ".join(parts) if parts else _recordField(p, "id")
# Include 'id' for FK resolution in tables
- return [{"id": p.id, "value": p.id, "label": _makePositionLabel(p)} for p in items]
+ return [{"id": _recordField(p, "id"), "value": _recordField(p, "id"), "label": _makePositionLabel(p)} for p in items]
# ============================================================================
diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py
index c52d999e..9c5a9bd3 100644
--- a/modules/interfaces/interfaceDbKnowledge.py
+++ b/modules/interfaces/interfaceDbKnowledge.py
@@ -732,3 +732,62 @@ def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects:
interface.setUserContext(currentUser)
return interface
+
+
+def migrateVectorDimensions():
+ """Idempotent boot migration: ensures all vector columns match KNOWLEDGE_EMBEDDING_DIMENSIONS.
+
+ Checks the actual pgvector dimension via pg_attribute.atttypmod.
+ If it differs from the target, nulls existing embeddings and alters the column type.
+ Safe to call on every startup — skips when dimensions already match or table doesn't exist.
+ """
+ from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
+ targetDim = KNOWLEDGE_EMBEDDING_DIMENSIONS
+
+ interface = getInterface()
+ db = interface.db
+
+ vectorTables = [
+ ("ContentChunk", "embedding"),
+ ("RoundMemory", "embedding"),
+ ("WorkflowMemory", "embedding"),
+ ]
+
+ for table, col in vectorTables:
+ try:
+ with db.borrowConn() as conn:
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT COUNT(*) FROM information_schema.tables "
+ "WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'",
+ (table,),
+ )
+ if cursor.fetchone()["count"] == 0:
+ continue
+
+ cursor.execute(
+ "SELECT a.atttypmod FROM pg_attribute a "
+ "JOIN pg_class c ON a.attrelid = c.oid "
+ "JOIN pg_namespace n ON c.relnamespace = n.oid "
+ "WHERE c.relname = %s AND a.attname = %s AND n.nspname = 'public'",
+ (table, col),
+ )
+ row = cursor.fetchone()
+ if not row:
+ continue
+
+ currentDim = row["atttypmod"]
+ if currentDim == targetDim:
+ continue
+
+ logger.info(
+ "Migrating %s.%s from vector(%s) to vector(%s) — clearing existing embeddings",
+ table, col, currentDim, targetDim,
+ )
+ cursor.execute(f'UPDATE "{table}" SET "{col}" = NULL WHERE "{col}" IS NOT NULL')
+ cursor.execute(
+ f'ALTER TABLE "{table}" ALTER COLUMN "{col}" TYPE vector({targetDim})'
+ )
+ logger.info("Migration of %s.%s completed", table, col)
+ except Exception as e:
+ logger.error("Vector dimension migration failed for %s.%s: %s", table, col, e)
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index 63189e53..41625d26 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -700,6 +700,7 @@ def get_files(
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
+ owner: Optional[str] = Query(None, description="'me' for own files, 'shared' for files from others"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
):
@@ -756,6 +757,21 @@ def get_files(
def _filesToDicts(fileItems):
return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
+ ownerRecordFilter = None
+ ownerExcludeOwnFiles = False
+ ownerNorm = (owner or "").strip().lower()
+ if ownerNorm == "me":
+ ownerRecordFilter = {"sysCreatedBy": managementInterface.userId}
+ elif ownerNorm == "shared":
+ ownerExcludeOwnFiles = True
+
+ def _applyOwnerFilter(items):
+ """Post-filter for owner=shared: exclude files created by current user."""
+ if not ownerExcludeOwnFiles:
+ return items
+ uid = managementInterface.userId
+ return [f for f in items if (f.get("sysCreatedBy") if isinstance(f, dict) else getattr(f, "sysCreatedBy", None)) != uid]
+
if mode == "groupSummary":
if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@@ -794,10 +810,12 @@ def get_files(
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
if not groupByLevels:
- # No grouping: let DB handle pagination directly (fastest path)
- result = managementInterface.getAllFiles(pagination=paginationParams)
+ result = managementInterface.getAllFiles(
+ pagination=paginationParams,
+ recordFilter=ownerRecordFilter,
+ )
if paginationParams and hasattr(result, 'items'):
- enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db)
+ enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db))
resp: dict = {
"items": enriched,
"pagination": PaginationMetadata(
@@ -811,7 +829,8 @@ def get_files(
}
else:
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
- resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db), "pagination": None}
+ enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db))
+ resp = {"items": enriched, "pagination": None}
if viewMeta:
resp["appliedView"] = viewMeta.model_dump()
return resp