From 535bd431748609d2adec08dad7b3e26a127a26b3 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 11 Jun 2026 16:47:21 +0200 Subject: [PATCH] model fixes --- app.py | 30 ++++++++++ modules/aicore/aicorePluginMistral.py | 3 +- modules/aicore/aicorePluginOpenai.py | 24 +------- modules/aicore/aicorePluginPrivateLlm.py | 52 ++++++++++++++-- modules/datamodels/datamodelKnowledge.py | 10 ++-- .../features/trustee/routeFeatureTrustee.py | 39 ++++++++---- modules/interfaces/interfaceDbKnowledge.py | 59 +++++++++++++++++++ modules/routes/routeDataFiles.py | 27 +++++++-- 8 files changed, 196 insertions(+), 48 deletions(-) diff --git a/app.py b/app.py index 20ad435c..1ce31ae5 100644 --- a/app.py +++ b/app.py @@ -176,6 +176,20 @@ def initLogging(): pass return True + # Suppress h11 LocalProtocolError ("Can't send data when our state is ERROR") + # from uvicorn when a client disconnects mid-response (browser abort, HMR, navigation). + # The asyncio event-loop handler (below) only catches event-loop-level exceptions; + # uvicorn logs this via the standard logging module before it reaches the event loop. + class ClientDisconnectFilter(logging.Filter): + def filter(self, record): + if record.exc_info: + excType = record.exc_info[0] + if excType and getattr(excType, "__name__", "") == "LocalProtocolError": + return False + if isinstance(record.msg, str) and "LocalProtocolError" in record.msg: + return False + return True + # Add filter to normalize problematic unicode (e.g., arrows) to ASCII for terminals like cp1252 class UnicodeArrowFilter(logging.Filter): def filter(self, record): @@ -204,6 +218,7 @@ def initLogging(): consoleHandler.addFilter(ChromeDevToolsFilter()) consoleHandler.addFilter(HttpcoreStarFilter()) consoleHandler.addFilter(HTTPDebugFilter()) + consoleHandler.addFilter(ClientDisconnectFilter()) consoleHandler.addFilter(EmojiFilter()) consoleHandler.addFilter(UnicodeArrowFilter()) handlers.append(consoleHandler) @@ -227,6 +242,7 @@ def initLogging(): fileHandler.addFilter(ChromeDevToolsFilter()) fileHandler.addFilter(HttpcoreStarFilter()) fileHandler.addFilter(HTTPDebugFilter()) + fileHandler.addFilter(ClientDisconnectFilter()) fileHandler.addFilter(EmojiFilter()) fileHandler.addFilter(UnicodeArrowFilter()) handlers.append(fileHandler) @@ -255,6 +271,12 @@ def initLogging(): for loggerName in noisyLoggers: logging.getLogger(loggerName).setLevel(logging.WARNING) + # Apply ClientDisconnectFilter to uvicorn's own logger so the + # h11 LocalProtocolError is suppressed regardless of handler setup. + _disconnectFilter = ClientDisconnectFilter() + for _uvName in ("uvicorn.error", "uvicorn"): + logging.getLogger(_uvName).addFilter(_disconnectFilter) + # Log the current logging configuration logger = logging.getLogger(__name__) logger.info(f"Logging initialized with level {logLevelName}") @@ -347,6 +369,14 @@ async def lifespan(app: FastAPI): except Exception as e: logger.warning(f"Bootstrap check failed (may already be initialized): {str(e)}") + # Migrate vector column dimensions (idempotent — safe on every startup) + try: + from modules.interfaces.interfaceDbKnowledge import migrateVectorDimensions + migrateVectorDimensions() + logger.info("Vector dimension migration check completed") + except Exception as e: + logger.warning(f"Vector dimension migration failed (non-critical): {e}") + # Register all feature definitions in RBAC catalog (for /api/features/ endpoint) try: from modules.security.rbacCatalog import getCatalogService diff --git a/modules/aicore/aicorePluginMistral.py b/modules/aicore/aicorePluginMistral.py index 8e32c67b..887bfda9 100644 --- a/modules/aicore/aicorePluginMistral.py +++ b/modules/aicore/aicorePluginMistral.py @@ -343,7 +343,8 @@ class AiMistral(BaseConnectorAi): content="", success=False, error="No embeddingInput provided" ) - payload = {"model": model.name, "input": texts} + from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS + payload = {"model": model.name, "input": texts, "output_dimension": KNOWLEDGE_EMBEDDING_DIMENSIONS} response = await self.httpClient.post(model.apiUrl, json=payload) if response.status_code != 200: diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index 3667d742..56d3c2c0 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -297,27 +297,6 @@ class AiOpenai(BaseConnectorAi): version="text-embedding-3-small", calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002 ), - AiModel( - name="text-embedding-3-large", - displayName="OpenAI Embedding Large", - connectorType="openai", - apiUrl="https://api.openai.com/v1/embeddings", - temperature=0.0, - maxTokens=0, - contextLength=8191, - costPer1kTokensInput=0.00013, # $0.13/M tokens - costPer1kTokensOutput=0.0, - speedRating=9, - qualityRating=10, - functionCall=self.callEmbedding, - priority=PriorityEnum.QUALITY, - processingMode=ProcessingModeEnum.ADVANCED, - operationTypes=createOperationTypeRatings( - (OperationTypeEnum.EMBEDDING, 10) - ), - version="text-embedding-3-large", - calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013 - ), AiModel( name="gpt-image-1", displayName="OpenAI GPT Image", @@ -547,7 +526,8 @@ class AiOpenai(BaseConnectorAi): content="", success=False, error="No embeddingInput provided" ) - payload = {"model": model.name, "input": texts} + from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS + payload = {"model": model.name, "input": texts, "dimensions": KNOWLEDGE_EMBEDDING_DIMENSIONS} response = await self.httpClient.post(model.apiUrl, json=payload) if response.status_code != 200: diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py index b96a1c4a..598b7ffa 100644 --- a/modules/aicore/aicorePluginPrivateLlm.py +++ b/modules/aicore/aicorePluginPrivateLlm.py @@ -14,7 +14,7 @@ Models (current — L4 24 GB): Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama): - poweron-text-reasoning: Reasoning (deepseek-r1:70b); complex logic, math, planning - poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents -- poweron-embed: Embedding (nomic-embed-text); local RAG embedding +- poweron-embed: Embedding (mxbai-embed-large); local RAG embedding (1024 dim) Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below. """ @@ -377,7 +377,7 @@ class AiPrivateLlm(BaseConnectorAi): ), "ollamaModel": "llama4:scout" }, - # Local Embedding (nomic-embed-text — replaces OpenAI text-embedding-3-small) + # Local Embedding (mxbai-embed-large — nativ 1024 dim, MTEB 64.68) { "model": AiModel( name="poweron-embed", @@ -386,21 +386,21 @@ class AiPrivateLlm(BaseConnectorAi): apiUrl=f"{self.baseUrl}/v1/embeddings", temperature=0.0, maxTokens=0, - contextLength=8192, + contextLength=512, costPer1kTokensInput=PRICE_EMBED_PER_1K, costPer1kTokensOutput=0.0, speedRating=10, qualityRating=8, - functionCall=self.callAiText, + functionCall=self.callEmbedding, priority=PriorityEnum.COST, processingMode=ProcessingModeEnum.BASIC, operationTypes=createOperationTypeRatings( (OperationTypeEnum.EMBEDDING, 9), ), - version="nomic-embed-text", + version="mxbai-embed-large", calculatepriceCHF=_calcPrivateEmbedPriceCHF ), - "ollamaModel": "nomic-embed-text" + "ollamaModel": "mxbai-embed-large" }, ] @@ -505,6 +505,46 @@ class AiPrivateLlm(BaseConnectorAi): logger.error(f"Error calling Private-LLM text API: {str(e)}") raise HTTPException(status_code=500, detail=f"Error calling Private-LLM API: {str(e)}") + async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse: + """Generate embeddings via the Private-LLM Embedding endpoint (OpenAI-compatible).""" + try: + model = modelCall.model + texts = modelCall.embeddingInput or [] + if not texts: + return AiModelResponse( + content="", success=False, error="No embeddingInput provided" + ) + + payload = {"model": model.version, "input": texts} + response = await self.httpClient.post(model.apiUrl, json=payload) + + if response.status_code != 200: + errorMessage = f"Private-LLM Embedding API error: {response.status_code} - {response.text}" + if response.status_code == 429: + raise RateLimitExceededException(errorMessage) + raise HTTPException(status_code=500, detail=errorMessage) + + responseJson = response.json() + embeddings = [item["embedding"] for item in responseJson["data"]] + usage = responseJson.get("usage", {}) + + return AiModelResponse( + content="", + success=True, + modelId=model.name, + tokensUsed={ + "input": usage.get("prompt_tokens", 0), + "output": 0, + "total": usage.get("total_tokens", 0), + }, + metadata={"embeddings": embeddings}, + ) + except (RateLimitExceededException, HTTPException): + raise + except Exception as e: + logger.error(f"Error calling Private-LLM Embedding API: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error calling Private-LLM Embedding API: {str(e)}") + async def callAiVision(self, modelCall: AiModelCall) -> AiModelResponse: """ Call the Private-LLM API for vision-based analysis. diff --git a/modules/datamodels/datamodelKnowledge.py b/modules/datamodels/datamodelKnowledge.py index ad837ab1..7816135a 100644 --- a/modules/datamodels/datamodelKnowledge.py +++ b/modules/datamodels/datamodelKnowledge.py @@ -9,7 +9,7 @@ These models support the 3-tier RAG architecture: - Global Layer: scope=global (sysAdmin only) - Workflow Layer: workflowId-scoped (WorkflowMemory) -Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector. +Vector fields use json_schema_extra with db_type=vector(KNOWLEDGE_EMBEDDING_DIMENSIONS) for pgvector. """ from typing import Dict, Any, List, Optional @@ -19,6 +19,8 @@ from modules.shared.i18nRegistry import i18nModel from modules.shared.timeUtils import getUtcTimestamp import uuid +KNOWLEDGE_EMBEDDING_DIMENSIONS = 1024 + @i18nModel("Datei-Inhaltsindex") class FileContentIndex(PowerOnModel): @@ -163,7 +165,7 @@ class ContentChunk(PowerOnModel): embedding: Optional[List[float]] = Field( default=None, description="pgvector embedding (NOT NULL for text chunks)", - json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"}, + json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"}, ) @@ -210,7 +212,7 @@ class RoundMemory(PowerOnModel): embedding: Optional[List[float]] = Field( default=None, description="Embedding of summary for semantic retrieval", - json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"}, + json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"}, ) @@ -251,5 +253,5 @@ class WorkflowMemory(PowerOnModel): embedding: Optional[List[float]] = Field( default=None, description="Optional embedding for semantic lookup", - json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"}, + json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"}, ) diff --git a/modules/features/trustee/routeFeatureTrustee.py b/modules/features/trustee/routeFeatureTrustee.py index 8b5ba94a..dbc96013 100644 --- a/modules/features/trustee/routeFeatureTrustee.py +++ b/modules/features/trustee/routeFeatureTrustee.py @@ -81,6 +81,13 @@ def _parsePagination(pagination: Optional[str]) -> Optional[PaginationParams]: return None +def _recordField(record, field: str, default=None): + """Read a field from a DB record dict or model instance.""" + if isinstance(record, dict): + return record.get(field, default) + return getattr(record, field, default) + + def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str: """ Validate that the user has access to the feature instance. @@ -365,7 +372,7 @@ def get_contract_options( result = interface.getAllContracts(None) items = result.items if hasattr(result, 'items') else result - return [{"value": c.id, "label": c.label or c.name or c.id} for c in items] + return [{"value": _recordField(c, "id"), "label": _recordField(c, "label") or _recordField(c, "name") or _recordField(c, "id")} for c in items] @router.get("/{instanceId}/documents/options", response_model=List[Dict[str, Any]]) @@ -381,7 +388,14 @@ def get_document_options( result = interface.getAllDocuments(None) items = result.items if hasattr(result, 'items') else result # Include 'id' for FK resolution in tables - return [{"id": d.id, "value": d.id, "label": d.documentName or d.id} for d in items] + return [ + { + "id": _recordField(d, "id"), + "value": _recordField(d, "id"), + "label": _recordField(d, "documentName") or _recordField(d, "id"), + } + for d in items + ] @router.get("/{instanceId}/positions/options", response_model=List[Dict[str, Any]]) @@ -397,18 +411,21 @@ def get_position_options( result = interface.getAllPositions(None) items = result.items if hasattr(result, 'items') else result - def _makePositionLabel(p: TrusteePosition) -> str: + def _makePositionLabel(p) -> str: parts = [] - if p.valuta: - parts.append(datetime.fromtimestamp(p.valuta, tz=timezone.utc).strftime("%Y-%m-%d")) - if p.company: - parts.append(p.company[:30]) - if p.desc: - parts.append(p.desc[:30]) - return " - ".join(parts) if parts else p.id + valuta = _recordField(p, "valuta") + if valuta: + parts.append(datetime.fromtimestamp(valuta, tz=timezone.utc).strftime("%Y-%m-%d")) + company = _recordField(p, "company") + if company: + parts.append(str(company)[:30]) + desc = _recordField(p, "desc") + if desc: + parts.append(str(desc)[:30]) + return " - ".join(parts) if parts else _recordField(p, "id") # Include 'id' for FK resolution in tables - return [{"id": p.id, "value": p.id, "label": _makePositionLabel(p)} for p in items] + return [{"id": _recordField(p, "id"), "value": _recordField(p, "id"), "label": _makePositionLabel(p)} for p in items] # ============================================================================ diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py index c52d999e..9c5a9bd3 100644 --- a/modules/interfaces/interfaceDbKnowledge.py +++ b/modules/interfaces/interfaceDbKnowledge.py @@ -732,3 +732,62 @@ def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects: interface.setUserContext(currentUser) return interface + + +def migrateVectorDimensions(): + """Idempotent boot migration: ensures all vector columns match KNOWLEDGE_EMBEDDING_DIMENSIONS. + + Checks the actual pgvector dimension via pg_attribute.atttypmod. + If it differs from the target, nulls existing embeddings and alters the column type. + Safe to call on every startup — skips when dimensions already match or table doesn't exist. + """ + from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS + targetDim = KNOWLEDGE_EMBEDDING_DIMENSIONS + + interface = getInterface() + db = interface.db + + vectorTables = [ + ("ContentChunk", "embedding"), + ("RoundMemory", "embedding"), + ("WorkflowMemory", "embedding"), + ] + + for table, col in vectorTables: + try: + with db.borrowConn() as conn: + with conn.cursor() as cursor: + cursor.execute( + "SELECT COUNT(*) FROM information_schema.tables " + "WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'", + (table,), + ) + if cursor.fetchone()["count"] == 0: + continue + + cursor.execute( + "SELECT a.atttypmod FROM pg_attribute a " + "JOIN pg_class c ON a.attrelid = c.oid " + "JOIN pg_namespace n ON c.relnamespace = n.oid " + "WHERE c.relname = %s AND a.attname = %s AND n.nspname = 'public'", + (table, col), + ) + row = cursor.fetchone() + if not row: + continue + + currentDim = row["atttypmod"] + if currentDim == targetDim: + continue + + logger.info( + "Migrating %s.%s from vector(%s) to vector(%s) — clearing existing embeddings", + table, col, currentDim, targetDim, + ) + cursor.execute(f'UPDATE "{table}" SET "{col}" = NULL WHERE "{col}" IS NOT NULL') + cursor.execute( + f'ALTER TABLE "{table}" ALTER COLUMN "{col}" TYPE vector({targetDim})' + ) + logger.info("Migration of %s.%s completed", table, col) + except Exception as e: + logger.error("Vector dimension migration failed for %s.%s: %s", table, col, e) diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py index 63189e53..41625d26 100644 --- a/modules/routes/routeDataFiles.py +++ b/modules/routes/routeDataFiles.py @@ -700,6 +700,7 @@ def get_files( pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"), column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"), + owner: Optional[str] = Query(None, description="'me' for own files, 'shared' for files from others"), currentUser: User = Depends(getCurrentUser), context: RequestContext = Depends(getRequestContext) ): @@ -756,6 +757,21 @@ def get_files( def _filesToDicts(fileItems): return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems] + ownerRecordFilter = None + ownerExcludeOwnFiles = False + ownerNorm = (owner or "").strip().lower() + if ownerNorm == "me": + ownerRecordFilter = {"sysCreatedBy": managementInterface.userId} + elif ownerNorm == "shared": + ownerExcludeOwnFiles = True + + def _applyOwnerFilter(items): + """Post-filter for owner=shared: exclude files created by current user.""" + if not ownerExcludeOwnFiles: + return items + uid = managementInterface.userId + return [f for f in items if (f.get("sysCreatedBy") if isinstance(f, dict) else getattr(f, "sysCreatedBy", None)) != uid] + if mode == "groupSummary": if not pagination: raise HTTPException(status_code=400, detail="pagination required for groupSummary") @@ -794,10 +810,12 @@ def get_files( return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter) if not groupByLevels: - # No grouping: let DB handle pagination directly (fastest path) - result = managementInterface.getAllFiles(pagination=paginationParams) + result = managementInterface.getAllFiles( + pagination=paginationParams, + recordFilter=ownerRecordFilter, + ) if paginationParams and hasattr(result, 'items'): - enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db) + enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db)) resp: dict = { "items": enriched, "pagination": PaginationMetadata( @@ -811,7 +829,8 @@ def get_files( } else: items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result]) - resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db), "pagination": None} + enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db)) + resp = {"items": enriched, "pagination": None} if viewMeta: resp["appliedView"] = viewMeta.model_dump() return resp