From 535bd431748609d2adec08dad7b3e26a127a26b3 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@poweron.swiss>
Date: Thu, 11 Jun 2026 16:47:21 +0200
Subject: [PATCH] model fixes

---
 app.py                                        | 30 ++++++++++
 modules/aicore/aicorePluginMistral.py         |  3 +-
 modules/aicore/aicorePluginOpenai.py          | 24 +-------
 modules/aicore/aicorePluginPrivateLlm.py      | 52 ++++++++++++++--
 modules/datamodels/datamodelKnowledge.py      | 10 ++--
 .../features/trustee/routeFeatureTrustee.py   | 39 ++++++++----
 modules/interfaces/interfaceDbKnowledge.py    | 59 +++++++++++++++++++
 modules/routes/routeDataFiles.py              | 27 +++++++--
 8 files changed, 196 insertions(+), 48 deletions(-)

diff --git a/app.py b/app.py
index 20ad435c..1ce31ae5 100644
--- a/app.py
+++ b/app.py
@@ -176,6 +176,20 @@ def initLogging():
                     pass
             return True
 
+    # Suppress h11 LocalProtocolError ("Can't send data when our state is ERROR")
+    # from uvicorn when a client disconnects mid-response (browser abort, HMR, navigation).
+    # The asyncio event-loop handler (below) only catches event-loop-level exceptions;
+    # uvicorn logs this via the standard logging module before it reaches the event loop.
+    class ClientDisconnectFilter(logging.Filter):
+        def filter(self, record):
+            if record.exc_info:
+                excType = record.exc_info[0]
+                if excType and getattr(excType, "__name__", "") == "LocalProtocolError":
+                    return False
+            if isinstance(record.msg, str) and "LocalProtocolError" in record.msg:
+                return False
+            return True
+
     # Add filter to normalize problematic unicode (e.g., arrows) to ASCII for terminals like cp1252
     class UnicodeArrowFilter(logging.Filter):
         def filter(self, record):
@@ -204,6 +218,7 @@ def initLogging():
         consoleHandler.addFilter(ChromeDevToolsFilter())
         consoleHandler.addFilter(HttpcoreStarFilter())
         consoleHandler.addFilter(HTTPDebugFilter())
+        consoleHandler.addFilter(ClientDisconnectFilter())
         consoleHandler.addFilter(EmojiFilter())
         consoleHandler.addFilter(UnicodeArrowFilter())
         handlers.append(consoleHandler)
@@ -227,6 +242,7 @@ def initLogging():
         fileHandler.addFilter(ChromeDevToolsFilter())
         fileHandler.addFilter(HttpcoreStarFilter())
         fileHandler.addFilter(HTTPDebugFilter())
+        fileHandler.addFilter(ClientDisconnectFilter())
         fileHandler.addFilter(EmojiFilter())
         fileHandler.addFilter(UnicodeArrowFilter())
         handlers.append(fileHandler)
@@ -255,6 +271,12 @@ def initLogging():
     for loggerName in noisyLoggers:
         logging.getLogger(loggerName).setLevel(logging.WARNING)
 
+    # Apply ClientDisconnectFilter to uvicorn's own logger so the
+    # h11 LocalProtocolError is suppressed regardless of handler setup.
+    _disconnectFilter = ClientDisconnectFilter()
+    for _uvName in ("uvicorn.error", "uvicorn"):
+        logging.getLogger(_uvName).addFilter(_disconnectFilter)
+
     # Log the current logging configuration
     logger = logging.getLogger(__name__)
     logger.info(f"Logging initialized with level {logLevelName}")
@@ -347,6 +369,14 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.warning(f"Bootstrap check failed (may already be initialized): {str(e)}")
 
+    # Migrate vector column dimensions (idempotent — safe on every startup)
+    try:
+        from modules.interfaces.interfaceDbKnowledge import migrateVectorDimensions
+        migrateVectorDimensions()
+        logger.info("Vector dimension migration check completed")
+    except Exception as e:
+        logger.warning(f"Vector dimension migration failed (non-critical): {e}")
+
     # Register all feature definitions in RBAC catalog (for /api/features/ endpoint)
     try:
         from modules.security.rbacCatalog import getCatalogService
diff --git a/modules/aicore/aicorePluginMistral.py b/modules/aicore/aicorePluginMistral.py
index 8e32c67b..887bfda9 100644
--- a/modules/aicore/aicorePluginMistral.py
+++ b/modules/aicore/aicorePluginMistral.py
@@ -343,7 +343,8 @@ class AiMistral(BaseConnectorAi):
                     content="", success=False, error="No embeddingInput provided"
                 )
 
-            payload = {"model": model.name, "input": texts}
+            from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
+            payload = {"model": model.name, "input": texts, "output_dimension": KNOWLEDGE_EMBEDDING_DIMENSIONS}
             response = await self.httpClient.post(model.apiUrl, json=payload)
 
             if response.status_code != 200:
diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py
index 3667d742..56d3c2c0 100644
--- a/modules/aicore/aicorePluginOpenai.py
+++ b/modules/aicore/aicorePluginOpenai.py
@@ -297,27 +297,6 @@ class AiOpenai(BaseConnectorAi):
                 version="text-embedding-3-small",
                 calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002
             ),
-            AiModel(
-                name="text-embedding-3-large",
-                displayName="OpenAI Embedding Large",
-                connectorType="openai",
-                apiUrl="https://api.openai.com/v1/embeddings",
-                temperature=0.0,
-                maxTokens=0,
-                contextLength=8191,
-                costPer1kTokensInput=0.00013,  # $0.13/M tokens
-                costPer1kTokensOutput=0.0,
-                speedRating=9,
-                qualityRating=10,
-                functionCall=self.callEmbedding,
-                priority=PriorityEnum.QUALITY,
-                processingMode=ProcessingModeEnum.ADVANCED,
-                operationTypes=createOperationTypeRatings(
-                    (OperationTypeEnum.EMBEDDING, 10)
-                ),
-                version="text-embedding-3-large",
-                calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
-            ),
             AiModel(
                 name="gpt-image-1",
                 displayName="OpenAI GPT Image",
@@ -547,7 +526,8 @@ class AiOpenai(BaseConnectorAi):
                     content="", success=False, error="No embeddingInput provided"
                 )
 
-            payload = {"model": model.name, "input": texts}
+            from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
+            payload = {"model": model.name, "input": texts, "dimensions": KNOWLEDGE_EMBEDDING_DIMENSIONS}
             response = await self.httpClient.post(model.apiUrl, json=payload)
 
             if response.status_code != 200:
diff --git a/modules/aicore/aicorePluginPrivateLlm.py b/modules/aicore/aicorePluginPrivateLlm.py
index b96a1c4a..598b7ffa 100644
--- a/modules/aicore/aicorePluginPrivateLlm.py
+++ b/modules/aicore/aicorePluginPrivateLlm.py
@@ -14,7 +14,7 @@ Models (current — L4 24 GB):
 Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama):
 - poweron-text-reasoning: Reasoning (deepseek-r1:70b); complex logic, math, planning
 - poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
-- poweron-embed: Embedding (nomic-embed-text); local RAG embedding
+- poweron-embed: Embedding (mxbai-embed-large); local RAG embedding (1024 dim)
 
 Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below.
 """
@@ -377,7 +377,7 @@ class AiPrivateLlm(BaseConnectorAi):
                 ),
                 "ollamaModel": "llama4:scout"
             },
-            # Local Embedding (nomic-embed-text — replaces OpenAI text-embedding-3-small)
+            # Local Embedding (mxbai-embed-large — nativ 1024 dim, MTEB 64.68)
             {
                 "model": AiModel(
                     name="poweron-embed",
@@ -386,21 +386,21 @@ class AiPrivateLlm(BaseConnectorAi):
                     apiUrl=f"{self.baseUrl}/v1/embeddings",
                     temperature=0.0,
                     maxTokens=0,
-                    contextLength=8192,
+                    contextLength=512,
                     costPer1kTokensInput=PRICE_EMBED_PER_1K,
                     costPer1kTokensOutput=0.0,
                     speedRating=10,
                     qualityRating=8,
-                    functionCall=self.callAiText,
+                    functionCall=self.callEmbedding,
                     priority=PriorityEnum.COST,
                     processingMode=ProcessingModeEnum.BASIC,
                     operationTypes=createOperationTypeRatings(
                         (OperationTypeEnum.EMBEDDING, 9),
                     ),
-                    version="nomic-embed-text",
+                    version="mxbai-embed-large",
                     calculatepriceCHF=_calcPrivateEmbedPriceCHF
                 ),
-                "ollamaModel": "nomic-embed-text"
+                "ollamaModel": "mxbai-embed-large"
             },
         ]
         
@@ -505,6 +505,46 @@ class AiPrivateLlm(BaseConnectorAi):
             logger.error(f"Error calling Private-LLM text API: {str(e)}")
             raise HTTPException(status_code=500, detail=f"Error calling Private-LLM API: {str(e)}")
     
+    async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
+        """Generate embeddings via the Private-LLM Embedding endpoint (OpenAI-compatible)."""
+        try:
+            model = modelCall.model
+            texts = modelCall.embeddingInput or []
+            if not texts:
+                return AiModelResponse(
+                    content="", success=False, error="No embeddingInput provided"
+                )
+
+            payload = {"model": model.version, "input": texts}
+            response = await self.httpClient.post(model.apiUrl, json=payload)
+
+            if response.status_code != 200:
+                errorMessage = f"Private-LLM Embedding API error: {response.status_code} - {response.text}"
+                if response.status_code == 429:
+                    raise RateLimitExceededException(errorMessage)
+                raise HTTPException(status_code=500, detail=errorMessage)
+
+            responseJson = response.json()
+            embeddings = [item["embedding"] for item in responseJson["data"]]
+            usage = responseJson.get("usage", {})
+
+            return AiModelResponse(
+                content="",
+                success=True,
+                modelId=model.name,
+                tokensUsed={
+                    "input": usage.get("prompt_tokens", 0),
+                    "output": 0,
+                    "total": usage.get("total_tokens", 0),
+                },
+                metadata={"embeddings": embeddings},
+            )
+        except (RateLimitExceededException, HTTPException):
+            raise
+        except Exception as e:
+            logger.error(f"Error calling Private-LLM Embedding API: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Error calling Private-LLM Embedding API: {str(e)}")
+
     async def callAiVision(self, modelCall: AiModelCall) -> AiModelResponse:
         """
         Call the Private-LLM API for vision-based analysis.
diff --git a/modules/datamodels/datamodelKnowledge.py b/modules/datamodels/datamodelKnowledge.py
index ad837ab1..7816135a 100644
--- a/modules/datamodels/datamodelKnowledge.py
+++ b/modules/datamodels/datamodelKnowledge.py
@@ -9,7 +9,7 @@ These models support the 3-tier RAG architecture:
 - Global Layer: scope=global (sysAdmin only)
 - Workflow Layer: workflowId-scoped (WorkflowMemory)
 
-Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector.
+Vector fields use json_schema_extra with db_type=vector(KNOWLEDGE_EMBEDDING_DIMENSIONS) for pgvector.
 """
 
 from typing import Dict, Any, List, Optional
@@ -19,6 +19,8 @@ from modules.shared.i18nRegistry import i18nModel
 from modules.shared.timeUtils import getUtcTimestamp
 import uuid
 
+KNOWLEDGE_EMBEDDING_DIMENSIONS = 1024
+
 
 @i18nModel("Datei-Inhaltsindex")
 class FileContentIndex(PowerOnModel):
@@ -163,7 +165,7 @@ class ContentChunk(PowerOnModel):
     embedding: Optional[List[float]] = Field(
         default=None,
         description="pgvector embedding (NOT NULL for text chunks)",
-        json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
+        json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
     )
 
 
@@ -210,7 +212,7 @@ class RoundMemory(PowerOnModel):
     embedding: Optional[List[float]] = Field(
         default=None,
         description="Embedding of summary for semantic retrieval",
-        json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
+        json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
     )
 
 
@@ -251,5 +253,5 @@ class WorkflowMemory(PowerOnModel):
     embedding: Optional[List[float]] = Field(
         default=None,
         description="Optional embedding for semantic lookup",
-        json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
+        json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
     )
diff --git a/modules/features/trustee/routeFeatureTrustee.py b/modules/features/trustee/routeFeatureTrustee.py
index 8b5ba94a..dbc96013 100644
--- a/modules/features/trustee/routeFeatureTrustee.py
+++ b/modules/features/trustee/routeFeatureTrustee.py
@@ -81,6 +81,13 @@ def _parsePagination(pagination: Optional[str]) -> Optional[PaginationParams]:
     return None
 
 
+def _recordField(record, field: str, default=None):
+    """Read a field from a DB record dict or model instance."""
+    if isinstance(record, dict):
+        return record.get(field, default)
+    return getattr(record, field, default)
+
+
 def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
     """
     Validate that the user has access to the feature instance.
@@ -365,7 +372,7 @@ def get_contract_options(
         result = interface.getAllContracts(None)
         items = result.items if hasattr(result, 'items') else result
     
-    return [{"value": c.id, "label": c.label or c.name or c.id} for c in items]
+    return [{"value": _recordField(c, "id"), "label": _recordField(c, "label") or _recordField(c, "name") or _recordField(c, "id")} for c in items]
 
 
 @router.get("/{instanceId}/documents/options", response_model=List[Dict[str, Any]])
@@ -381,7 +388,14 @@ def get_document_options(
     result = interface.getAllDocuments(None)
     items = result.items if hasattr(result, 'items') else result
     # Include 'id' for FK resolution in tables
-    return [{"id": d.id, "value": d.id, "label": d.documentName or d.id} for d in items]
+    return [
+        {
+            "id": _recordField(d, "id"),
+            "value": _recordField(d, "id"),
+            "label": _recordField(d, "documentName") or _recordField(d, "id"),
+        }
+        for d in items
+    ]
 
 
 @router.get("/{instanceId}/positions/options", response_model=List[Dict[str, Any]])
@@ -397,18 +411,21 @@ def get_position_options(
     result = interface.getAllPositions(None)
     items = result.items if hasattr(result, 'items') else result
     
-    def _makePositionLabel(p: TrusteePosition) -> str:
+    def _makePositionLabel(p) -> str:
         parts = []
-        if p.valuta:
-            parts.append(datetime.fromtimestamp(p.valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
-        if p.company:
-            parts.append(p.company[:30])
-        if p.desc:
-            parts.append(p.desc[:30])
-        return " - ".join(parts) if parts else p.id
+        valuta = _recordField(p, "valuta")
+        if valuta:
+            parts.append(datetime.fromtimestamp(valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
+        company = _recordField(p, "company")
+        if company:
+            parts.append(str(company)[:30])
+        desc = _recordField(p, "desc")
+        if desc:
+            parts.append(str(desc)[:30])
+        return " - ".join(parts) if parts else _recordField(p, "id")
     
     # Include 'id' for FK resolution in tables
-    return [{"id": p.id, "value": p.id, "label": _makePositionLabel(p)} for p in items]
+    return [{"id": _recordField(p, "id"), "value": _recordField(p, "id"), "label": _makePositionLabel(p)} for p in items]
 
 
 # ============================================================================
diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py
index c52d999e..9c5a9bd3 100644
--- a/modules/interfaces/interfaceDbKnowledge.py
+++ b/modules/interfaces/interfaceDbKnowledge.py
@@ -732,3 +732,62 @@ def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects:
         interface.setUserContext(currentUser)
 
     return interface
+
+
+def migrateVectorDimensions():
+    """Idempotent boot migration: ensures all vector columns match KNOWLEDGE_EMBEDDING_DIMENSIONS.
+
+    Checks the actual pgvector dimension via pg_attribute.atttypmod.
+    If it differs from the target, nulls existing embeddings and alters the column type.
+    Safe to call on every startup — skips when dimensions already match or table doesn't exist.
+    """
+    from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
+    targetDim = KNOWLEDGE_EMBEDDING_DIMENSIONS
+
+    interface = getInterface()
+    db = interface.db
+
+    vectorTables = [
+        ("ContentChunk", "embedding"),
+        ("RoundMemory", "embedding"),
+        ("WorkflowMemory", "embedding"),
+    ]
+
+    for table, col in vectorTables:
+        try:
+            with db.borrowConn() as conn:
+                with conn.cursor() as cursor:
+                    cursor.execute(
+                        "SELECT COUNT(*) FROM information_schema.tables "
+                        "WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'",
+                        (table,),
+                    )
+                    if cursor.fetchone()["count"] == 0:
+                        continue
+
+                    cursor.execute(
+                        "SELECT a.atttypmod FROM pg_attribute a "
+                        "JOIN pg_class c ON a.attrelid = c.oid "
+                        "JOIN pg_namespace n ON c.relnamespace = n.oid "
+                        "WHERE c.relname = %s AND a.attname = %s AND n.nspname = 'public'",
+                        (table, col),
+                    )
+                    row = cursor.fetchone()
+                    if not row:
+                        continue
+
+                    currentDim = row["atttypmod"]
+                    if currentDim == targetDim:
+                        continue
+
+                    logger.info(
+                        "Migrating %s.%s from vector(%s) to vector(%s) — clearing existing embeddings",
+                        table, col, currentDim, targetDim,
+                    )
+                    cursor.execute(f'UPDATE "{table}" SET "{col}" = NULL WHERE "{col}" IS NOT NULL')
+                    cursor.execute(
+                        f'ALTER TABLE "{table}" ALTER COLUMN "{col}" TYPE vector({targetDim})'
+                    )
+                    logger.info("Migration of %s.%s completed", table, col)
+        except Exception as e:
+            logger.error("Vector dimension migration failed for %s.%s: %s", table, col, e)
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index 63189e53..41625d26 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -700,6 +700,7 @@ def get_files(
     pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
     mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
     column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
+    owner: Optional[str] = Query(None, description="'me' for own files, 'shared' for files from others"),
     currentUser: User = Depends(getCurrentUser),
     context: RequestContext = Depends(getRequestContext)
 ):
@@ -756,6 +757,21 @@ def get_files(
         def _filesToDicts(fileItems):
             return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
 
+        ownerRecordFilter = None
+        ownerExcludeOwnFiles = False
+        ownerNorm = (owner or "").strip().lower()
+        if ownerNorm == "me":
+            ownerRecordFilter = {"sysCreatedBy": managementInterface.userId}
+        elif ownerNorm == "shared":
+            ownerExcludeOwnFiles = True
+
+        def _applyOwnerFilter(items):
+            """Post-filter for owner=shared: exclude files created by current user."""
+            if not ownerExcludeOwnFiles:
+                return items
+            uid = managementInterface.userId
+            return [f for f in items if (f.get("sysCreatedBy") if isinstance(f, dict) else getattr(f, "sysCreatedBy", None)) != uid]
+
         if mode == "groupSummary":
             if not pagination:
                 raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@@ -794,10 +810,12 @@ def get_files(
             return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
 
         if not groupByLevels:
-            # No grouping: let DB handle pagination directly (fastest path)
-            result = managementInterface.getAllFiles(pagination=paginationParams)
+            result = managementInterface.getAllFiles(
+                pagination=paginationParams,
+                recordFilter=ownerRecordFilter,
+            )
             if paginationParams and hasattr(result, 'items'):
-                enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db)
+                enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db))
                 resp: dict = {
                     "items": enriched,
                     "pagination": PaginationMetadata(
@@ -811,7 +829,8 @@ def get_files(
                 }
             else:
                 items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
-                resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db), "pagination": None}
+                enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db))
+                resp = {"items": enriched, "pagination": None}
             if viewMeta:
                 resp["appliedView"] = viewMeta.model_dump()
             return resp