model fixes
All checks were successful
Deploy Plattform-Core (Int) / test (push) Successful in 59s
Deploy Plattform-Core (Int) / deploy (push) Successful in 10s

This commit is contained in:
ValueOn AG 2026-06-11 16:47:21 +02:00
parent 7061dd4303
commit 535bd43174
8 changed files with 196 additions and 48 deletions

30
app.py
View file

@ -176,6 +176,20 @@ def initLogging():
pass
return True
# Suppress h11 LocalProtocolError ("Can't send data when our state is ERROR")
# from uvicorn when a client disconnects mid-response (browser abort, HMR, navigation).
# The asyncio event-loop handler (below) only catches event-loop-level exceptions;
# uvicorn logs this via the standard logging module before it reaches the event loop.
class ClientDisconnectFilter(logging.Filter):
    """Reject log records that report a ``LocalProtocolError``.

    A record is dropped when the exception type attached to it is named
    ``LocalProtocolError`` or when its raw string message mentions that
    name. Every other record is let through unchanged.
    """

    # Exception class name to suppress; matched by name, not by identity.
    _SUPPRESSED_NAME = "LocalProtocolError"

    def filter(self, record):
        """Return False for LocalProtocolError records, True otherwise."""
        excInfo = record.exc_info
        raisedType = excInfo[0] if excInfo else None
        if raisedType and getattr(raisedType, "__name__", "") == self._SUPPRESSED_NAME:
            return False
        rawMessage = record.msg
        mentionsError = isinstance(rawMessage, str) and self._SUPPRESSED_NAME in rawMessage
        return not mentionsError
# Add filter to normalize problematic unicode (e.g., arrows) to ASCII for terminals like cp1252
class UnicodeArrowFilter(logging.Filter):
def filter(self, record):
@ -204,6 +218,7 @@ def initLogging():
consoleHandler.addFilter(ChromeDevToolsFilter())
consoleHandler.addFilter(HttpcoreStarFilter())
consoleHandler.addFilter(HTTPDebugFilter())
consoleHandler.addFilter(ClientDisconnectFilter())
consoleHandler.addFilter(EmojiFilter())
consoleHandler.addFilter(UnicodeArrowFilter())
handlers.append(consoleHandler)
@ -227,6 +242,7 @@ def initLogging():
fileHandler.addFilter(ChromeDevToolsFilter())
fileHandler.addFilter(HttpcoreStarFilter())
fileHandler.addFilter(HTTPDebugFilter())
fileHandler.addFilter(ClientDisconnectFilter())
fileHandler.addFilter(EmojiFilter())
fileHandler.addFilter(UnicodeArrowFilter())
handlers.append(fileHandler)
@ -255,6 +271,12 @@ def initLogging():
for loggerName in noisyLoggers:
logging.getLogger(loggerName).setLevel(logging.WARNING)
# Apply ClientDisconnectFilter to uvicorn's own logger so the
# h11 LocalProtocolError is suppressed regardless of handler setup.
_disconnectFilter = ClientDisconnectFilter()
for _uvName in ("uvicorn.error", "uvicorn"):
logging.getLogger(_uvName).addFilter(_disconnectFilter)
# Log the current logging configuration
logger = logging.getLogger(__name__)
logger.info(f"Logging initialized with level {logLevelName}")
@ -347,6 +369,14 @@ async def lifespan(app: FastAPI):
except Exception as e:
logger.warning(f"Bootstrap check failed (may already be initialized): {str(e)}")
# Migrate vector column dimensions (idempotent — safe on every startup)
try:
from modules.interfaces.interfaceDbKnowledge import migrateVectorDimensions
migrateVectorDimensions()
logger.info("Vector dimension migration check completed")
except Exception as e:
logger.warning(f"Vector dimension migration failed (non-critical): {e}")
# Register all feature definitions in RBAC catalog (for /api/features/ endpoint)
try:
from modules.security.rbacCatalog import getCatalogService

View file

@ -343,7 +343,8 @@ class AiMistral(BaseConnectorAi):
content="", success=False, error="No embeddingInput provided"
)
payload = {"model": model.name, "input": texts}
from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
payload = {"model": model.name, "input": texts, "output_dimension": KNOWLEDGE_EMBEDDING_DIMENSIONS}
response = await self.httpClient.post(model.apiUrl, json=payload)
if response.status_code != 200:

View file

@ -297,27 +297,6 @@ class AiOpenai(BaseConnectorAi):
version="text-embedding-3-small",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002
),
AiModel(
name="text-embedding-3-large",
displayName="OpenAI Embedding Large",
connectorType="openai",
apiUrl="https://api.openai.com/v1/embeddings",
temperature=0.0,
maxTokens=0,
contextLength=8191,
costPer1kTokensInput=0.00013, # $0.13/M tokens
costPer1kTokensOutput=0.0,
speedRating=9,
qualityRating=10,
functionCall=self.callEmbedding,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.EMBEDDING, 10)
),
version="text-embedding-3-large",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
),
AiModel(
name="gpt-image-1",
displayName="OpenAI GPT Image",
@ -547,7 +526,8 @@ class AiOpenai(BaseConnectorAi):
content="", success=False, error="No embeddingInput provided"
)
payload = {"model": model.name, "input": texts}
from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
payload = {"model": model.name, "input": texts, "dimensions": KNOWLEDGE_EMBEDDING_DIMENSIONS}
response = await self.httpClient.post(model.apiUrl, json=payload)
if response.status_code != 200:

View file

@ -14,7 +14,7 @@ Models (current — L4 24 GB):
Models (next-gen RTX PRO 6000 96 GB, auto-activated when pulled in Ollama):
- poweron-text-reasoning: Reasoning (deepseek-r1:70b); complex logic, math, planning
- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
- poweron-embed: Embedding (nomic-embed-text); local RAG embedding
- poweron-embed: Embedding (mxbai-embed-large); local RAG embedding (1024 dim)
Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below.
"""
@ -377,7 +377,7 @@ class AiPrivateLlm(BaseConnectorAi):
),
"ollamaModel": "llama4:scout"
},
# Local Embedding (nomic-embed-text — replaces OpenAI text-embedding-3-small)
# Local Embedding (mxbai-embed-large — native 1024 dim, MTEB 64.68)
{
"model": AiModel(
name="poweron-embed",
@ -386,21 +386,21 @@ class AiPrivateLlm(BaseConnectorAi):
apiUrl=f"{self.baseUrl}/v1/embeddings",
temperature=0.0,
maxTokens=0,
contextLength=8192,
contextLength=512,
costPer1kTokensInput=PRICE_EMBED_PER_1K,
costPer1kTokensOutput=0.0,
speedRating=10,
qualityRating=8,
functionCall=self.callAiText,
functionCall=self.callEmbedding,
priority=PriorityEnum.COST,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.EMBEDDING, 9),
),
version="nomic-embed-text",
version="mxbai-embed-large",
calculatepriceCHF=_calcPrivateEmbedPriceCHF
),
"ollamaModel": "nomic-embed-text"
"ollamaModel": "mxbai-embed-large"
},
]
@ -505,6 +505,46 @@ class AiPrivateLlm(BaseConnectorAi):
logger.error(f"Error calling Private-LLM text API: {str(e)}")
raise HTTPException(status_code=500, detail=f"Error calling Private-LLM API: {str(e)}")
async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
    """Generate embeddings via the Private-LLM Embedding endpoint (OpenAI-compatible).

    Args:
        modelCall: Call descriptor. ``modelCall.model`` supplies the endpoint
            URL (``apiUrl``) and the backend model identifier (``version``);
            ``modelCall.embeddingInput`` holds the texts to embed.

    Returns:
        An ``AiModelResponse``. On success ``metadata["embeddings"]`` holds one
        vector per item of the response's ``data`` list and ``tokensUsed``
        mirrors the reported usage (zeros when none is reported). When no
        input texts are given, a ``success=False`` response is returned
        without calling the endpoint.

    Raises:
        RateLimitExceededException: The endpoint answered with HTTP 429.
        HTTPException: Any other non-200 answer or unexpected failure
            (reported with status code 500).
    """
    try:
        model = modelCall.model
        texts = modelCall.embeddingInput or []
        if not texts:
            return AiModelResponse(
                content="", success=False, error="No embeddingInput provided"
            )

        # The backend is addressed by model.version; model.name is only
        # reported back as modelId.
        payload = {"model": model.version, "input": texts}
        response = await self.httpClient.post(model.apiUrl, json=payload)

        if response.status_code != 200:
            errorMessage = f"Private-LLM Embedding API error: {response.status_code} - {response.text}"
            if response.status_code == 429:
                raise RateLimitExceededException(errorMessage)
            raise HTTPException(status_code=500, detail=errorMessage)

        responseJson = response.json()
        embeddings = [item["embedding"] for item in responseJson["data"]]
        # A missing *or null* "usage" object must not turn a successful
        # embedding call into an error, so fall back to an empty dict.
        usage = responseJson.get("usage") or {}

        return AiModelResponse(
            content="",
            success=True,
            modelId=model.name,
            tokensUsed={
                "input": usage.get("prompt_tokens", 0),
                "output": 0,
                "total": usage.get("total_tokens", 0),
            },
            metadata={"embeddings": embeddings},
        )
    except (RateLimitExceededException, HTTPException):
        # Already in the form callers expect; pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error calling Private-LLM Embedding API: {str(e)}")
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=f"Error calling Private-LLM Embedding API: {str(e)}") from e
async def callAiVision(self, modelCall: AiModelCall) -> AiModelResponse:
"""
Call the Private-LLM API for vision-based analysis.

View file

@ -9,7 +9,7 @@ These models support the 3-tier RAG architecture:
- Global Layer: scope=global (sysAdmin only)
- Workflow Layer: workflowId-scoped (WorkflowMemory)
Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector.
Vector fields use json_schema_extra with db_type=vector(KNOWLEDGE_EMBEDDING_DIMENSIONS) for pgvector.
"""
from typing import Dict, Any, List, Optional
@ -19,6 +19,8 @@ from modules.shared.i18nRegistry import i18nModel
from modules.shared.timeUtils import getUtcTimestamp
import uuid
KNOWLEDGE_EMBEDDING_DIMENSIONS = 1024
@i18nModel("Datei-Inhaltsindex")
class FileContentIndex(PowerOnModel):
@ -163,7 +165,7 @@ class ContentChunk(PowerOnModel):
embedding: Optional[List[float]] = Field(
default=None,
description="pgvector embedding (NOT NULL for text chunks)",
json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
)
@ -210,7 +212,7 @@ class RoundMemory(PowerOnModel):
embedding: Optional[List[float]] = Field(
default=None,
description="Embedding of summary for semantic retrieval",
json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
)
@ -251,5 +253,5 @@ class WorkflowMemory(PowerOnModel):
embedding: Optional[List[float]] = Field(
default=None,
description="Optional embedding for semantic lookup",
json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
)

View file

@ -81,6 +81,13 @@ def _parsePagination(pagination: Optional[str]) -> Optional[PaginationParams]:
return None
def _recordField(record, field: str, default=None):
"""Read a field from a DB record dict or model instance."""
if isinstance(record, dict):
return record.get(field, default)
return getattr(record, field, default)
def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
"""
Validate that the user has access to the feature instance.
@ -365,7 +372,7 @@ def get_contract_options(
result = interface.getAllContracts(None)
items = result.items if hasattr(result, 'items') else result
return [{"value": c.id, "label": c.label or c.name or c.id} for c in items]
return [{"value": _recordField(c, "id"), "label": _recordField(c, "label") or _recordField(c, "name") or _recordField(c, "id")} for c in items]
@router.get("/{instanceId}/documents/options", response_model=List[Dict[str, Any]])
@ -381,7 +388,14 @@ def get_document_options(
result = interface.getAllDocuments(None)
items = result.items if hasattr(result, 'items') else result
# Include 'id' for FK resolution in tables
return [{"id": d.id, "value": d.id, "label": d.documentName or d.id} for d in items]
return [
{
"id": _recordField(d, "id"),
"value": _recordField(d, "id"),
"label": _recordField(d, "documentName") or _recordField(d, "id"),
}
for d in items
]
@router.get("/{instanceId}/positions/options", response_model=List[Dict[str, Any]])
@ -397,18 +411,21 @@ def get_position_options(
result = interface.getAllPositions(None)
items = result.items if hasattr(result, 'items') else result
def _makePositionLabel(p: TrusteePosition) -> str:
def _makePositionLabel(p) -> str:
parts = []
if p.valuta:
parts.append(datetime.fromtimestamp(p.valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
if p.company:
parts.append(p.company[:30])
if p.desc:
parts.append(p.desc[:30])
return " - ".join(parts) if parts else p.id
valuta = _recordField(p, "valuta")
if valuta:
parts.append(datetime.fromtimestamp(valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
company = _recordField(p, "company")
if company:
parts.append(str(company)[:30])
desc = _recordField(p, "desc")
if desc:
parts.append(str(desc)[:30])
return " - ".join(parts) if parts else _recordField(p, "id")
# Include 'id' for FK resolution in tables
return [{"id": p.id, "value": p.id, "label": _makePositionLabel(p)} for p in items]
return [{"id": _recordField(p, "id"), "value": _recordField(p, "id"), "label": _makePositionLabel(p)} for p in items]
# ============================================================================

View file

@ -732,3 +732,62 @@ def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects:
interface.setUserContext(currentUser)
return interface
def migrateVectorDimensions():
    """Boot migration that aligns vector columns with KNOWLEDGE_EMBEDDING_DIMENSIONS.

    For every known (table, column) pair the table is looked up in
    information_schema.tables and the column's ``atttypmod`` is read from
    pg_attribute. The pair is skipped when the table does not exist, the
    column is not found, or ``atttypmod`` already equals the target.
    Otherwise the existing embeddings are set to NULL and the column type
    is altered to ``vector(<target>)``.

    A failure for one pair is logged and does not prevent the remaining
    pairs from being processed.
    """
    from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS

    targetDim = KNOWLEDGE_EMBEDDING_DIMENSIONS
    db = getInterface().db

    def _migrateColumn(table, col):
        """Check one column and migrate it if its dimension differs from the target."""
        with db.borrowConn() as conn, conn.cursor() as cursor:
            # Table lookup is case-insensitive.
            cursor.execute(
                "SELECT COUNT(*) FROM information_schema.tables "
                "WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'",
                (table,),
            )
            if cursor.fetchone()["count"] == 0:
                return

            # Column lookup matches relname/attname exactly.
            cursor.execute(
                "SELECT a.atttypmod FROM pg_attribute a "
                "JOIN pg_class c ON a.attrelid = c.oid "
                "JOIN pg_namespace n ON c.relnamespace = n.oid "
                "WHERE c.relname = %s AND a.attname = %s AND n.nspname = 'public'",
                (table, col),
            )
            typmodRow = cursor.fetchone()
            if not typmodRow:
                return

            currentDim = typmodRow["atttypmod"]
            if currentDim == targetDim:
                return

            logger.info(
                "Migrating %s.%s from vector(%s) to vector(%s) — clearing existing embeddings",
                table, col, currentDim, targetDim,
            )
            # Existing vectors are cleared before the column type is changed.
            cursor.execute(f'UPDATE "{table}" SET "{col}" = NULL WHERE "{col}" IS NOT NULL')
            cursor.execute(
                f'ALTER TABLE "{table}" ALTER COLUMN "{col}" TYPE vector({targetDim})'
            )
            logger.info("Migration of %s.%s completed", table, col)

    vectorColumns = (
        ("ContentChunk", "embedding"),
        ("RoundMemory", "embedding"),
        ("WorkflowMemory", "embedding"),
    )
    for table, col in vectorColumns:
        try:
            _migrateColumn(table, col)
        except Exception as e:
            logger.error("Vector dimension migration failed for %s.%s: %s", table, col, e)

View file

@ -700,6 +700,7 @@ def get_files(
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
owner: Optional[str] = Query(None, description="'me' for own files, 'shared' for files from others"),
currentUser: User = Depends(getCurrentUser),
context: RequestContext = Depends(getRequestContext)
):
@ -756,6 +757,21 @@ def get_files(
def _filesToDicts(fileItems):
return [f.model_dump() if hasattr(f, "model_dump") else (dict(f) if not isinstance(f, dict) else f) for f in fileItems]
ownerRecordFilter = None
ownerExcludeOwnFiles = False
ownerNorm = (owner or "").strip().lower()
if ownerNorm == "me":
ownerRecordFilter = {"sysCreatedBy": managementInterface.userId}
elif ownerNorm == "shared":
ownerExcludeOwnFiles = True
def _applyOwnerFilter(items):
    """Post-filter for owner=shared: drop entries created by the current user.

    Returns ``items`` untouched unless the shared-owner mode is active.
    Entries may be dicts or objects; their ``sysCreatedBy`` value is
    compared against ``managementInterface.userId``.
    """
    # NOTE(review): this runs on rows that were already fetched, so with DB
    # pagination a page may end up shorter than requested — confirm intended.
    if ownerExcludeOwnFiles:
        currentUserId = managementInterface.userId

        def _creatorOf(entry):
            if isinstance(entry, dict):
                return entry.get("sysCreatedBy")
            return getattr(entry, "sysCreatedBy", None)

        return [entry for entry in items if _creatorOf(entry) != currentUserId]
    return items
if mode == "groupSummary":
if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
@ -794,10 +810,12 @@ def get_files(
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
if not groupByLevels:
# No grouping: let DB handle pagination directly (fastest path)
result = managementInterface.getAllFiles(pagination=paginationParams)
result = managementInterface.getAllFiles(
pagination=paginationParams,
recordFilter=ownerRecordFilter,
)
if paginationParams and hasattr(result, 'items'):
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db)
enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db))
resp: dict = {
"items": enriched,
"pagination": PaginationMetadata(
@ -811,7 +829,8 @@ def get_files(
}
else:
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db), "pagination": None}
enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db))
resp = {"items": enriched, "pagination": None}
if viewMeta:
resp["appliedView"] = viewMeta.model_dump()
return resp