model fixes
This commit is contained in:
parent
7061dd4303
commit
535bd43174
8 changed files with 196 additions and 48 deletions
30
app.py
30
app.py
|
|
@ -176,6 +176,20 @@ def initLogging():
|
|||
pass
|
||||
return True
|
||||
|
||||
# Suppress h11 LocalProtocolError ("Can't send data when our state is ERROR")
|
||||
# from uvicorn when a client disconnects mid-response (browser abort, HMR, navigation).
|
||||
# The asyncio event-loop handler (below) only catches event-loop-level exceptions;
|
||||
# uvicorn logs this via the standard logging module before it reaches the event loop.
|
||||
class ClientDisconnectFilter(logging.Filter):
    """Drop log records that refer to a ``LocalProtocolError``.

    A record is rejected when either
    - its ``exc_info`` carries an exception type whose ``__name__`` is
      ``"LocalProtocolError"`` (matched by class name, not by identity), or
    - its raw ``msg`` is a string containing ``"LocalProtocolError"``.

    Every other record passes through unchanged.
    """

    def filter(self, record):
        """Return False to suppress the record, True to keep it."""
        excInfo = record.exc_info
        raisedType = excInfo[0] if excInfo else None
        # Compare by name so no protocol library has to be imported here.
        if raisedType and getattr(raisedType, "__name__", "") == "LocalProtocolError":
            return False

        # Only the unformatted message template is inspected (record.msg).
        rawMessage = record.msg
        mentionsError = isinstance(rawMessage, str) and "LocalProtocolError" in rawMessage
        return not mentionsError
|
||||
|
||||
# Add filter to normalize problematic unicode (e.g., arrows) to ASCII for terminals like cp1252
|
||||
class UnicodeArrowFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
|
|
@ -204,6 +218,7 @@ def initLogging():
|
|||
consoleHandler.addFilter(ChromeDevToolsFilter())
|
||||
consoleHandler.addFilter(HttpcoreStarFilter())
|
||||
consoleHandler.addFilter(HTTPDebugFilter())
|
||||
consoleHandler.addFilter(ClientDisconnectFilter())
|
||||
consoleHandler.addFilter(EmojiFilter())
|
||||
consoleHandler.addFilter(UnicodeArrowFilter())
|
||||
handlers.append(consoleHandler)
|
||||
|
|
@ -227,6 +242,7 @@ def initLogging():
|
|||
fileHandler.addFilter(ChromeDevToolsFilter())
|
||||
fileHandler.addFilter(HttpcoreStarFilter())
|
||||
fileHandler.addFilter(HTTPDebugFilter())
|
||||
fileHandler.addFilter(ClientDisconnectFilter())
|
||||
fileHandler.addFilter(EmojiFilter())
|
||||
fileHandler.addFilter(UnicodeArrowFilter())
|
||||
handlers.append(fileHandler)
|
||||
|
|
@ -255,6 +271,12 @@ def initLogging():
|
|||
for loggerName in noisyLoggers:
|
||||
logging.getLogger(loggerName).setLevel(logging.WARNING)
|
||||
|
||||
# Apply ClientDisconnectFilter to uvicorn's own logger so the
|
||||
# h11 LocalProtocolError is suppressed regardless of handler setup.
|
||||
_disconnectFilter = ClientDisconnectFilter()
|
||||
for _uvName in ("uvicorn.error", "uvicorn"):
|
||||
logging.getLogger(_uvName).addFilter(_disconnectFilter)
|
||||
|
||||
# Log the current logging configuration
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info(f"Logging initialized with level {logLevelName}")
|
||||
|
|
@ -347,6 +369,14 @@ async def lifespan(app: FastAPI):
|
|||
except Exception as e:
|
||||
logger.warning(f"Bootstrap check failed (may already be initialized): {str(e)}")
|
||||
|
||||
# Migrate vector column dimensions (idempotent — safe on every startup)
|
||||
try:
|
||||
from modules.interfaces.interfaceDbKnowledge import migrateVectorDimensions
|
||||
migrateVectorDimensions()
|
||||
logger.info("Vector dimension migration check completed")
|
||||
except Exception as e:
|
||||
logger.warning(f"Vector dimension migration failed (non-critical): {e}")
|
||||
|
||||
# Register all feature definitions in RBAC catalog (for /api/features/ endpoint)
|
||||
try:
|
||||
from modules.security.rbacCatalog import getCatalogService
|
||||
|
|
|
|||
|
|
@ -343,7 +343,8 @@ class AiMistral(BaseConnectorAi):
|
|||
content="", success=False, error="No embeddingInput provided"
|
||||
)
|
||||
|
||||
payload = {"model": model.name, "input": texts}
|
||||
from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
|
||||
payload = {"model": model.name, "input": texts, "output_dimension": KNOWLEDGE_EMBEDDING_DIMENSIONS}
|
||||
response = await self.httpClient.post(model.apiUrl, json=payload)
|
||||
|
||||
if response.status_code != 200:
|
||||
|
|
|
|||
|
|
@ -297,27 +297,6 @@ class AiOpenai(BaseConnectorAi):
|
|||
version="text-embedding-3-small",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00002
|
||||
),
|
||||
AiModel(
|
||||
name="text-embedding-3-large",
|
||||
displayName="OpenAI Embedding Large",
|
||||
connectorType="openai",
|
||||
apiUrl="https://api.openai.com/v1/embeddings",
|
||||
temperature=0.0,
|
||||
maxTokens=0,
|
||||
contextLength=8191,
|
||||
costPer1kTokensInput=0.00013, # $0.13/M tokens
|
||||
costPer1kTokensOutput=0.0,
|
||||
speedRating=9,
|
||||
qualityRating=10,
|
||||
functionCall=self.callEmbedding,
|
||||
priority=PriorityEnum.QUALITY,
|
||||
processingMode=ProcessingModeEnum.ADVANCED,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.EMBEDDING, 10)
|
||||
),
|
||||
version="text-embedding-3-large",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
|
||||
),
|
||||
AiModel(
|
||||
name="gpt-image-1",
|
||||
displayName="OpenAI GPT Image",
|
||||
|
|
@ -547,7 +526,8 @@ class AiOpenai(BaseConnectorAi):
|
|||
content="", success=False, error="No embeddingInput provided"
|
||||
)
|
||||
|
||||
payload = {"model": model.name, "input": texts}
|
||||
from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS
|
||||
payload = {"model": model.name, "input": texts, "dimensions": KNOWLEDGE_EMBEDDING_DIMENSIONS}
|
||||
response = await self.httpClient.post(model.apiUrl, json=payload)
|
||||
|
||||
if response.status_code != 200:
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Models (current — L4 24 GB):
|
|||
Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama):
|
||||
- poweron-text-reasoning: Reasoning (deepseek-r1:70b); complex logic, math, planning
|
||||
- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
|
||||
- poweron-embed: Embedding (nomic-embed-text); local RAG embedding
|
||||
- poweron-embed: Embedding (mxbai-embed-large); local RAG embedding (1024 dim)
|
||||
|
||||
Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below.
|
||||
"""
|
||||
|
|
@ -377,7 +377,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
|||
),
|
||||
"ollamaModel": "llama4:scout"
|
||||
},
|
||||
# Local Embedding (nomic-embed-text — replaces OpenAI text-embedding-3-small)
|
||||
# Local Embedding (mxbai-embed-large — native 1024 dim, MTEB 64.68)
|
||||
{
|
||||
"model": AiModel(
|
||||
name="poweron-embed",
|
||||
|
|
@ -386,21 +386,21 @@ class AiPrivateLlm(BaseConnectorAi):
|
|||
apiUrl=f"{self.baseUrl}/v1/embeddings",
|
||||
temperature=0.0,
|
||||
maxTokens=0,
|
||||
contextLength=8192,
|
||||
contextLength=512,
|
||||
costPer1kTokensInput=PRICE_EMBED_PER_1K,
|
||||
costPer1kTokensOutput=0.0,
|
||||
speedRating=10,
|
||||
qualityRating=8,
|
||||
functionCall=self.callAiText,
|
||||
functionCall=self.callEmbedding,
|
||||
priority=PriorityEnum.COST,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.EMBEDDING, 9),
|
||||
),
|
||||
version="nomic-embed-text",
|
||||
version="mxbai-embed-large",
|
||||
calculatepriceCHF=_calcPrivateEmbedPriceCHF
|
||||
),
|
||||
"ollamaModel": "nomic-embed-text"
|
||||
"ollamaModel": "mxbai-embed-large"
|
||||
},
|
||||
]
|
||||
|
||||
|
|
@ -505,6 +505,46 @@ class AiPrivateLlm(BaseConnectorAi):
|
|||
logger.error(f"Error calling Private-LLM text API: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error calling Private-LLM API: {str(e)}")
|
||||
|
||||
async def callEmbedding(self, modelCall: AiModelCall) -> AiModelResponse:
    """Generate embeddings via the Private-LLM Embedding endpoint (OpenAI-compatible).

    Args:
        modelCall: Call descriptor; ``modelCall.model`` supplies ``apiUrl``,
            ``version`` and ``name``, ``modelCall.embeddingInput`` the texts.

    Returns:
        AiModelResponse with ``success=False`` and an error text when no
        embedding input was given; otherwise ``success=True`` with the
        vectors under ``metadata["embeddings"]`` (in response order) and
        token counts taken from the response's ``usage`` object.

    Raises:
        RateLimitExceededException: the endpoint answered with HTTP 429.
        HTTPException: status 500 for any other non-200 answer or any
            unexpected error while calling or parsing.
    """
    try:
        model = modelCall.model
        texts = modelCall.embeddingInput or []
        if not texts:
            return AiModelResponse(
                content="", success=False, error="No embeddingInput provided"
            )

        # The request identifies the model by model.version, not model.name.
        payload = {"model": model.version, "input": texts}
        response = await self.httpClient.post(model.apiUrl, json=payload)

        if response.status_code != 200:
            errorMessage = f"Private-LLM Embedding API error: {response.status_code} - {response.text}"
            if response.status_code == 429:
                raise RateLimitExceededException(errorMessage)
            raise HTTPException(status_code=500, detail=errorMessage)

        responseJson = response.json()
        embeddings = [item["embedding"] for item in responseJson["data"]]
        # "usage" may be absent OR explicitly null; .get("usage", {}) would
        # hand back None in the latter case and discard valid embeddings.
        usage = responseJson.get("usage") or {}

        return AiModelResponse(
            content="",
            success=True,
            modelId=model.name,
            tokensUsed={
                "input": usage.get("prompt_tokens", 0),
                "output": 0,
                "total": usage.get("total_tokens", 0),
            },
            metadata={"embeddings": embeddings},
        )
    except (RateLimitExceededException, HTTPException):
        # Already in the shape callers expect; do not re-wrap.
        raise
    except Exception as e:
        logger.error(f"Error calling Private-LLM Embedding API: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calling Private-LLM Embedding API: {str(e)}")
|
||||
|
||||
async def callAiVision(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""
|
||||
Call the Private-LLM API for vision-based analysis.
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ These models support the 3-tier RAG architecture:
|
|||
- Global Layer: scope=global (sysAdmin only)
|
||||
- Workflow Layer: workflowId-scoped (WorkflowMemory)
|
||||
|
||||
Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector.
|
||||
Vector fields use json_schema_extra with db_type=vector(KNOWLEDGE_EMBEDDING_DIMENSIONS) for pgvector.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
|
@ -19,6 +19,8 @@ from modules.shared.i18nRegistry import i18nModel
|
|||
from modules.shared.timeUtils import getUtcTimestamp
|
||||
import uuid
|
||||
|
||||
KNOWLEDGE_EMBEDDING_DIMENSIONS = 1024
|
||||
|
||||
|
||||
@i18nModel("Datei-Inhaltsindex")
|
||||
class FileContentIndex(PowerOnModel):
|
||||
|
|
@ -163,7 +165,7 @@ class ContentChunk(PowerOnModel):
|
|||
embedding: Optional[List[float]] = Field(
|
||||
default=None,
|
||||
description="pgvector embedding (NOT NULL for text chunks)",
|
||||
json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
|
||||
json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -210,7 +212,7 @@ class RoundMemory(PowerOnModel):
|
|||
embedding: Optional[List[float]] = Field(
|
||||
default=None,
|
||||
description="Embedding of summary for semantic retrieval",
|
||||
json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
|
||||
json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -251,5 +253,5 @@ class WorkflowMemory(PowerOnModel):
|
|||
embedding: Optional[List[float]] = Field(
|
||||
default=None,
|
||||
description="Optional embedding for semantic lookup",
|
||||
json_schema_extra={"label": "Embedding", "db_type": "vector(1536)"},
|
||||
json_schema_extra={"label": "Embedding", "db_type": f"vector({KNOWLEDGE_EMBEDDING_DIMENSIONS})"},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -81,6 +81,13 @@ def _parsePagination(pagination: Optional[str]) -> Optional[PaginationParams]:
|
|||
return None
|
||||
|
||||
|
||||
def _recordField(record, field: str, default=None):
    """Return ``field`` from ``record``, which may be a dict or an object.

    Dicts are read with ``.get``; anything else with ``getattr``. ``default``
    is returned only when the key/attribute is missing, not when its stored
    value is falsy or ``None``.
    """
    if not isinstance(record, dict):
        return getattr(record, field, default)
    return record.get(field, default)
|
||||
|
||||
|
||||
def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
|
||||
"""
|
||||
Validate that the user has access to the feature instance.
|
||||
|
|
@ -365,7 +372,7 @@ def get_contract_options(
|
|||
result = interface.getAllContracts(None)
|
||||
items = result.items if hasattr(result, 'items') else result
|
||||
|
||||
return [{"value": c.id, "label": c.label or c.name or c.id} for c in items]
|
||||
return [{"value": _recordField(c, "id"), "label": _recordField(c, "label") or _recordField(c, "name") or _recordField(c, "id")} for c in items]
|
||||
|
||||
|
||||
@router.get("/{instanceId}/documents/options", response_model=List[Dict[str, Any]])
|
||||
|
|
@ -381,7 +388,14 @@ def get_document_options(
|
|||
result = interface.getAllDocuments(None)
|
||||
items = result.items if hasattr(result, 'items') else result
|
||||
# Include 'id' for FK resolution in tables
|
||||
return [{"id": d.id, "value": d.id, "label": d.documentName or d.id} for d in items]
|
||||
return [
|
||||
{
|
||||
"id": _recordField(d, "id"),
|
||||
"value": _recordField(d, "id"),
|
||||
"label": _recordField(d, "documentName") or _recordField(d, "id"),
|
||||
}
|
||||
for d in items
|
||||
]
|
||||
|
||||
|
||||
@router.get("/{instanceId}/positions/options", response_model=List[Dict[str, Any]])
|
||||
|
|
@ -397,18 +411,21 @@ def get_position_options(
|
|||
result = interface.getAllPositions(None)
|
||||
items = result.items if hasattr(result, 'items') else result
|
||||
|
||||
def _makePositionLabel(p: TrusteePosition) -> str:
|
||||
def _makePositionLabel(p) -> str:
|
||||
parts = []
|
||||
if p.valuta:
|
||||
parts.append(datetime.fromtimestamp(p.valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
|
||||
if p.company:
|
||||
parts.append(p.company[:30])
|
||||
if p.desc:
|
||||
parts.append(p.desc[:30])
|
||||
return " - ".join(parts) if parts else p.id
|
||||
valuta = _recordField(p, "valuta")
|
||||
if valuta:
|
||||
parts.append(datetime.fromtimestamp(valuta, tz=timezone.utc).strftime("%Y-%m-%d"))
|
||||
company = _recordField(p, "company")
|
||||
if company:
|
||||
parts.append(str(company)[:30])
|
||||
desc = _recordField(p, "desc")
|
||||
if desc:
|
||||
parts.append(str(desc)[:30])
|
||||
return " - ".join(parts) if parts else _recordField(p, "id")
|
||||
|
||||
# Include 'id' for FK resolution in tables
|
||||
return [{"id": p.id, "value": p.id, "label": _makePositionLabel(p)} for p in items]
|
||||
return [{"id": _recordField(p, "id"), "value": _recordField(p, "id"), "label": _makePositionLabel(p)} for p in items]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
|
|
|||
|
|
@ -732,3 +732,62 @@ def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects:
|
|||
interface.setUserContext(currentUser)
|
||||
|
||||
return interface
|
||||
|
||||
|
||||
def migrateVectorDimensions():
    """Boot migration: bring the embedding columns to KNOWLEDGE_EMBEDDING_DIMENSIONS.

    For each known (table, column) pair the column's ``atttypmod`` is read
    from ``pg_attribute`` and compared directly with the target dimension.
    On a mismatch the stored embeddings are set to NULL and the column is
    altered to ``vector(<target>)``. Missing tables, missing columns and
    columns that already match are skipped. An error on one table is logged
    and does not stop the remaining tables; nothing is raised to the caller.
    """
    from modules.datamodels.datamodelKnowledge import KNOWLEDGE_EMBEDDING_DIMENSIONS

    wantedDim = KNOWLEDGE_EMBEDDING_DIMENSIONS
    database = getInterface().db

    tableExistsSql = (
        "SELECT COUNT(*) FROM information_schema.tables "
        "WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'"
    )
    columnTypmodSql = (
        "SELECT a.atttypmod FROM pg_attribute a "
        "JOIN pg_class c ON a.attrelid = c.oid "
        "JOIN pg_namespace n ON c.relnamespace = n.oid "
        "WHERE c.relname = %s AND a.attname = %s AND n.nspname = 'public'"
    )

    def _migrateColumn(tableName, columnName):
        """Check one column and, if needed, clear and retype it."""
        # Rows are read by column name, so the cursor must return mappings.
        # NOTE(review): no explicit commit here — presumably borrowConn()
        # commits on exit; confirm.
        with database.borrowConn() as connection:
            with connection.cursor() as cur:
                cur.execute(tableExistsSql, (tableName,))
                if cur.fetchone()["count"] == 0:
                    return

                cur.execute(columnTypmodSql, (tableName, columnName))
                typmodRow = cur.fetchone()
                if not typmodRow:
                    return

                existingDim = typmodRow["atttypmod"]
                if existingDim == wantedDim:
                    return

                logger.info(
                    "Migrating %s.%s from vector(%s) to vector(%s) — clearing existing embeddings",
                    tableName, columnName, existingDim, wantedDim,
                )
                # Old vectors have the wrong length, so they are dropped
                # before the type change.
                cur.execute(
                    f'UPDATE "{tableName}" SET "{columnName}" = NULL WHERE "{columnName}" IS NOT NULL'
                )
                cur.execute(
                    f'ALTER TABLE "{tableName}" ALTER COLUMN "{columnName}" TYPE vector({wantedDim})'
                )
                logger.info("Migration of %s.%s completed", tableName, columnName)

    vectorColumns = (
        ("ContentChunk", "embedding"),
        ("RoundMemory", "embedding"),
        ("WorkflowMemory", "embedding"),
    )
    for table, col in vectorColumns:
        try:
            _migrateColumn(table, col)
        except Exception as e:
            logger.error("Vector dimension migration failed for %s.%s: %s", table, col, e)
|
||||
|
|
|
|||
|
|
@ -700,6 +700,7 @@ def get_files(
|
|||
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
|
||||
mode: Optional[str] = Query(None, description="'filterValues' for distinct column values, 'ids' for all filtered IDs"),
|
||||
column: Optional[str] = Query(None, description="Column key (required when mode=filterValues)"),
|
||||
owner: Optional[str] = Query(None, description="'me' for own files, 'shared' for files from others"),
|
||||
currentUser: User = Depends(getCurrentUser),
|
||||
context: RequestContext = Depends(getRequestContext)
|
||||
):
|
||||
|
|
@ -756,6 +757,21 @@ def get_files(
|
|||
def _filesToDicts(fileItems):
    """Convert each file item to a dict.

    Objects exposing ``model_dump`` are dumped through it, dicts are passed
    through as the same object, and anything else goes through ``dict(...)``.
    """
    def _asDict(entry):
        if hasattr(entry, "model_dump"):
            return entry.model_dump()
        if isinstance(entry, dict):
            return entry
        return dict(entry)

    return [_asDict(entry) for entry in fileItems]
|
||||
|
||||
ownerRecordFilter = None
|
||||
ownerExcludeOwnFiles = False
|
||||
ownerNorm = (owner or "").strip().lower()
|
||||
if ownerNorm == "me":
|
||||
ownerRecordFilter = {"sysCreatedBy": managementInterface.userId}
|
||||
elif ownerNorm == "shared":
|
||||
ownerExcludeOwnFiles = True
|
||||
|
||||
def _applyOwnerFilter(items):
    """Post-filter for owner=shared: exclude files created by the current user.

    Returns ``items`` untouched unless ``ownerExcludeOwnFiles`` is set.
    Entries may be dicts or objects; the creator is read from
    ``sysCreatedBy`` in both cases.
    """
    # NOTE(review): the visible call sites apply this to an already
    # paginated result, so a page can shrink and totals may not reflect
    # the filtered set — confirm against the pagination metadata.
    if ownerExcludeOwnFiles:
        currentUserId = managementInterface.userId

        def _creatorOf(entry):
            if isinstance(entry, dict):
                return entry.get("sysCreatedBy")
            return getattr(entry, "sysCreatedBy", None)

        return [entry for entry in items if _creatorOf(entry) != currentUserId]
    return items
|
||||
|
||||
if mode == "groupSummary":
|
||||
if not pagination:
|
||||
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
|
||||
|
|
@ -794,10 +810,12 @@ def get_files(
|
|||
return handleIdsMode(managementInterface.db, FileItem, pagination, recordFilter)
|
||||
|
||||
if not groupByLevels:
|
||||
# No grouping: let DB handle pagination directly (fastest path)
|
||||
result = managementInterface.getAllFiles(pagination=paginationParams)
|
||||
result = managementInterface.getAllFiles(
|
||||
pagination=paginationParams,
|
||||
recordFilter=ownerRecordFilter,
|
||||
)
|
||||
if paginationParams and hasattr(result, 'items'):
|
||||
enriched = enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db)
|
||||
enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(result.items), FileItem, db=appInterface.db))
|
||||
resp: dict = {
|
||||
"items": enriched,
|
||||
"pagination": PaginationMetadata(
|
||||
|
|
@ -811,7 +829,8 @@ def get_files(
|
|||
}
|
||||
else:
|
||||
items = result if isinstance(result, list) else (result.items if hasattr(result, "items") else [result])
|
||||
resp = {"items": enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db), "pagination": None}
|
||||
enriched = _applyOwnerFilter(enrichRowsWithFkLabels(_filesToDicts(items), FileItem, db=appInterface.db))
|
||||
resp = {"items": enriched, "pagination": None}
|
||||
if viewMeta:
|
||||
resp["appliedView"] = viewMeta.model_dump()
|
||||
return resp
|
||||
|
|
|
|||
Loading…
Reference in a new issue