unified failsafe neutralization architecture

ValueOn AG 2026-03-29 21:55:09 +02:00
parent 3ac25a269a
commit 7e88005721
28 changed files with 1064 additions and 423 deletions

View file

@@ -7,9 +7,9 @@ Connects to the private-llm service running on-premise with Ollama backend.
 Provides OCR and Vision capabilities via local AI models.
 Models:
-- poweron-ocr-general: Text extraction and OCR (deepseek backend)
-- poweron-vision-general: General vision tasks (qwen2.5vl backend)
-- poweron-vision-deep: Deep vision analysis (granite3.2 backend)
+- poweron-text-general: Text (qwen2.5); NEUTRALIZATION_TEXT + data/plan ops
+- poweron-vision-general: Vision (qwen2.5vl); IMAGE_ANALYSE + NEUTRALIZATION_IMAGE
+- poweron-vision-deep: Vision (granite3.2); IMAGE_ANALYSE + NEUTRALIZATION_IMAGE
 Pricing (CHF per call):
 - Text models: CHF 0.010
@@ -245,6 +245,7 @@ class AiPrivateLlm(BaseConnectorAi):
 (OperationTypeEnum.DATA_ANALYSE, 8),
 (OperationTypeEnum.DATA_GENERATE, 8),
 (OperationTypeEnum.DATA_EXTRACT, 8),
+(OperationTypeEnum.NEUTRALIZATION_TEXT, 9),
 ),
 version="qwen2.5:7b",
 calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_TEXT_PER_CALL
@@ -270,6 +271,7 @@ class AiPrivateLlm(BaseConnectorAi):
 processingMode=ProcessingModeEnum.ADVANCED,
 operationTypes=createOperationTypeRatings(
 (OperationTypeEnum.IMAGE_ANALYSE, 9),
+(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
 ),
 version="qwen2.5vl:7b",
 calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL
@@ -295,6 +297,7 @@ class AiPrivateLlm(BaseConnectorAi):
 processingMode=ProcessingModeEnum.DETAILED,
 operationTypes=createOperationTypeRatings(
 (OperationTypeEnum.IMAGE_ANALYSE, 9),
+(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
 ),
 version="granite3.2-vision",
 calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL

View file

@@ -22,6 +22,10 @@ class OperationTypeEnum(str, Enum):
 IMAGE_ANALYSE = "imageAnalyse"
 IMAGE_GENERATE = "imageGenerate"
+# Neutralization (dedicated model selection; text vs vision backends)
+NEUTRALIZATION_TEXT = "neutralizationText"
+NEUTRALIZATION_IMAGE = "neutralizationImage"
 # Web Operations
 WEB_SEARCH_DATA = "webSearch" # Returns list of URLs only
 WEB_CRAWL = "webCrawl" # Web crawl for a given URL

View file

@@ -28,6 +28,7 @@ class ReferenceTypeEnum(str, Enum):
 ADMIN = "ADMIN" # Admin adjustment
 SYSTEM = "SYSTEM" # System credit (e.g., initial credit)
 STORAGE = "STORAGE" # Metered storage overage (prepay pool)
+SUBSCRIPTION = "SUBSCRIPTION" # AI budget credit from subscription plan
 class PeriodTypeEnum(str, Enum):

View file

@@ -3,8 +3,10 @@
 """Knowledge Store data models: FileContentIndex, ContentChunk, WorkflowMemory.
 These models support the 3-tier RAG architecture:
-- Shared Layer: mandateId-scoped, isShared=True
-- Instance Layer: userId + featureInstanceId-scoped
+- Personal Layer: scope=personal, userId-scoped
+- Instance Layer: scope=featureInstance, featureInstanceId-scoped
+- Mandate Layer: scope=mandate, mandateId-scoped (visible to all mandate users)
+- Global Layer: scope=global (sysAdmin only)
 - Workflow Layer: workflowId-scoped (WorkflowMemory)
 Vector fields use json_schema_extra={"db_type": "vector(1536)"} for pgvector.
@@ -20,12 +22,11 @@ import uuid
 class FileContentIndex(PowerOnModel):
 """Structural index of a file's content objects. Created without AI.
-Lives in the Instance Layer; optionally promoted to Shared Layer via isShared."""
+Scope is mirrored from FileItem (poweron_management) at indexing time."""
 id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key (typically = fileId)")
 userId: str = Field(description="Owner user ID")
 featureInstanceId: str = Field(default="", description="Feature instance scope")
 mandateId: str = Field(default="", description="Mandate scope")
-isShared: bool = Field(default=False, description="Visible in Shared Layer for all mandate users")
 fileName: str = Field(description="Original file name")
 mimeType: str = Field(description="MIME type of the file")
 containerPath: Optional[str] = Field(default=None, description="Path within a container (e.g. 'archive.zip/folder/report.pdf')")
@@ -57,7 +58,6 @@ registerModelLabels(
 "userId": {"en": "User ID", "fr": "ID utilisateur"},
 "featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance"},
 "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"},
-"isShared": {"en": "Shared", "fr": "Partagé"},
 "fileName": {"en": "File Name", "fr": "Nom de fichier"},
 "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
 "containerPath": {"en": "Container Path", "fr": "Chemin du conteneur"},

View file

@@ -10,7 +10,7 @@ Multi-Tenant Design:
 """
 import uuid
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, Any
 from enum import Enum
 from pydantic import BaseModel, Field, EmailStr, field_validator, computed_field
 from modules.datamodels.datamodelBase import PowerOnModel
@@ -303,6 +303,33 @@ registerModelLabels(
 )
def _normalizeTtsVoiceMap(value: Any) -> Optional[Dict[str, str]]:
"""
Coerce ttsVoiceMap payloads to Dict[str, str].
UI/clients may send per-locale objects like {"voiceName": "de-DE-Chirp3-HD-Achird"};
storage and model field type are locale -> voice id string.
"""
if value is None:
return None
if not isinstance(value, dict):
return None
out: Dict[str, str] = {}
for rawKey, rawVal in value.items():
key = str(rawKey)
if rawVal is None:
continue
if isinstance(rawVal, str):
out[key] = rawVal
elif isinstance(rawVal, dict):
vn = rawVal.get("voiceName")
if vn is not None and str(vn).strip() != "":
out[key] = str(vn).strip()
else:
out[key] = str(rawVal)
return out if out else None
 class UserVoicePreferences(PowerOnModel):
 """User-level voice/language preferences, shared across all features."""
 id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
@@ -315,6 +342,11 @@ class UserVoicePreferences(PowerOnModel):
 translationSourceLanguage: Optional[str] = Field(default=None, description="Source language for translations")
 translationTargetLanguage: Optional[str] = Field(default=None, description="Target language for translations")
+@field_validator("ttsVoiceMap", mode="before")
+@classmethod
+def _validateTtsVoiceMap(cls, value: Any) -> Optional[Dict[str, str]]:
+return _normalizeTtsVoiceMap(value)
 registerModelLabels(
 "UserVoicePreferences",

View file

@@ -203,6 +203,89 @@ class NeutralizationService:
 'processed_info': {'type': 'binary', 'status': 'error', 'error': str(e)}
 }
async def processImageAsync(self, imageBytes: bytes, fileName: str, mimeType: str = "image/png") -> Dict[str, Any]:
"""Analyze image via internal vision model to check for sensitive content.
Returns dict with:
- 'status': 'ok' | 'blocked' | 'error'
- 'hasSensitiveContent': bool
- 'analysis': str (model's analysis text, if available)
- 'processed_info': dict with details
Uses the NEUTRALIZATION_IMAGE operation type, so only internal Private-LLM models are selected.
If no internal model is available, returns 'blocked'.
"""
import base64
try:
aiService = None
if self._getService:
try:
aiService = self._getService("ai")
except Exception:
pass
if not aiService or not hasattr(aiService, 'callAi'):
logger.warning(f"processImage: AI service not available — blocking image '{fileName}'")
return {
'status': 'blocked',
'hasSensitiveContent': True,
'analysis': '',
'processed_info': {'type': 'image', 'status': 'blocked', 'reason': 'AI service unavailable'}
}
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
_b64Data = base64.b64encode(imageBytes).decode('utf-8')
_dataUrl = f"data:{mimeType};base64,{_b64Data}"
_prompt = (
"Analyze this image for personally identifiable information (PII). "
"Check for: names, addresses, phone numbers, email addresses, ID numbers, "
"faces, signatures, handwritten text, license plates, financial data. "
"Respond with JSON: {\"hasPII\": true/false, \"findings\": [\"...\"]}"
)
_request = AiCallRequest(
prompt=_prompt,
options=AiCallOptions(operationType=OperationTypeEnum.NEUTRALIZATION_IMAGE),
messages=[{"role": "user", "content": [
{"type": "text", "text": _prompt},
{"type": "image_url", "image_url": {"url": _dataUrl}},
]}],
)
_response = await aiService.callAi(_request)
_hasPII = False
_analysis = _response.content if _response and hasattr(_response, 'content') else ''
if _analysis:
_lowerAnalysis = _analysis.lower()
if '"haspii": true' in _lowerAnalysis or '"haspii":true' in _lowerAnalysis:
_hasPII = True
return {
'status': 'blocked' if _hasPII else 'ok',
'hasSensitiveContent': _hasPII,
'analysis': _analysis,
'processed_info': {'type': 'image', 'status': 'blocked' if _hasPII else 'ok', 'fileName': fileName}
}
except Exception as e:
logger.error(f"processImage failed for '{fileName}': {e}")
return {
'status': 'blocked',
'hasSensitiveContent': True,
'analysis': '',
'processed_info': {'type': 'image', 'status': 'error', 'error': str(e)}
}
def processImage(self, imageBytes: bytes, fileName: str, mimeType: str = "image/png") -> Dict[str, Any]:
"""Sync wrapper for processImageAsync. Uses asyncio.run when no event loop is running."""
import asyncio
try:
return asyncio.run(self.processImageAsync(imageBytes, fileName, mimeType))
except RuntimeError:
loop = asyncio.get_event_loop()
return loop.run_until_complete(self.processImageAsync(imageBytes, fileName, mimeType))
 def resolveText(self, text: str) -> str:
 if not self.interfaceNeutralizer:
 return text
@@ -295,9 +378,21 @@ class NeutralizationService:
 p = part if isinstance(part, dict) else part.model_dump() if hasattr(part, 'model_dump') else part
 type_group = p.get('typeGroup', '')
 data = p.get('data', '')
-if type_group in ('binary', 'image') or not (data and str(data).strip()):
+if type_group == 'binary' or not (data and str(data).strip()):
 neutralized_parts.append(part)
 continue
if type_group == 'image':
import base64 as _b64img
try:
_imgBytes = _b64img.b64decode(str(data))
_imgResult = await self.processImageAsync(_imgBytes, fileName)
if _imgResult.get("status") == "ok":
neutralized_parts.append(part)
else:
logger.warning(f"Image part blocked in binary file '{fileName}' (PII detected), removing")
except Exception as _imgErr:
logger.warning(f"Image check failed in binary file '{fileName}': {_imgErr}, removing (fail-safe)")
continue
 nr = self._neutralizeText(str(data), 'text' if type_group != 'table' else 'csv')
 proc = nr.get('processed_info', {}) or {}
 if isinstance(proc, dict) and proc.get('type') == 'error':
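
A short usage sketch for the new image check; how the NeutralizationService instance is obtained is an assumption, while the call and the result keys ('status', 'hasSensitiveContent') come from processImageAsync above.

# Sketch: deciding whether an uploaded image may be indexed, via the sync wrapper above.
def isImageSafeToIndex(neutralizationService, imageBytes: bytes, fileName: str) -> bool:
    result = neutralizationService.processImage(imageBytes, fileName, mimeType="image/png")
    # 'blocked' covers PII findings, a missing internal vision model, and errors (fail-safe).
    return result["status"] == "ok" and not result["hasSensitiveContent"]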

View file

@@ -201,6 +201,13 @@ def initBootstrap(db: DatabaseConnector) -> None:
 except Exception as e:
 logger.error(f"Voice & documents migration failed: {e}")
# Backfill FileContentIndex scope fields from FileItem (one-time)
try:
from modules.migration.migrateRagScopeFields import runMigration as migrateRagScope
migrateRagScope(appDb=db)
except Exception as e:
logger.error(f"RAG scope fields migration failed: {e}")
 # After migration: root mandate is purely technical — no feature instances
 if not migrationDone and mandateId:
 initRootMandateFeatures(db, mandateId)

View file

@@ -1931,14 +1931,26 @@ class AppObjects:
 raise
 logger.debug(f"Subscription capacity check skipped: {e}")
-def _syncSubscriptionQuantity(self, mandateId: str) -> None:
-"""Sync Stripe subscription quantities after a resource mutation."""
+def _syncSubscriptionQuantity(self, mandateId: str, *, raiseOnError: bool = False) -> None:
+"""Sync Stripe subscription quantities after a resource mutation.
+Args:
+raiseOnError: If True, propagate errors (billing-critical paths).
+"""
 try:
 from modules.interfaces.interfaceDbSubscription import getInterface as getSubInterface
 from modules.security.rootAccess import getRootUser
 subIf = getSubInterface(getRootUser(), mandateId)
-subIf.syncQuantityToStripe(mandateId)
+operative = subIf.getOperativeForMandate(mandateId)
+if not operative:
+if raiseOnError:
+raise ValueError(f"Kein operatives Abonnement für Mandant {mandateId}")
+logger.debug("No operative subscription for mandate %s — quantity sync skipped", mandateId)
+return
+subIf.syncQuantityToStripe(operative["id"], raiseOnError=raiseOnError)
 except Exception as e:
+if raiseOnError:
+raise
 logger.debug(f"Subscription quantity sync skipped: {e}")
 def deleteUserMandate(self, userId: str, mandateId: str) -> bool:

View file

@@ -970,6 +970,41 @@ class BillingObjects:
 )
 return created
# =========================================================================
# Subscription AI-Budget Credit
# =========================================================================
def creditSubscriptionBudget(self, mandateId: str, planKey: str, periodLabel: str = "") -> Optional[Dict[str, Any]]:
"""Credit the plan's budgetAiCHF to the mandate pool account.
Should be called once per billing period (initial activation + each invoice.paid).
Returns the created CREDIT transaction or None if budget is 0."""
from modules.datamodels.datamodelSubscription import _getPlan
plan = _getPlan(planKey)
if not plan or not plan.budgetAiCHF or plan.budgetAiCHF <= 0:
return None
poolAccount = self.getOrCreateMandateAccount(mandateId)
description = f"AI-Budget ({planKey})"
if periodLabel:
description += f" {periodLabel}"
transaction = BillingTransaction(
accountId=poolAccount["id"],
transactionType=TransactionTypeEnum.CREDIT,
amount=plan.budgetAiCHF,
description=description,
referenceType=ReferenceTypeEnum.SUBSCRIPTION,
referenceId=mandateId,
)
created = self.createTransaction(transaction)
logger.info(
"AI-Budget credited mandate=%s plan=%s amount=%.2f CHF",
mandateId, planKey, plan.budgetAiCHF,
)
return created
 # =========================================================================
 # Workflow Cost Query
 # =========================================================================

View file

@@ -294,7 +294,6 @@ class KnowledgeObjects:
 userId: str = None,
 featureInstanceId: str = None,
 mandateId: str = None,
-isShared: bool = None,
 scope: str = None,
 limit: int = 10,
 minScore: float = None,
@@ -305,10 +304,9 @@
 Args:
 queryVector: Query embedding vector.
-userId: Filter by user (Instance Layer).
+userId: Filter by user (personal scope).
 featureInstanceId: Filter by feature instance.
-mandateId: Filter by mandate (for Shared Layer lookups).
-isShared: If True, search Shared Layer via FileContentIndex join.
+mandateId: Filter by mandate (scope=mandate means visible to all mandate users).
 scope: If provided, filter by this specific scope value.
 If not provided, use scope-union approach (personal + featureInstance + mandate + global).
 limit: Max results.
@@ -323,8 +321,13 @@
 recordFilter["contentType"] = contentType
 if scope:
scopeFilter: Dict[str, Any] = {"scope": scope}
if mandateId:
scopeFilter["mandateId"] = mandateId
if featureInstanceId:
scopeFilter["featureInstanceId"] = featureInstanceId
 scopedFileIds = self.db.getRecordset(
-FileContentIndex, recordFilter={"scope": scope}
+FileContentIndex, recordFilter=scopeFilter
 )
 fileIds = [
 idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
@@ -334,16 +337,6 @@
 if not fileIds:
 return []
 recordFilter["fileId"] = fileIds
-elif isShared and mandateId:
-sharedIndexes = self.db.getRecordset(
-FileContentIndex,
-recordFilter={"mandateId": mandateId, "isShared": True},
-)
-sharedFileIds = [idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None) for idx in sharedIndexes]
-sharedFileIds = [fid for fid in sharedFileIds if fid]
-if not sharedFileIds:
-return []
-recordFilter["fileId"] = sharedFileIds
 elif userId or featureInstanceId or mandateId:
 scopedFileIds = self._getScopedFileIds(
 userId=userId,
@@ -410,7 +403,7 @@
 if mandateId:
 files_shared = self.db.getRecordset(
 FileContentIndex,
-recordFilter={"mandateId": mandateId, "isShared": True},
+recordFilter={"mandateId": mandateId, "scope": "mandate"},
 )
 by_id: Dict[str, Dict[str, Any]] = {}
@@ -559,6 +552,76 @@
 }
def aggregateMandateRagTotalBytes(mandateId: str) -> int:
"""Sum FileContentIndex.totalSize for a mandate.
Primary strategy (relies on correct scope fields on FileContentIndex):
1. FileContentIndex rows with mandateId on the index
2. FileContentIndex rows with featureInstanceId of any mandate FeatureInstance
Deduplicates by id.
"""
if not mandateId:
return 0
from modules.datamodels.datamodelFeatures import FeatureInstance
from modules.interfaces.interfaceDbApp import getRootInterface
knowDb = getInterface(None).db
appDb = getRootInterface().db
byId: Dict[str, Dict[str, Any]] = {}
for row in knowDb.getRecordset(FileContentIndex, recordFilter={"mandateId": mandateId}):
rid = row.get("id")
if rid:
byId[str(rid)] = row
instances = appDb.getRecordset(FeatureInstance, recordFilter={"mandateId": mandateId})
instIds = [str(inst.get("id", "")) for inst in instances if inst.get("id")]
for instId in instIds:
for row in knowDb.getRecordset(FileContentIndex, recordFilter={"featureInstanceId": instId}):
rid = row.get("id")
if rid and str(rid) not in byId:
byId[str(rid)] = row
# DEPRECATED: file-ID-correlation fallback from poweron_management.
# Only needed for pre-migration data where mandateId/featureInstanceId on the
# FileContentIndex are empty. Remove once migrateRagScopeFields has been run.
_fallbackCount = 0
try:
from modules.datamodels.datamodelFiles import FileItem
from modules.interfaces.interfaceDbManagement import ComponentObjects
mgmtDb = ComponentObjects().db
knowledgeIf = getInterface(None)
fileIds: set = set()
for f in mgmtDb.getRecordset(FileItem, recordFilter={"mandateId": mandateId}):
fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
if fid:
fileIds.add(str(fid))
for instId in instIds:
for f in mgmtDb.getRecordset(FileItem, recordFilter={"featureInstanceId": instId}):
fid = f.get("id") if isinstance(f, dict) else getattr(f, "id", None)
if fid:
fileIds.add(str(fid))
for fid in fileIds:
if fid in byId:
continue
row = knowledgeIf.getFileContentIndex(fid)
if row:
byId[fid] = row
_fallbackCount += 1
except Exception as e:
logger.warning("aggregateMandateRagTotalBytes fallback failed: %s", e)
total = sum(int(r.get("totalSize") or 0) for r in byId.values())
logger.info(
"aggregateMandateRagTotalBytes(%s): %d indexes, %d bytes (fallback: %d)",
mandateId, len(byId), total, _fallbackCount,
)
return total
 def getInterface(currentUser: Optional[User] = None) -> KnowledgeObjects:
 """Get or create a KnowledgeObjects singleton."""
 if "default" not in _instances:

View file

@@ -1053,15 +1053,20 @@ class ComponentObjects:
 # Ensure fileName is unique
 uniqueName = self._generateUniquefileName(name)
-# Use mandateId and featureInstanceId from context for proper data isolation
-# Convert None to empty string to satisfy Pydantic validation
 mandateId = self.mandateId or ""
 featureInstanceId = self.featureInstanceId or ""
-# Create FileItem instance
+if featureInstanceId:
+scope = "featureInstance"
+elif mandateId:
+scope = "mandate"
+else:
+scope = "personal"
 fileItem = FileItem(
 mandateId=mandateId,
 featureInstanceId=featureInstanceId,
+scope=scope,
 fileName=uniqueName,
 mimeType=mimeType,
 fileSize=fileSize,

View file

@@ -309,13 +309,11 @@ class SubscriptionObjects:
 return self._getMandateDataVolumeMB(mandateId)
 def _getMandateDataVolumeMB(self, mandateId: str) -> float:
-"""Sum RAG index size (FileContentIndex.totalSize) across all feature instances of the mandate."""
+"""Sum RAG index size (FileContentIndex.totalSize) for the mandate; reads poweron_knowledge."""
 try:
-from modules.datamodels.datamodelKnowledge import FileContentIndex
-knowledgeDb = _getAppDatabaseConnector()
-indexes = knowledgeDb.getRecordset(FileContentIndex, recordFilter={"mandateId": mandateId})
-totalBytes = sum(int(idx.get("totalSize") or 0) for idx in indexes)
-return totalBytes / (1024 * 1024)
+from modules.interfaces.interfaceDbKnowledge import aggregateMandateRagTotalBytes
+return aggregateMandateRagTotalBytes(mandateId) / (1024 * 1024)
 except Exception:
 return 0.0
@@ -359,11 +357,18 @@
 # Stripe quantity sync
 # =========================================================================
-def syncQuantityToStripe(self, subscriptionId: str) -> None:
+def syncQuantityToStripe(self, subscriptionId: str, *, raiseOnError: bool = False) -> None:
 """Update Stripe subscription item quantities to match actual active counts.
-Takes subscriptionId, not mandateId."""
+Takes subscriptionId, not mandateId.
+Args:
+raiseOnError: If True, propagate Stripe API errors instead of logging them.
+Use True for billing-critical paths (store activation).
+"""
 sub = self.getById(subscriptionId)
 if not sub or not sub.get("stripeSubscriptionId"):
+if raiseOnError:
+raise ValueError(f"Subscription {subscriptionId} hat keine Stripe-Anbindung — Abrechnung nicht möglich.")
 return
 mandateId = sub["mandateId"]
@@ -389,3 +394,5 @@
 logger.info("Stripe quantity synced for sub %s: users=%d, instances=%d", subscriptionId, activeUsers, activeInstances)
 except Exception as e:
 logger.error("syncQuantityToStripe(%s) failed: %s", subscriptionId, e)
+if raiseOnError:
+raise

View file

@@ -0,0 +1,114 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Migration: Backfill FileContentIndex scope fields from FileItem (Single Source of Truth).
Fixes legacy rows in poweron_knowledge where scope/mandateId/featureInstanceId
are empty or default ("personal") despite the corresponding FileItem having correct values.
Idempotent: safe to run multiple times. Uses a DB flag to skip if already completed.
"""
import logging
from modules.shared.configuration import APP_CONFIG
from modules.connectors.connectorDbPostgre import _get_cached_connector
logger = logging.getLogger(__name__)
_MIGRATION_FLAG_KEY = "migration_rag_scope_fields_completed"
def _isMigrationCompleted(appDb) -> bool:
try:
from modules.datamodels.datamodelUam import Mandate
records = appDb.getRecordset(Mandate, recordFilter={"name": _MIGRATION_FLAG_KEY})
return len(records) > 0
except Exception:
return False
def _setMigrationCompleted(appDb) -> None:
try:
from modules.datamodels.datamodelUam import Mandate
flag = Mandate(name=_MIGRATION_FLAG_KEY, description="RAG scope fields migration completed")
appDb.recordCreate(Mandate, flag)
except Exception as e:
logger.error("Could not set migration flag: %s", e)
def runMigration(appDb=None) -> dict:
"""Backfill FileContentIndex rows from FileItem metadata.
Returns dict with counts: {total, updated, skipped, orphaned}.
"""
from modules.datamodels.datamodelKnowledge import FileContentIndex
from modules.datamodels.datamodelFiles import FileItem
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.interfaces.interfaceDbManagement import ComponentObjects
if appDb is None:
from modules.interfaces.interfaceDbApp import getRootInterface
appDb = getRootInterface().db
if _isMigrationCompleted(appDb):
logger.info("migrateRagScopeFields: already completed, skipping")
return {"total": 0, "updated": 0, "skipped": 0, "orphaned": 0}
knowDb = getKnowledgeInterface(None).db
mgmtDb = ComponentObjects().db
allIndexes = knowDb.getRecordset(FileContentIndex, recordFilter={})
total = len(allIndexes)
updated = 0
skipped = 0
orphaned = 0
logger.info("migrateRagScopeFields: processing %d FileContentIndex rows", total)
for idx in allIndexes:
idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
if not idxId:
skipped += 1
continue
fileItem = mgmtDb._loadRecord(FileItem, str(idxId))
if not fileItem:
orphaned += 1
continue
_get = (lambda k, d="": fileItem.get(k, d)) if isinstance(fileItem, dict) else (lambda k, d="": getattr(fileItem, k, d))
fiScope = _get("scope") or "personal"
fiMandateId = str(_get("mandateId") or "")
fiFeatureInstanceId = str(_get("featureInstanceId") or "")
idxGet = (lambda k, d="": idx.get(k, d)) if isinstance(idx, dict) else (lambda k, d="": getattr(idx, k, d))
currentScope = idxGet("scope") or "personal"
currentMandateId = str(idxGet("mandateId") or "")
currentFeatureInstanceId = str(idxGet("featureInstanceId") or "")
updates = {}
if fiScope != currentScope:
updates["scope"] = fiScope
if fiMandateId and fiMandateId != currentMandateId:
updates["mandateId"] = fiMandateId
if fiFeatureInstanceId and fiFeatureInstanceId != currentFeatureInstanceId:
updates["featureInstanceId"] = fiFeatureInstanceId
if updates:
try:
knowDb.recordModify(FileContentIndex, str(idxId), updates)
updated += 1
logger.debug("migrateRagScopeFields: updated %s -> %s", idxId, updates)
except Exception as e:
logger.error("migrateRagScopeFields: failed to update %s: %s", idxId, e)
skipped += 1
else:
skipped += 1
_setMigrationCompleted(appDb)
logger.info(
"migrateRagScopeFields complete: total=%d, updated=%d, skipped=%d, orphaned=%d",
total, updated, skipped, orphaned,
)
return {"total": total, "updated": updated, "skipped": skipped, "orphaned": orphaned}

View file

@@ -576,14 +576,15 @@ def create_feature_instance(
 config=data.config
 )
-# Sync Stripe quantity after successful creation
 try:
 from modules.interfaces.interfaceDbSubscription import getInterface as _getSubIf2
 from modules.security.rootAccess import getRootUser as _getRU
 _subIf2 = _getSubIf2(_getRU(), mandateIdStr)
-_subIf2.syncQuantityToStripe(mandateIdStr)
-except Exception:
-pass
+_operative = _subIf2.getOperativeForMandate(mandateIdStr)
+if _operative:
+_subIf2.syncQuantityToStripe(_operative["id"], raiseOnError=True)
+except Exception as e:
+logger.error("Stripe quantity sync failed for admin feature creation in mandate %s: %s", mandateIdStr, e)
 logger.info(
 f"User {context.user.id} created feature instance '{data.label}' "

View file

@@ -1104,6 +1104,12 @@ def _handleSubscriptionCheckoutCompleted(session, eventId: str) -> None:
 updatedSub = subInterface.getById(subscriptionRecordId)
 _notifySubscriptionChange(mandateId, "activated", plan, subscriptionRecord=updatedSub, platformUrl=platformUrl)
try:
billingIf = _getRootInterface()
billingIf.creditSubscriptionBudget(mandateId, planKey, periodLabel="Erstaktivierung")
except Exception as ex:
logger.error("creditSubscriptionBudget on activation failed: %s", ex)
 logger.info(
 "Checkout completed: sub=%s -> %s, mandate=%s, plan=%s",
 subscriptionRecordId, toStatus.value, mandateId, planKey,
@@ -1162,9 +1168,14 @@ def _handleSubscriptionWebhook(event) -> None:
 if stripeStatus == "active" and currentStatus == SubscriptionStatusEnum.SCHEDULED:
 subInterface.transitionStatus(subId, SubscriptionStatusEnum.SCHEDULED, SubscriptionStatusEnum.ACTIVE)
 subService.invalidateCache(mandateId)
-plan = _getPlan(sub.get("planKey", ""))
+planKey = sub.get("planKey", "")
+plan = _getPlan(planKey)
 refreshedSub = subInterface.getById(subId)
 _notifySubscriptionChange(mandateId, "activated", plan, subscriptionRecord=refreshedSub, platformUrl=webhookPlatformUrl)
try:
_getRootInterface().creditSubscriptionBudget(mandateId, planKey, periodLabel="Erstaktivierung")
except Exception as ex:
logger.error("creditSubscriptionBudget SCHEDULED->ACTIVE failed: %s", ex)
logger.info("SCHEDULED -> ACTIVE for sub %s (mandate %s)", subId, mandateId) logger.info("SCHEDULED -> ACTIVE for sub %s (mandate %s)", subId, mandateId)
elif stripeStatus == "active" and currentStatus == SubscriptionStatusEnum.PAST_DUE: elif stripeStatus == "active" and currentStatus == SubscriptionStatusEnum.PAST_DUE:
@ -1231,14 +1242,24 @@ def _handleSubscriptionWebhook(event) -> None:
elif event.type == "invoice.paid": elif event.type == "invoice.paid":
period_ts = obj.get("period_start") period_ts = obj.get("period_start")
periodLabel = ""
if period_ts: if period_ts:
period_start_at = datetime.fromtimestamp(int(period_ts), tz=timezone.utc) period_start_at = datetime.fromtimestamp(int(period_ts), tz=timezone.utc)
periodLabel = period_start_at.strftime("%Y-%m-%d")
try: try:
billing_if = _getRootInterface() billing_if = _getRootInterface()
billing_if.resetStorageBillingPeriod(mandateId, period_start_at) billing_if.resetStorageBillingPeriod(mandateId, period_start_at)
billing_if.reconcileMandateStorageBilling(mandateId) billing_if.reconcileMandateStorageBilling(mandateId)
except Exception as ex: except Exception as ex:
logger.error("Storage billing on invoice.paid failed: %s", ex) logger.error("Storage billing on invoice.paid failed: %s", ex)
planKey = sub.get("planKey", "")
try:
billing_if = _getRootInterface()
billing_if.creditSubscriptionBudget(mandateId, planKey, periodLabel=periodLabel or "Periodenverlängerung")
except Exception as ex:
logger.error("creditSubscriptionBudget on invoice.paid failed: %s", ex)
logger.info("Invoice paid for sub %s (mandate %s)", subId, mandateId) logger.info("Invoice paid for sub %s (mandate %s)", subId, mandateId)
return None return None

View file

@@ -1,6 +1,6 @@
 # Copyright (c) 2025 Patrick Motsch
 # All rights reserved.
-from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body
+from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form, Path, Request, status, Query, Response, Body, BackgroundTasks
 from fastapi.responses import JSONResponse
 from typing import List, Dict, Any, Optional
 import logging
@@ -41,13 +41,16 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
 file_meta = mgmtInterface.getFile(fileId)
 feature_instance_id = ""
 mandate_id = ""
+file_scope = "personal"
 if file_meta:
 if isinstance(file_meta, dict):
 feature_instance_id = file_meta.get("featureInstanceId") or ""
 mandate_id = file_meta.get("mandateId") or ""
+file_scope = file_meta.get("scope") or "personal"
 else:
 feature_instance_id = getattr(file_meta, "featureInstanceId", None) or ""
 mandate_id = getattr(file_meta, "mandateId", None) or ""
+file_scope = getattr(file_meta, "scope", None) or "personal"
 logger.info(f"Auto-index starting for {fileName} ({len(rawBytes)} bytes, {mimeType})")
@@ -61,6 +64,7 @@
 userId=userId,
 featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
 mandateId=str(mandate_id) if mandate_id else "",
+scope=file_scope,
 )
 logger.info(
 f"Pre-scan complete for {fileName}: "
@@ -667,6 +671,7 @@ def batch_move_items(
 @limiter.limit("30/minute")
 def updateFileScope(
 request: Request,
+background_tasks: BackgroundTasks,
 fileId: str = Path(..., description="ID of the file"),
 scope: str = Body(..., embed=True),
 context: RequestContext = Depends(getRequestContext),
@@ -700,19 +705,18 @@ def updateFileScope(
 except Exception as e:
 logger.warning(f"Failed to update FileContentIndex scope for file {fileId}: {e}")
-# Trigger re-indexing so RAG embeddings metadata reflects the new scope
-try:
 fileMeta = managementInterface.getFile(fileId)
 if fileMeta:
-import asyncio
-asyncio.ensure_future(_autoIndexFile(
-fileId=fileId,
-fileName=fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", ""),
-mimeType=fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", ""),
-user=context.user,
-))
-except Exception as e:
-logger.warning(f"Failed to trigger re-index after scope change for file {fileId}: {e}")
+fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
+mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
+async def _runReindexAfterScopeChange():
+try:
+await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
+except Exception as ex:
+logger.warning("Re-index after scope change failed for %s: %s", fileId, ex)
+background_tasks.add_task(_runReindexAfterScopeChange)
 return {"fileId": fileId, "scope": scope, "updated": True}
 except HTTPException:
@@ -726,11 +730,18 @@ def updateFileScope(
 @limiter.limit("30/minute")
 def updateFileNeutralize(
 request: Request,
+background_tasks: BackgroundTasks,
 fileId: str = Path(..., description="ID of the file"),
 neutralize: bool = Body(..., embed=True),
 context: RequestContext = Depends(getRequestContext),
 ) -> Dict[str, Any]:
"""Toggle neutralization flag on a file.""" """Toggle neutralization flag on a file.
FAILSAFE: When turning neutralize ON, the existing Knowledge Store index
and all content chunks are deleted SYNCHRONOUSLY before the response is
returned. The re-index happens in a background task. If re-indexing
fails the file simply has no index no un-neutralized data can leak.
"""
 try:
 managementInterface = interfaceDbManagement.getInterface(
 context.user,
@@ -740,35 +751,54 @@ def updateFileNeutralize(
 managementInterface.updateFile(fileId, {"neutralize": neutralize})
-# Update FileContentIndex neutralization metadata
-try:
 from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
-from modules.datamodels.datamodelKnowledge import FileContentIndex
 knowledgeDb = getKnowledgeInterface()
-neutralizationStatus = "neutralized" if neutralize else "original"
if neutralize:
# ── CRITICAL: purge existing (potentially un-neutralized) index
# This MUST succeed before the response is sent so that no stale
# raw-text chunks remain searchable while re-indexing runs.
try:
knowledgeDb.deleteFileContentIndex(fileId)
logger.info("Neutralize toggle ON: deleted index + chunks for file %s", fileId)
except Exception as e:
logger.error("Neutralize toggle ON: FAILED to delete index for file %s: %s", fileId, e)
raise HTTPException(
status_code=500,
detail=f"Could not purge existing index for neutralization — aborting toggle. Error: {e}",
)
else:
# Turning neutralize OFF: update metadata only; re-index will overwrite
try:
from modules.datamodels.datamodelKnowledge import FileContentIndex
 indices = knowledgeDb.db.getRecordset(FileContentIndex, recordFilter={"id": fileId})
 for idx in indices:
 idxId = idx.get("id") if isinstance(idx, dict) else getattr(idx, "id", None)
 if idxId:
-knowledgeDb.db.recordModify(FileContentIndex, idxId, {"neutralizationStatus": neutralizationStatus})
+knowledgeDb.db.recordModify(FileContentIndex, idxId, {
"neutralizationStatus": "original",
"isNeutralized": False,
})
 except Exception as e:
-logger.warning(f"Failed to update FileContentIndex neutralize for file {fileId}: {e}")
+logger.warning("Failed to update FileContentIndex after neutralize-OFF for %s: %s", fileId, e)
-# Trigger re-indexing so content is re-processed with/without neutralization
-try:
+# Background re-index (safe: if it fails, there is simply no index)
 fileMeta = managementInterface.getFile(fileId)
 if fileMeta:
-import asyncio
-asyncio.ensure_future(_autoIndexFile(
-fileId=fileId,
-fileName=fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", ""),
-mimeType=fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", ""),
-user=context.user,
-))
-except Exception as e:
-logger.warning(f"Failed to trigger re-index after neutralize change for file {fileId}: {e}")
+fn = fileMeta.fileName if hasattr(fileMeta, "fileName") else fileMeta.get("fileName", "")
+mt = fileMeta.mimeType if hasattr(fileMeta, "mimeType") else fileMeta.get("mimeType", "")
+async def _runReindexAfterNeutralizeToggle():
+try:
+await _autoIndexFile(fileId=fileId, fileName=fn, mimeType=mt, user=context.user)
+except Exception as ex:
+logger.error("Re-index after neutralize toggle failed for %s: %s (file has NO index until next re-index)", fileId, ex)
+background_tasks.add_task(_runReindexAfterNeutralizeToggle)
 return {"fileId": fileId, "neutralize": neutralize, "updated": True}
except HTTPException:
raise
 except Exception as e:
 logger.error(f"Error updating file neutralize flag: {e}")
 raise HTTPException(status_code=500, detail=str(e))

View file

@@ -282,8 +282,9 @@ def activateStoreFeature(
 context: RequestContext = Depends(getRequestContext)
 ) -> Dict[str, Any]:
 """
-Activate a store feature. Creates a new FeatureInstance in the target mandate.
-If user has no admin mandate, auto-creates a personal mandate.
+Activate a store feature. Billing-gated: a feature instance is ONLY created
+if the Stripe subscription quantity update succeeds (proration confirmed).
+On any billing failure the provisioned instance is rolled back.
 """
 featureCode = data.featureCode
 userId = str(context.user.id)
@@ -302,21 +303,39 @@
 if not _isUserAdminInMandate(db, userId, mandateId):
 raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Not admin in target mandate")
-# Check subscription capacity
-from modules.datamodels.datamodelSubscription import MandateSubscription, BUILTIN_PLANS
-subs = db.getRecordset(MandateSubscription, recordFilter={"mandateId": mandateId})
-if subs:
-sub = subs[0]
-plan = BUILTIN_PLANS.get(sub.get("planKey"))
+# ── 1. Resolve subscription & plan ──────────────────────────────
+from modules.datamodels.datamodelSubscription import MandateSubscription, BUILTIN_PLANS, SubscriptionStatusEnum
+from modules.interfaces.interfaceDbSubscription import _getRootInterface as _getSubRoot
+subInterface = _getSubRoot()
+operative = subInterface.getOperativeForMandate(mandateId)
if not operative:
raise HTTPException(
status_code=status.HTTP_402_PAYMENT_REQUIRED,
detail="Kein aktives Abonnement. Bitte zuerst ein Abo abschliessen.",
)
planKey = operative.get("planKey", "")
plan = BUILTIN_PLANS.get(planKey)
isBillable = plan is not None and (plan.pricePerFeatureInstanceCHF or 0) > 0
if isBillable:
if not operative.get("stripeSubscriptionId") or not operative.get("stripeItemIdInstances"):
raise HTTPException(
status_code=status.HTTP_402_PAYMENT_REQUIRED,
detail="Stripe-Abonnement ist nicht vollständig eingerichtet — Aktivierung nicht möglich.",
)
# ── 2. Capacity check ───────────────────────────────────────────
 if plan and plan.maxFeatureInstances is not None:
 currentInstances = db.getRecordset(FeatureInstance, recordFilter={"mandateId": mandateId})
 if len(currentInstances) >= plan.maxFeatureInstances:
 raise HTTPException(
 status_code=status.HTTP_402_PAYMENT_REQUIRED,
-detail=f"Feature instance limit reached ({plan.maxFeatureInstances}). Upgrade your plan."
+detail=f"Feature-Instanz-Limit erreicht ({plan.maxFeatureInstances}). Bitte Plan upgraden.",
 )
-# Create new FeatureInstance
+# ── 3. Provision instance ───────────────────────────────────────
 featureInterface = getFeatureInterface(db)
 featureLabel = featureDef.get("label", {}).get("en", featureCode)
 instance = featureInterface.createFeatureInstance(
@@ -332,7 +351,6 @@
 instanceId = instance.get("id") if isinstance(instance, dict) else instance.id
-# Grant FeatureAccess with admin role — MUST be feature-specific (e.g. workspace-admin)
 instanceRoles = db.getRecordset(Role, recordFilter={"featureInstanceId": instanceId})
 adminRoleId = None
 for ir in instanceRoles:
@@ -342,21 +360,34 @@
 break
 if not adminRoleId:
+_rollbackInstance(db, instanceId)
 raise HTTPException(
 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-detail=f"No feature-specific admin role (e.g. {featureCode}-admin) found for instance {instanceId}. "
-f"Template roles were not correctly copied.",
+detail=f"Keine Feature-Admin-Rolle für {featureCode} gefunden — Rollback.",
 )
 rootInterface.createFeatureAccess(userId, instanceId, roleIds=[adminRoleId])
-# Sync subscription quantity
+# ── 4. Billing gate: Stripe quantity sync (MUST succeed) ────────
if isBillable:
try:
rootInterface._syncSubscriptionQuantity(mandateId, raiseOnError=True)
except Exception as e:
logger.error("Stripe billing for feature activation failed — rolling back instance %s: %s", instanceId, e)
_rollbackInstance(db, instanceId, userId=userId)
raise HTTPException(
status_code=status.HTTP_402_PAYMENT_REQUIRED,
detail=f"Stripe-Abrechnung fehlgeschlagen: {e}. Feature wurde NICHT aktiviert.",
)
else:
 try:
 rootInterface._syncSubscriptionQuantity(mandateId)
 except Exception as e:
-logger.warning(f"Failed to sync subscription quantity: {e}")
+logger.warning("Non-critical Stripe sync failed for free feature: %s", e)
-logger.info(f"User {userId} activated '{featureCode}' in mandate {mandateId} (instance={instanceId})")
+# ── 5. Confirmed — notify ──────────────────────────────────────
_notifyFeatureActivation(mandateId, featureLabel, featureCode, sub=operative, plan=plan)
logger.info("User %s activated '%s' in mandate %s (instance=%s, billed=%s)", userId, featureCode, mandateId, instanceId, isBillable)
 return {
 "featureCode": featureCode,
@@ -412,11 +443,10 @@ def deactivateStoreFeature(
 instanceDeleted = True
 logger.info(f"Orphan Control: deleted instance {instanceId} (no remaining accesses)")
-# Sync subscription quantity
 try:
-rootInterface._syncSubscriptionQuantity(mandateId)
+rootInterface._syncSubscriptionQuantity(mandateId, raiseOnError=True)
 except Exception as e:
-logger.warning(f"Failed to sync subscription quantity: {e}")
+logger.error("Stripe quantity sync after deactivation failed for mandate %s: %s", mandateId, e)
 logger.info(f"User {userId} deactivated instance {instanceId} in mandate {mandateId} (deleted={instanceDeleted})")
@@ -433,3 +463,52 @@
 except Exception as e:
 logger.error(f"Error deactivating store feature: {e}")
 raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
# ============================================================================
# Internal helpers
# ============================================================================
def _rollbackInstance(db, instanceId: str, userId: str = None) -> None:
"""Delete a freshly provisioned FeatureInstance (and its access) on billing failure."""
try:
if userId:
accesses = db.getRecordset(FeatureAccess, recordFilter={"userId": userId, "featureInstanceId": instanceId})
for a in accesses:
db.recordDelete(FeatureAccess, a.get("id"))
db.recordDelete(FeatureInstance, instanceId)
logger.info("Rolled back feature instance %s (billing gate)", instanceId)
except Exception as e:
logger.error("Rollback of instance %s failed: %s", instanceId, e)
def _notifyFeatureActivation(
mandateId: str,
featureLabel: str,
featureCode: str,
sub: dict = None,
plan = None,
) -> None:
"""Send email notification to mandate admins about a newly activated feature."""
try:
from modules.shared.notifyMandateAdmins import notifyMandateAdmins
priceLine = ""
if plan and plan.pricePerFeatureInstanceCHF:
priceLine = f"Kosten: CHF {plan.pricePerFeatureInstanceCHF:.2f} / {plan.billingPeriod.value} (anteilig via Stripe-Proration)."
bodyParagraphs = [
f"Die Feature-Instanz «{featureLabel}» ({featureCode}) wurde soeben für Ihren Mandanten aktiviert.",
]
if priceLine:
bodyParagraphs.append(priceLine)
bodyParagraphs.append("Die Stripe-Abrechnung wird automatisch angepasst.")
notifyMandateAdmins(
mandateId=mandateId,
subject=f"Feature aktiviert: {featureLabel}",
headline="Neue Feature-Instanz aktiviert",
bodyParagraphs=bodyParagraphs,
)
except Exception as e:
logger.warning("_notifyFeatureActivation failed for mandate %s: %s", mandateId, e)

View file

@@ -183,7 +183,7 @@ def activatePlan(
 @router.post("/cancel", response_model=Dict[str, Any])
-@limiter.limit("5/minute")
+@limiter.limit("30/minute")
 def cancelSubscription(
 request: Request,
 data: CancelRequest,
@@ -209,7 +209,7 @@ def cancelSubscription(
 @router.post("/reactivate", response_model=Dict[str, Any])
-@limiter.limit("5/minute")
+@limiter.limit("30/minute")
 def reactivateSubscription(
 request: Request,
 data: ReactivateRequest,
@@ -235,7 +235,7 @@ def reactivateSubscription(
 @router.post("/force-cancel", response_model=Dict[str, Any])
-@limiter.limit("5/minute")
+@limiter.limit("30/minute")
 def forceCancel(
 request: Request,
 data: ForceCancelRequest,
@@ -451,46 +451,47 @@ def _getDataVolumeUsage(
 """Calculate current data volume usage for a mandate vs. plan limit."""
 from modules.interfaces.interfaceDbApp import getRootInterface
 from modules.datamodels.datamodelFiles import FileItem
-from modules.datamodels.datamodelSubscription import MandateSubscription, SubscriptionPlan
-from modules.datamodels.datamodelFeature import FeatureInstance
+from modules.datamodels.datamodelFeatures import FeatureInstance
+from modules.interfaces.interfaceDbKnowledge import aggregateMandateRagTotalBytes
+from modules.interfaces.interfaceDbManagement import getInterface as getMgmtInterface
+from modules.interfaces.interfaceDbSubscription import _getRootInterface as _getSubRootIf
 rootIf = getRootInterface()
 mandateId = targetMandateId
 instances = rootIf.db.getRecordset(FeatureInstance, recordFilter={"mandateId": mandateId})
-totalBytes = 0
-for inst in instances:
-instId = inst.get("id") if isinstance(inst, dict) else getattr(inst, "id", None)
-if not instId:
-continue
-files = rootIf.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instId})
+instIds = [str(inst.get("id") or "") for inst in instances if inst.get("id")]
+mgmtDb = getMgmtInterface().db
+totalFileBytes = 0
+for instId in instIds:
+files = mgmtDb.getRecordset(FileItem, recordFilter={"featureInstanceId": instId})
 for f in files:
 size = f.get("fileSize") if isinstance(f, dict) else getattr(f, "fileSize", 0)
-totalBytes += (size or 0)
+totalFileBytes += (size or 0)
mandateFiles = mgmtDb.getRecordset(FileItem, recordFilter={"mandateId": mandateId})
for f in mandateFiles:
size = f.get("fileSize") if isinstance(f, dict) else getattr(f, "fileSize", 0)
totalFileBytes += (size or 0)
filesMB = round(totalFileBytes / (1024 * 1024), 2)
filesMB = round(totalBytes / (1024 * 1024), 2) ragBytes = aggregateMandateRagTotalBytes(mandateId)
from modules.datamodels.datamodelKnowledge import FileContentIndex
ragIndexes = rootIf.db.getRecordset(FileContentIndex, recordFilter={"mandateId": mandateId})
ragBytes = sum(int(idx.get("totalSize") or 0) if isinstance(idx, dict) else int(getattr(idx, "totalSize", 0) or 0) for idx in ragIndexes)
ragMB = round(ragBytes / (1024 * 1024), 2) ragMB = round(ragBytes / (1024 * 1024), 2)
maxMB = None maxMB = None
subs = rootIf.db.getRecordset(MandateSubscription, recordFilter={"mandateId": mandateId}) subIf = _getSubRootIf()
for sub in subs: operative = subIf.getOperativeForMandate(mandateId)
planKey = sub.get("planKey") if isinstance(sub, dict) else getattr(sub, "planKey", "") if operative:
if planKey: plan = subIf.getPlan(operative.get("planKey") or "")
plans = rootIf.db.getRecordset(SubscriptionPlan, recordFilter={"planKey": planKey}) if plan and plan.maxDataVolumeMB is not None:
for plan in plans: maxMB = int(plan.maxDataVolumeMB)
limit = plan.get("maxDataVolumeMB") if isinstance(plan, dict) else getattr(plan, "maxDataVolumeMB", None)
if limit:
maxMB = limit
break
if maxMB:
break
usedMB = ragMB usedMB = ragMB
percentUsed = round((usedMB / maxMB) * 100, 1) if maxMB else None percentUsed = round((usedMB / maxMB) * 100, 1) if maxMB else None
logger.info(
"data-volume mandate=%s: files=%.2f MB, rag=%.2f MB, max=%s MB",
mandateId, filesMB, ragMB, maxMB,
)
return { return {
"mandateId": mandateId, "mandateId": mandateId,
"usedMB": usedMB, "usedMB": usedMB,

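As a reading aid for this hunk, a minimal self-contained sketch of the usage calculation, assuming byte totals are already aggregated; the name summarizeDataVolume is illustrative and not part of the codebase:

from typing import Optional

def summarizeDataVolume(totalFileBytes: int, ragBytes: int, maxMB: Optional[int]) -> dict:
    """Convert byte totals to MB and compute percent of the plan limit (None when the plan has no limit)."""
    filesMB = round(totalFileBytes / (1024 * 1024), 2)
    ragMB = round(ragBytes / (1024 * 1024), 2)
    usedMB = ragMB  # billing counts the RAG index, as in the hunk above
    percentUsed = round((usedMB / maxMB) * 100, 1) if maxMB else None
    return {"filesMB": filesMB, "ragMB": ragMB, "usedMB": usedMB, "maxMB": maxMB, "percentUsed": percentUsed}

# Example: 512 MiB of indexed RAG content against a 2048 MB plan limit -> 25.0 percent used
print(summarizeDataVolume(300 * 1024 * 1024, 512 * 1024 * 1024, 2048))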
View file

@ -463,7 +463,7 @@ async def save_voice_settings(
currentUser: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Save voice settings for the current user (writes to UserVoicePreferences).""" """Save voice settings for the current user (writes to UserVoicePreferences)."""
from modules.datamodels.datamodelUam import UserVoicePreferences from modules.datamodels.datamodelUam import UserVoicePreferences, _normalizeTtsVoiceMap
from modules.security.rootAccess import getRootInterface from modules.security.rootAccess import getRootInterface
rootInterface = getRootInterface() rootInterface = getRootInterface()
userId = str(currentUser.id) userId = str(currentUser.id)
@ -473,6 +473,8 @@ async def save_voice_settings(
"translationSourceLanguage", "translationTargetLanguage", "translationSourceLanguage", "translationTargetLanguage",
} }
updateData = {k: v for k, v in settings.items() if k in allowedFields} updateData = {k: v for k, v in settings.items() if k in allowedFields}
if "ttsVoiceMap" in updateData:
updateData["ttsVoiceMap"] = _normalizeTtsVoiceMap(updateData["ttsVoiceMap"])
existing = rootInterface.db.getRecordset( existing = rootInterface.db.getRecordset(
UserVoicePreferences, recordFilter={"userId": userId} UserVoicePreferences, recordFilter={"userId": userId}

View file

@ -14,7 +14,7 @@ from typing import Any, Dict
from fastapi import APIRouter, Body, Depends, HTTPException, Query, Request, status from fastapi import APIRouter, Body, Depends, HTTPException, Query, Request, status
from modules.auth import getCurrentUser, limiter from modules.auth import getCurrentUser, limiter
from modules.datamodels.datamodelUam import User, UserVoicePreferences from modules.datamodels.datamodelUam import User, UserVoicePreferences, _normalizeTtsVoiceMap
from modules.interfaces.interfaceDbApp import getRootInterface from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceVoiceObjects import getVoiceInterface from modules.interfaces.interfaceVoiceObjects import getVoiceInterface
@ -79,6 +79,8 @@ def updateVoicePreferences(
"translationTargetLanguage", "translationTargetLanguage",
} }
updateData = {k: v for k, v in preferences.items() if k in allowedFields} updateData = {k: v for k, v in preferences.items() if k in allowedFields}
if "ttsVoiceMap" in updateData:
updateData["ttsVoiceMap"] = _normalizeTtsVoiceMap(updateData["ttsVoiceMap"])
if existing: if existing:
existingRecord = existing[0] existingRecord = existing[0]

View file

@ -27,6 +27,28 @@ _MAX_TOOL_RESULT_CHARS = 50_000
_BINARY_SIGNATURES = (b"%PDF", b"\x89PNG", b"\xff\xd8\xff", b"GIF8", b"PK\x03\x04", b"Rar!", b"\x1f\x8b") _BINARY_SIGNATURES = (b"%PDF", b"\x89PNG", b"\xff\xd8\xff", b"GIF8", b"PK\x03\x04", b"Rar!", b"\x1f\x8b")
def _resolveFileScope(fileId: str, context: dict) -> tuple:
"""Resolve featureInstanceId and mandateId for a file from context or management DB.
Returns (featureInstanceId, mandateId); values are never None, always strings.
"""
fiId = context.get("featureInstanceId", "") or ""
mId = context.get("mandateId", "") or ""
if fiId and mId:
return fiId, mId
try:
from modules.datamodels.datamodelFiles import FileItem
from modules.interfaces.interfaceDbManagement import ComponentObjects
fm = ComponentObjects().db._loadRecord(FileItem, fileId)
if fm:
_get = (lambda k: fm.get(k, "")) if isinstance(fm, dict) else (lambda k: getattr(fm, k, ""))
fiId = fiId or str(_get("featureInstanceId") or "")
mId = mId or str(_get("mandateId") or "")
except Exception:
pass
return fiId, mId
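The pattern in _resolveFileScope is "context first, stored record only as fallback". A reduced sketch under that assumption; resolveScope and loadRecord are illustrative names, not the module's API:

from typing import Callable, Optional, Tuple

def resolveScope(context: dict, loadRecord: Callable[[], Optional[dict]]) -> Tuple[str, str]:
    """Prefer scope ids from the call context; fall back to a stored record only when needed."""
    fiId = str(context.get("featureInstanceId") or "")
    mId = str(context.get("mandateId") or "")
    if fiId and mId:
        return fiId, mId
    record = loadRecord() or {}
    return fiId or str(record.get("featureInstanceId") or ""), mId or str(record.get("mandateId") or "")

# Example: context carries only the mandate, the record supplies the feature instance
print(resolveScope({"mandateId": "m-1"}, lambda: {"featureInstanceId": "fi-9", "mandateId": "m-1"}))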
def _looksLikeBinary(data: bytes, sampleSize: int = 1024) -> bool: def _looksLikeBinary(data: bytes, sampleSize: int = 1024) -> bool:
"""Detect binary content by checking for magic bytes and non-printable char ratio.""" """Detect binary content by checking for magic bytes and non-printable char ratio."""
if any(data[:8].startswith(sig) for sig in _BINARY_SIGNATURES): if any(data[:8].startswith(sig) for sig in _BINARY_SIGNATURES):
@ -602,16 +624,29 @@ def _registerCoreTools(registry: ToolRegistry, services):
if knowledgeService: if knowledgeService:
try: try:
userId = context.get("userId", "") userId = context.get("userId", "")
_fiId, _mId = _resolveFileScope(fileId, context)
await knowledgeService.indexFile( await knowledgeService.indexFile(
fileId=fileId, fileName=fileName, mimeType=mimeType, fileId=fileId, fileName=fileName, mimeType=mimeType,
userId=userId, contentObjects=contentObjects, userId=userId, contentObjects=contentObjects,
featureInstanceId=_fiId,
mandateId=_mId,
) )
except Exception: except Exception:
pass pass
joined = ""
if knowledgeService:
_chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
_textChunks = [
c for c in (_chunks or [])
if c.get("contentType") != "image" and c.get("data")
]
if _textChunks:
joined = "\n\n".join(c["data"] for c in _textChunks)
if not joined:
textParts = [o["data"] for o in contentObjects if o["contentType"] != "image"] textParts = [o["data"] for o in contentObjects if o["contentType"] != "image"]
if textParts: joined = "\n\n".join(textParts) if textParts else ""
joined = "\n\n".join(textParts) if joined:
chunked = _applyOffsetLimit(joined, offset, limit) chunked = _applyOffsetLimit(joined, offset, limit)
if chunked is not None: if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked) return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
@ -642,6 +677,36 @@ def _registerCoreTools(registry: ToolRegistry, services):
try: try:
text = rawBytes.decode(encoding) text = rawBytes.decode(encoding)
if text.strip(): if text.strip():
_fileNeedNeutralize = False
try:
from modules.datamodels.datamodelFiles import FileItem as _FI
from modules.interfaces.interfaceDbManagement import ComponentObjects as _CO
_fRec = _CO().db._loadRecord(_FI, fileId)
if _fRec:
_fG = (lambda k, d=None: _fRec.get(k, d)) if isinstance(_fRec, dict) else (lambda k, d=None: getattr(_fRec, k, d))
_fileNeedNeutralize = bool(_fG("neutralize", False))
except Exception:
pass
if _fileNeedNeutralize:
try:
_nSvc = services.getService("neutralization") if hasattr(services, "getService") else None
if _nSvc and hasattr(_nSvc, 'processText'):
_nResult = _nSvc.processText(text)
if _nResult and _nResult.get("neutralized_text"):
text = _nResult["neutralized_text"]
logger.debug(f"readFile: neutralized text for file {fileId}")
else:
logger.warning(f"readFile: neutralization failed for file {fileId}, blocking text (fail-safe)")
return ToolResult(toolCallId="", toolName="readFile", success=True,
data="[File requires neutralization but neutralization failed. Content blocked for data protection.]")
else:
logger.warning(f"readFile: neutralization required but service unavailable for file {fileId}")
return ToolResult(toolCallId="", toolName="readFile", success=True,
data="[File requires neutralization but service unavailable. Content blocked for data protection.]")
except Exception as _nErr:
logger.error(f"readFile: neutralization error for file {fileId}: {_nErr}")
return ToolResult(toolCallId="", toolName="readFile", success=True,
data="[File requires neutralization but an error occurred. Content blocked for data protection.]")
chunked = _applyOffsetLimit(text, offset, limit) chunked = _applyOffsetLimit(text, offset, limit)
if chunked is not None: if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked) return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
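The readFile branch above implements a fail-closed gate: once a file is flagged neutralize, the tool may only return neutralized text or a block notice, never the raw content. A minimal sketch of that decision, assuming the processText contract from this diff (a dict with "neutralized_text"); gateText is a hypothetical name:

BLOCKED = "[File requires neutralization but it could not be completed. Content blocked for data protection.]"

def gateText(text: str, mustNeutralize: bool, neutralizer) -> str:
    """Fail-safe gate: when neutralization is required, return neutralized text or a block notice, never the original."""
    if not mustNeutralize:
        return text
    if neutralizer is None:
        return BLOCKED
    try:
        result = neutralizer(text) or {}
        return result.get("neutralized_text") or BLOCKED
    except Exception:
        return BLOCKED

# A missing service fails closed: the caller still never sees the raw text
print(gateText("Kunde: Max Muster", True, None))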
@ -1562,7 +1627,7 @@ def _registerCoreTools(registry: ToolRegistry, services):
} }
async def _resolveDataSource(dsId: str): async def _resolveDataSource(dsId: str):
"""Resolve a DataSource record and return (connectionId, service, path) or raise.""" """Resolve a DataSource record and return (connectionId, service, path, neutralize) or raise."""
chatService = services.chat chatService = services.chat
ds = chatService.getDataSource(dsId) if hasattr(chatService, "getDataSource") else None ds = chatService.getDataSource(dsId) if hasattr(chatService, "getDataSource") else None
if not ds: if not ds:
@ -1571,11 +1636,12 @@ def _registerCoreTools(registry: ToolRegistry, services):
sourceType = ds.get("sourceType", "") sourceType = ds.get("sourceType", "")
path = ds.get("path", "/") path = ds.get("path", "/")
label = ds.get("label", "") label = ds.get("label", "")
neutralize = bool(ds.get("neutralize", False))
service = _SOURCE_TYPE_TO_SERVICE.get(sourceType, sourceType) service = _SOURCE_TYPE_TO_SERVICE.get(sourceType, sourceType)
if not connectionId: if not connectionId:
raise ValueError(f"DataSource '{dsId}' has no connectionId") raise ValueError(f"DataSource '{dsId}' has no connectionId")
logger.info(f"Resolved DataSource '{dsId}' ({label}): sourceType={sourceType}, service={service}, connectionId={connectionId}, path={path[:80]}") logger.info(f"Resolved DataSource '{dsId}' ({label}): sourceType={sourceType}, service={service}, connectionId={connectionId}, path={path[:80]}, neutralize={neutralize}")
return connectionId, service, path return connectionId, service, path, neutralize
_MAIL_SERVICES = {"outlook", "gmail"} _MAIL_SERVICES = {"outlook", "gmail"}
@ -1589,7 +1655,7 @@ def _registerCoreTools(registry: ToolRegistry, services):
error="Provide either dataSourceId OR connectionId+service") error="Provide either dataSourceId OR connectionId+service")
try: try:
if dsId: if dsId:
connectionId, service, basePath = await _resolveDataSource(dsId) connectionId, service, basePath, _neutralize = await _resolveDataSource(dsId)
else: else:
connectionId, service, basePath = directConnId, directService, args.get("path", "/") connectionId, service, basePath = directConnId, directService, args.get("path", "/")
if subPath: if subPath:
@ -1632,7 +1698,7 @@ def _registerCoreTools(registry: ToolRegistry, services):
error="Provide either dataSourceId OR connectionId+service") error="Provide either dataSourceId OR connectionId+service")
try: try:
if dsId: if dsId:
connectionId, service, basePath = await _resolveDataSource(dsId) connectionId, service, basePath, _neutralize = await _resolveDataSource(dsId)
else: else:
connectionId, service, basePath = directConnId, directService, args.get("path", "/") connectionId, service, basePath = directConnId, directService, args.get("path", "/")
from modules.connectors.connectorResolver import ConnectorResolver from modules.connectors.connectorResolver import ConnectorResolver
@ -1666,8 +1732,9 @@ def _registerCoreTools(registry: ToolRegistry, services):
try: try:
from modules.connectors.connectorResolver import ConnectorResolver from modules.connectors.connectorResolver import ConnectorResolver
from modules.connectors.connectorProviderBase import DownloadResult as _DR from modules.connectors.connectorProviderBase import DownloadResult as _DR
_sourceNeutralize = False
if dsId: if dsId:
connectionId, service, basePath = await _resolveDataSource(dsId) connectionId, service, basePath, _sourceNeutralize = await _resolveDataSource(dsId)
else: else:
connectionId, service, basePath = directConnId, directService, "/" connectionId, service, basePath = directConnId, directService, "/"
fullPath = filePath if filePath.startswith("/") else f"{basePath.rstrip('/')}/{filePath}" fullPath = filePath if filePath.startswith("/") else f"{basePath.rstrip('/')}/{filePath}"
@ -1710,6 +1777,8 @@ def _registerCoreTools(registry: ToolRegistry, services):
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "") fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
if fiId: if fiId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"featureInstanceId": fiId}) chatService.interfaceDbComponent.updateFile(fileItem.id, {"featureInstanceId": fiId})
if _sourceNeutralize:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"neutralize": True})
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
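Taken together, these hunks extend the resolver contract to a 4-tuple and propagate the source's neutralize flag onto downloaded files. A simplified sketch of that contract (the real code resolves a DataSource record and a service mapping; resolveSource here is illustrative):

def resolveSource(dataSource: dict) -> tuple:
    """Mirror of the 4-tuple contract: (connectionId, service, path, neutralize); neutralize defaults to False."""
    return (
        dataSource.get("connectionId", ""),
        dataSource.get("sourceType", ""),
        dataSource.get("path", "/"),
        bool(dataSource.get("neutralize", False)),
    )

connectionId, service, basePath, sourceNeutralize = resolveSource(
    {"connectionId": "c-1", "sourceType": "sharepoint", "path": "/docs", "neutralize": True}
)
# Downloaded files inherit the flag so later readFile/describeImage calls stay in neutralization mode
filePatch = {"neutralize": True} if sourceNeutralize else {}
print(connectionId, service, basePath, filePatch)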
@ -2040,9 +2109,12 @@ def _registerCoreTools(registry: ToolRegistry, services):
}) })
if contentObjects: if contentObjects:
_diFiId, _diMId = _resolveFileScope(fileId, context)
await knowledgeService.indexFile( await knowledgeService.indexFile(
fileId=fileId, fileName=fileName, mimeType=fileMime, fileId=fileId, fileName=fileName, mimeType=fileMime,
userId=context.get("userId", ""), contentObjects=contentObjects, userId=context.get("userId", ""), contentObjects=contentObjects,
featureInstanceId=_diFiId,
mandateId=_diMId,
) )
chunks = knowledgeService._knowledgeDb.getContentChunks(fileId) chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
@ -2088,9 +2160,22 @@ def _registerCoreTools(registry: ToolRegistry, services):
dataUrl = f"data:{mimeType};base64,{imageData}" dataUrl = f"data:{mimeType};base64,{imageData}"
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum as OTE from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum as OTE
_opType = OTE.IMAGE_ANALYSE
try:
from modules.datamodels.datamodelFiles import FileItem as _FileItemModel
from modules.interfaces.interfaceDbManagement import ComponentObjects as _CO
_fRow = _CO().db._loadRecord(_FileItemModel, fileId)
if _fRow:
_fGet = (lambda k, d=None: _fRow.get(k, d)) if isinstance(_fRow, dict) else (lambda k, d=None: getattr(_fRow, k, d))
if bool(_fGet("neutralize", False)):
_opType = OTE.NEUTRALIZATION_IMAGE
logger.info(f"describeImage: file {fileId} has neutralize=True, using NEUTRALIZATION_IMAGE (internal models only)")
except Exception:
pass
visionRequest = AiCallRequest( visionRequest = AiCallRequest(
prompt=prompt, prompt=prompt,
options=AiCallOptions(operationType=OTE.IMAGE_ANALYSE), options=AiCallOptions(operationType=_opType),
messages=[{"role": "user", "content": [ messages=[{"role": "user", "content": [
{"type": "text", "text": prompt}, {"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": dataUrl}}, {"type": "image_url", "image_url": {"url": dataUrl}},
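The operation-type switch above is what routes neutralize-flagged images to the internal connectors (only models advertising NEUTRALIZATION_IMAGE are eligible). A small sketch of the routing rule; the enum values mirror this commit, the function name is illustrative:

from enum import Enum

class OpType(str, Enum):
    IMAGE_ANALYSE = "imageAnalyse"
    NEUTRALIZATION_IMAGE = "neutralizationImage"

def pickVisionOpType(fileRecord: dict) -> OpType:
    """Route files flagged for neutralization to the dedicated on-premise operation type."""
    return OpType.NEUTRALIZATION_IMAGE if fileRecord.get("neutralize") else OpType.IMAGE_ANALYSE

print(pickVisionOpType({"neutralize": True}))   # OpType.NEUTRALIZATION_IMAGE
print(pickVisionOpType({}))                     # OpType.IMAGE_ANALYSE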
@ -3099,6 +3184,11 @@ def _registerCoreTools(registry: ToolRegistry, services):
recordFilter={"featureInstanceId": featureInstanceId, "workspaceInstanceId": workspaceInstanceId}, recordFilter={"featureInstanceId": featureInstanceId, "workspaceInstanceId": workspaceInstanceId},
) )
_anySourceNeutralize = any(
bool(ds.get("neutralize", False) if isinstance(ds, dict) else getattr(ds, "neutralize", False))
for ds in (featureDataSources or [])
)
from modules.security.rbacCatalog import getCatalogService from modules.security.rbacCatalog import getCatalogService
catalog = getCatalogService() catalog = getCatalogService()
if not featureDataSources: if not featureDataSources:
@ -3133,6 +3223,8 @@ def _registerCoreTools(registry: ToolRegistry, services):
) )
async def _subAgentAiCall(req): async def _subAgentAiCall(req):
if _anySourceNeutralize:
req.requireNeutralization = True
return await aiService.callAi(req) return await aiService.callAi(req)
try: try:
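The sub-agent wiring above follows the same OR principle: if any attached data source is flagged, the wrapped AI call forces requireNeutralization. A reduced sketch under that assumption (function names are illustrative):

def anySourceRequiresNeutralization(dataSources: list) -> bool:
    """True if at least one attached data source is flagged neutralize (OR across sources)."""
    return any(bool(ds.get("neutralize", False)) for ds in (dataSources or []))

def buildSubAgentAiCall(callAi, forceNeutralization: bool):
    """Wrap the AI call so a sub-agent inherits the neutralization requirement of its sources."""
    async def _call(req):
        if forceNeutralization:
            req.requireNeutralization = True
        return await callAi(req)
    return _call

print(anySourceRequiresNeutralization([{"neutralize": False}, {"neutralize": True}]))  # True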

View file

@ -200,10 +200,6 @@ class AiService:
finally: finally:
self.aiObjects.billingCallback = None self.aiObjects.billingCallback = None
# Rehydrate neutralization placeholders in response
if _wasNeutralized and response and hasattr(response, 'content') and response.content:
response.content = self._rehydrateResponse(response.content)
# Attach neutralization exclusion metadata if any parts failed # Attach neutralization exclusion metadata if any parts failed
if _excludedDocs and response: if _excludedDocs and response:
if not hasattr(response, 'metadata') or response.metadata is None: if not hasattr(response, 'metadata') or response.metadata is None:
@ -240,10 +236,7 @@ class AiService:
self.aiObjects.billingCallback = self._createBillingCallback() self.aiObjects.billingCallback = self._createBillingCallback()
try: try:
async for chunk in self.aiObjects.callWithTextContextStream(request): async for chunk in self.aiObjects.callWithTextContextStream(request):
# Rehydrate the final AiCallResponse (non-str chunks are the final response)
if not isinstance(chunk, str): if not isinstance(chunk, str):
if _wasNeutralized and hasattr(chunk, 'content') and chunk.content:
chunk.content = self._rehydrateResponse(chunk.content)
if _excludedDocs: if _excludedDocs:
if not hasattr(chunk, 'metadata') or chunk.metadata is None: if not hasattr(chunk, 'metadata') or chunk.metadata is None:
chunk.metadata = {} chunk.metadata = {}
@ -566,34 +559,70 @@ detectedIntent-Werte:
def _shouldNeutralize(self, request: AiCallRequest) -> bool: def _shouldNeutralize(self, request: AiCallRequest) -> bool:
"""Check if this AI request should have neutralization applied. """Check if this AI request should have neutralization applied.
Per-request override: requireNeutralization=True forces it, False skips it.
Only applies to text prompts -- not embeddings or image processing.""" OR-logic across three sources (any True means neutralize):
1. Feature-Instance config (NeutralizationConfig.enabled)
2. Workflow/Session (context.requireNeutralization)
3. Per-request (request.requireNeutralization)
No source can override another's True with False.
"""
try: try:
if request.requireNeutralization is False: if not request.prompt and not request.messages and not request.context:
return False
if not request.prompt and not request.messages:
return False return False
_sources = []
# Source 1: Feature-Instance config
_neutralSvc = self._get_service("neutralization")
if _neutralSvc and hasattr(_neutralSvc, 'getConfig'):
_config = _neutralSvc.getConfig()
if _config and getattr(_config, 'enabled', False):
_sources.append("featureInstance")
# Source 2: Workflow / Session context
_ctx = getattr(self.services, '_context', None)
_ctxFlag = getattr(_ctx, "requireNeutralization", None) if _ctx else None
if _ctxFlag is True:
_sources.append("context")
# Source 3: Per-request flag
if request.requireNeutralization is True: if request.requireNeutralization is True:
_sources.append("request")
if _sources:
logger.debug(f"Neutralization required by: {', '.join(_sources)}")
request.requireNeutralization = True
return True return True
neutralSvc = self._get_service("neutralization")
if not neutralSvc:
return False return False
config = neutralSvc.getConfig() if hasattr(neutralSvc, 'getConfig') else None except Exception as e:
if not config or not getattr(config, 'enabled', False): logger.error(f"_shouldNeutralize check failed: {e} — defaulting to False")
return False
return True
except Exception:
return False return False
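The docstring's OR rule reduces to a single boolean expression. A minimal standalone sketch of it (shouldNeutralize and the flag parameters are illustrative names):

from typing import Optional

def shouldNeutralize(configEnabled: bool, contextFlag: Optional[bool], requestFlag: Optional[bool],
                     hasPayload: bool = True) -> bool:
    # OR across feature-instance config, workflow/session context and per-request flag.
    # No source can override another source's True with False.
    if not hasPayload:
        return False
    return bool(configEnabled) or contextFlag is True or requestFlag is True

print(shouldNeutralize(True, None, False))   # True: a request-level False does not veto the enabled config
print(shouldNeutralize(False, None, None))   # False: no source requires neutralization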
def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]: def _neutralizeRequest(self, request: AiCallRequest) -> Tuple[AiCallRequest, bool, List[str]]:
"""Neutralize the prompt text and messages in an AiCallRequest. """Neutralize the prompt text and messages in an AiCallRequest.
Returns (modifiedRequest, wasNeutralized, excludedDocs). Returns (modifiedRequest, wasNeutralized, excludedDocs).
Fail-safe: failing parts are excluded instead of aborting the entire call."""
FAILSAFE behaviour when ``requireNeutralization is True`` (explicit):
- Service unavailable: raises (caller must not send raw data to AI).
- Prompt neutralization fails: raises.
- Individual message neutralization fails: the message is **removed**
(not kept in original form) and noted in excludedDocs.
When neutralization is only config-driven (requireNeutralization is
None) the behaviour is softer: failures are logged, the originals are
kept, and a warning is recorded in excludedDocs.
"""
_hardMode = request.requireNeutralization is True
excludedDocs: List[str] = [] excludedDocs: List[str] = []
neutralSvc = self._get_service("neutralization") neutralSvc = self._get_service("neutralization")
if not neutralSvc or not hasattr(neutralSvc, 'processText'): if not neutralSvc or not hasattr(neutralSvc, 'processText'):
logger.warning("Neutralization required but neutralization service is unavailable — continuing without neutralization") if _hardMode:
raise RuntimeError("Neutralization explicitly required but service unavailable — AI call BLOCKED")
logger.warning("Neutralization required by config but service unavailable — continuing without neutralization")
excludedDocs.append("Neutralization service unavailable; prompt sent un-neutralized") excludedDocs.append("Neutralization service unavailable; prompt sent un-neutralized")
return request, False, excludedDocs return request, False, excludedDocs
@ -607,28 +636,148 @@ detectedIntent-Werte:
_wasNeutralized = True _wasNeutralized = True
logger.debug("Neutralized prompt in AiCallRequest") logger.debug("Neutralized prompt in AiCallRequest")
else: else:
if _hardMode:
raise RuntimeError("Prompt neutralization returned empty — AI call BLOCKED (hard mode)")
logger.warning("Neutralization of prompt returned no neutralized_text — sending original prompt") logger.warning("Neutralization of prompt returned no neutralized_text — sending original prompt")
excludedDocs.append("Prompt neutralization failed; original prompt used") excludedDocs.append("Prompt neutralization failed; original prompt used")
except RuntimeError:
raise
except Exception as e: except Exception as e:
if _hardMode:
raise RuntimeError(f"Prompt neutralization failed — AI call BLOCKED: {e}") from e
logger.warning(f"Neutralization of prompt failed: {e} — sending original prompt") logger.warning(f"Neutralization of prompt failed: {e} — sending original prompt")
excludedDocs.append(f"Prompt neutralization error: {e}") excludedDocs.append(f"Prompt neutralization error: {e}")
if request.context:
try:
result = neutralSvc.processText(request.context)
if result and result.get("neutralized_text"):
request.context = result["neutralized_text"]
_wasNeutralized = True
logger.debug("Neutralized context in AiCallRequest")
else:
if _hardMode:
raise RuntimeError("Context neutralization returned empty — AI call BLOCKED (hard mode)")
logger.warning("Neutralization of context returned no neutralized_text — sending original context")
excludedDocs.append("Context neutralization failed; original context used")
except RuntimeError:
raise
except Exception as e:
if _hardMode:
raise RuntimeError(f"Context neutralization failed — AI call BLOCKED: {e}") from e
logger.warning(f"Neutralization of context failed: {e} — sending original context")
excludedDocs.append(f"Context neutralization error: {e}")
if request.messages and isinstance(request.messages, list): if request.messages and isinstance(request.messages, list):
cleanMessages = []
for idx, msg in enumerate(request.messages): for idx, msg in enumerate(request.messages):
content = msg.get("content") if isinstance(msg, dict) else None content = msg.get("content") if isinstance(msg, dict) else None
if not isinstance(content, str) or not content: if content is None:
cleanMessages.append(msg)
continue
if isinstance(content, str):
if not content:
cleanMessages.append(msg)
continue continue
try: try:
result = neutralSvc.processText(content) result = neutralSvc.processText(content)
if result and result.get("neutralized_text"): if result and result.get("neutralized_text"):
msg["content"] = result["neutralized_text"] msg["content"] = result["neutralized_text"]
_wasNeutralized = True _wasNeutralized = True
cleanMessages.append(msg)
else:
if _hardMode:
logger.warning(f"Message[{idx}] neutralization empty — REMOVING message (hard mode)")
excludedDocs.append(f"Message[{idx}] neutralization failed; message REMOVED")
else: else:
logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original") logger.warning(f"Neutralization of message[{idx}] returned no neutralized_text — keeping original")
excludedDocs.append(f"Message[{idx}] neutralization failed; original kept") excludedDocs.append(f"Message[{idx}] neutralization failed; original kept")
cleanMessages.append(msg)
except Exception as e: except Exception as e:
if _hardMode:
logger.warning(f"Message[{idx}] neutralization error — REMOVING message (hard mode): {e}")
excludedDocs.append(f"Message[{idx}] neutralization error; message REMOVED: {e}")
else:
logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original") logger.warning(f"Neutralization of message[{idx}] failed: {e} — keeping original")
excludedDocs.append(f"Message[{idx}] neutralization error: {e}") excludedDocs.append(f"Message[{idx}] neutralization error: {e}")
cleanMessages.append(msg)
elif isinstance(content, list):
_cleanParts = []
for _partIdx, _part in enumerate(content):
if not isinstance(_part, dict):
_cleanParts.append(_part)
continue
_partType = _part.get("type", "")
if _partType == "text" and _part.get("text"):
try:
_result = neutralSvc.processText(_part["text"])
if _result and _result.get("neutralized_text"):
_part["text"] = _result["neutralized_text"]
_wasNeutralized = True
_cleanParts.append(_part)
else:
if _hardMode:
logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization empty — REMOVING part")
excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text removed")
else:
_cleanParts.append(_part)
except Exception as e:
if _hardMode:
logger.warning(f"Message[{idx}].content[{_partIdx}] text neutralization error — REMOVING: {e}")
excludedDocs.append(f"Message[{idx}].content[{_partIdx}] text error: {e}")
else:
_cleanParts.append(_part)
elif _partType == "image_url":
if _hardMode:
logger.warning(f"Message[{idx}].content[{_partIdx}] image_url — REMOVING (neutralization active)")
excludedDocs.append(f"Message[{idx}].content[{_partIdx}] image removed (neutralization)")
else:
_cleanParts.append(_part)
else:
_cleanParts.append(_part)
if _cleanParts:
msg["content"] = _cleanParts
cleanMessages.append(msg)
elif _hardMode:
logger.warning(f"Message[{idx}] all parts removed — REMOVING message")
excludedDocs.append(f"Message[{idx}] fully removed after neutralization")
else:
cleanMessages.append(msg)
request.messages = cleanMessages
if hasattr(request, 'contentParts') and request.contentParts:
_cleanParts = []
for _cpIdx, _cp in enumerate(request.contentParts):
_tg = getattr(_cp, 'typeGroup', '') or ''
_data = getattr(_cp, 'data', '') or ''
if _tg in ('text', 'table') and _data:
try:
_result = neutralSvc.processText(str(_data))
if _result and _result.get("neutralized_text"):
_cp.data = _result["neutralized_text"]
_wasNeutralized = True
_cleanParts.append(_cp)
else:
if _hardMode:
logger.warning(f"ContentPart[{_cpIdx}] neutralization empty — REMOVING")
excludedDocs.append(f"ContentPart[{_cpIdx}] removed")
else:
_cleanParts.append(_cp)
except Exception as e:
if _hardMode:
logger.warning(f"ContentPart[{_cpIdx}] neutralization error — REMOVING: {e}")
excludedDocs.append(f"ContentPart[{_cpIdx}] error: {e}")
else:
_cleanParts.append(_cp)
elif _tg == 'image':
if _hardMode:
logger.warning(f"ContentPart[{_cpIdx}] image — REMOVING (neutralization active)")
excludedDocs.append(f"ContentPart[{_cpIdx}] image removed")
else:
_cleanParts.append(_cp)
else:
_cleanParts.append(_cp)
request.contentParts = _cleanParts
return request, _wasNeutralized, excludedDocs return request, _wasNeutralized, excludedDocs
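The per-message handling in _neutralizeRequest boils down to "hard mode removes what cannot be neutralized, soft mode keeps the original and records a warning". A compact sketch under the processText contract from this diff; neutralizeMessages is a hypothetical helper, not the service method:

import logging
logger = logging.getLogger(__name__)

def neutralizeMessages(messages: list, neutralize, hardMode: bool):
    """Hard mode drops any message that cannot be neutralized; soft mode keeps the original and logs it."""
    kept, excluded = [], []
    for idx, msg in enumerate(messages):
        text = msg.get("content")
        if not isinstance(text, str) or not text:
            kept.append(msg)
            continue
        try:
            result = neutralize(text) or {}
            clean = result.get("neutralized_text")
        except Exception as err:
            clean = None
            logger.warning("message[%d] neutralization error: %s", idx, err)
        if clean:
            kept.append({**msg, "content": clean})
        elif hardMode:
            excluded.append(f"Message[{idx}] removed (neutralization failed)")
        else:
            kept.append(msg)
            excluded.append(f"Message[{idx}] kept in original form (neutralization failed)")
    return kept, excluded

msgs = [{"role": "user", "content": "Kunde Max Muster"}]
print(neutralizeMessages(msgs, lambda t: {"neutralized_text": "Kunde [PERSON_1]"}, hardMode=True))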

View file

@ -83,12 +83,47 @@ class KnowledgeService:
""" """
contentObjects = contentObjects or [] contentObjects = contentObjects or []
# 1. Create FileContentIndex # 1. Resolve scope fields from FileItem (Single Source of Truth)
# FileItem lives in poweron_management; its scope/mandateId/featureInstanceId
# are authoritative and must be mirrored onto the FileContentIndex.
resolvedScope = "personal"
resolvedMandateId = mandateId
resolvedFeatureInstanceId = featureInstanceId
resolvedUserId = userId
_shouldNeutralize = False
try:
from modules.datamodels.datamodelFiles import FileItem as _FileItem
_dbComponent = getattr(self._context, "interfaceDbComponent", None)
_fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
if not _fileRecords:
from modules.interfaces.interfaceDbManagement import ComponentObjects
_row = ComponentObjects().db._loadRecord(_FileItem, fileId)
if _row:
_fileRecords = [_row]
if _fileRecords:
_fileRecord = _fileRecords[0]
_get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
_shouldNeutralize = bool(_get("neutralize", False))
_fileScope = _get("scope")
if _fileScope:
resolvedScope = _fileScope
if not resolvedMandateId:
resolvedMandateId = str(_get("mandateId", "") or "")
if not resolvedFeatureInstanceId:
resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
_fileCreatedBy = _get("sysCreatedBy")
if _fileCreatedBy:
resolvedUserId = str(_fileCreatedBy)
except Exception:
pass
# 2. Create FileContentIndex with correct scope from the start
index = FileContentIndex( index = FileContentIndex(
id=fileId, id=fileId,
userId=userId, userId=resolvedUserId,
featureInstanceId=featureInstanceId, featureInstanceId=resolvedFeatureInstanceId,
mandateId=mandateId, mandateId=resolvedMandateId,
scope=resolvedScope,
fileName=fileName, fileName=fileName,
mimeType=mimeType, mimeType=mimeType,
containerPath=containerPath, containerPath=containerPath,
@ -108,28 +143,9 @@ class KnowledgeService:
) )
self._knowledgeDb.upsertFileContentIndex(index) self._knowledgeDb.upsertFileContentIndex(index)
# 2. Chunk text content objects and create embeddings # 3. Chunk text content objects and create embeddings
textObjects = [o for o in contentObjects if o.get("contentType") == "text"] textObjects = [o for o in contentObjects if o.get("contentType") == "text"]
# Read FileItem attributes for index metadata and neutralization
_shouldNeutralize = False
try:
from modules.datamodels.datamodelFiles import FileItem as _FileItem
_dbComponent = getattr(self._context, 'interfaceDbComponent', None)
_fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
if _fileRecords:
_fileRecord = _fileRecords[0]
_get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
_shouldNeutralize = bool(_get("neutralize", False))
_fileScope = _get("scope")
if _fileScope:
index.scope = _fileScope
_fileCreatedBy = _get("sysCreatedBy")
if _fileCreatedBy:
index.userId = str(_fileCreatedBy)
except Exception:
pass
if _shouldNeutralize and textObjects: if _shouldNeutralize and textObjects:
_neutralizedObjects = [] _neutralizedObjects = []
try: try:
@ -142,9 +158,7 @@ class KnowledgeService:
if not _textContent: if not _textContent:
continue continue
try: try:
_neutralResult = _neutralSvc.processText( _neutralResult = _neutralSvc.processText(_textContent)
_textContent, userId=userId, featureInstanceId=featureInstanceId
)
if _neutralResult and _neutralResult.get("neutralized_text"): if _neutralResult and _neutralResult.get("neutralized_text"):
_obj["data"] = _neutralResult["neutralized_text"] _obj["data"] = _neutralResult["neutralized_text"]
_neutralizedObjects.append(_obj) _neutralizedObjects.append(_obj)
@ -176,8 +190,8 @@ class KnowledgeService:
contentChunk = ContentChunk( contentChunk = ContentChunk(
contentObjectId=chunk["contentObjectId"], contentObjectId=chunk["contentObjectId"],
fileId=fileId, fileId=fileId,
userId=userId, userId=resolvedUserId,
featureInstanceId=featureInstanceId, featureInstanceId=resolvedFeatureInstanceId,
contentType="text", contentType="text",
data=chunk["data"], data=chunk["data"],
contextRef=chunk["contextRef"], contextRef=chunk["contextRef"],
@ -185,14 +199,36 @@ class KnowledgeService:
) )
self._knowledgeDb.upsertContentChunk(contentChunk) self._knowledgeDb.upsertContentChunk(contentChunk)
# 3. Store non-text content objects (images, etc.) without embedding # 4. Store non-text content objects (images, etc.) without embedding
nonTextObjects = [o for o in contentObjects if o.get("contentType") != "text"] nonTextObjects = [o for o in contentObjects if o.get("contentType") != "text"]
if _shouldNeutralize and nonTextObjects:
import base64 as _b64
_filteredNonText = []
for _obj in nonTextObjects:
if _obj.get("contentType") != "image":
_filteredNonText.append(_obj)
continue
_imgData = (_obj.get("data", "") or "").strip()
if not _imgData:
_filteredNonText.append(_obj)
continue
try:
_imgBytes = _b64.b64decode(_imgData)
_imgResult = await _neutralSvc.processImageAsync(_imgBytes, fileName)
if _imgResult.get("status") == "ok":
_filteredNonText.append(_obj)
logger.debug(f"Image chunk OK for file {fileId}, storing")
else:
logger.warning(f"Image chunk blocked for file {fileId} (PII detected), skipping (fail-safe)")
except Exception as _imgErr:
logger.warning(f"Image neutralization check failed for file {fileId}: {_imgErr}, skipping (fail-safe)")
nonTextObjects = _filteredNonText
for obj in nonTextObjects: for obj in nonTextObjects:
contentChunk = ContentChunk( contentChunk = ContentChunk(
contentObjectId=obj.get("contentObjectId", ""), contentObjectId=obj.get("contentObjectId", ""),
fileId=fileId, fileId=fileId,
userId=userId, userId=resolvedUserId,
featureInstanceId=featureInstanceId, featureInstanceId=resolvedFeatureInstanceId,
contentType=obj.get("contentType", "other"), contentType=obj.get("contentType", "other"),
data=obj.get("data", ""), data=obj.get("data", ""),
contextRef=obj.get("contextRef", {}), contextRef=obj.get("contextRef", {}),
@ -200,21 +236,23 @@ class KnowledgeService:
) )
self._knowledgeDb.upsertContentChunk(contentChunk) self._knowledgeDb.upsertContentChunk(contentChunk)
self._knowledgeDb.updateFileStatus(fileId, "indexed") # 5. Final upsert ALWAYS — persists scope, neutralization status, etc.
index.status = "indexed" index.status = "indexed"
if _shouldNeutralize: if _shouldNeutralize:
try:
index.neutralizationStatus = "completed" index.neutralizationStatus = "completed"
index.isNeutralized = True index.isNeutralized = True
self._knowledgeDb.upsertFileContentIndex(index) self._knowledgeDb.upsertFileContentIndex(index)
except Exception as e:
logger.debug(f"Could not set neutralizationStatus for file {fileId}: {e}") logger.info(
logger.info(f"Indexed file {fileId} ({fileName}): {len(contentObjects)} objects, {len(textObjects)} text chunks") "Indexed file %s (%s): %d objects, %d text chunks, scope=%s, mandate=%s, instance=%s",
if mandateId: fileId, fileName, len(contentObjects), len(textObjects),
resolvedScope, resolvedMandateId, resolvedFeatureInstanceId,
)
if resolvedMandateId:
try: try:
from modules.interfaces.interfaceDbBilling import _getRootInterface from modules.interfaces.interfaceDbBilling import _getRootInterface
_getRootInterface().reconcileMandateStorageBilling(str(mandateId)) _getRootInterface().reconcileMandateStorageBilling(str(resolvedMandateId))
except Exception as ex: except Exception as ex:
logger.warning("reconcileMandateStorageBilling after index failed: %s", ex) logger.warning("reconcileMandateStorageBilling after index failed: %s", ex)
return index return index
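The "FileItem as single source of truth" rule above can be read as a small precedence function: record fields win, call arguments only fill gaps. A sketch of that precedence (resolveIndexScope is an illustrative name):

def resolveIndexScope(fileRecord: dict, userId: str, mandateId: str, featureInstanceId: str) -> dict:
    """FileItem is authoritative: its scope/owner win, call arguments only fill gaps."""
    return {
        "scope": fileRecord.get("scope") or "personal",
        "userId": str(fileRecord.get("sysCreatedBy") or userId),
        "mandateId": mandateId or str(fileRecord.get("mandateId") or ""),
        "featureInstanceId": featureInstanceId or str(fileRecord.get("featureInstanceId") or ""),
        "neutralize": bool(fileRecord.get("neutralize", False)),
    }

print(resolveIndexScope({"scope": "mandate", "sysCreatedBy": "u-7", "mandateId": "m-1"}, "u-1", "", "fi-3"))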
@ -328,17 +366,18 @@ class KnowledgeService:
if entities: if entities:
builder.add(priority=3, label="Workflow Context", items=entities, isKeyValue=True, maxChars=2000) builder.add(priority=3, label="Workflow Context", items=entities, isKeyValue=True, maxChars=2000)
# Layer 3: Shared Layer (mandate-wide shared documents) # Layer 3: Mandate-scoped documents (visible to all mandate users)
sharedChunks = self._knowledgeDb.semanticSearch( if mandateId:
mandateChunks = self._knowledgeDb.semanticSearch(
queryVector=queryVector, queryVector=queryVector,
scope="mandate",
mandateId=mandateId, mandateId=mandateId,
isShared=True,
limit=10, limit=10,
minScore=0.7, minScore=0.7,
isSysAdmin=isSysAdmin, isSysAdmin=isSysAdmin,
) )
if sharedChunks: if mandateChunks:
builder.add(priority=4, label="Shared Knowledge", items=sharedChunks, maxChars=2000) builder.add(priority=4, label="Shared Knowledge", items=mandateChunks, maxChars=2000)
# Layer 4: Cross-workflow hint (other conversations in this workspace) # Layer 4: Cross-workflow hint (other conversations in this workspace)
if workflowHintItems: if workflowHintItems:

View file

@ -31,6 +31,7 @@ async def preScanDocument(
userId: str = "", userId: str = "",
featureInstanceId: str = "", featureInstanceId: str = "",
mandateId: str = "", mandateId: str = "",
scope: str = "personal",
) -> FileContentIndex: ) -> FileContentIndex:
"""Create a structural FileContentIndex without AI. """Create a structural FileContentIndex without AI.
@ -56,6 +57,7 @@ async def preScanDocument(
userId=userId, userId=userId,
featureInstanceId=featureInstanceId, featureInstanceId=featureInstanceId,
mandateId=mandateId, mandateId=mandateId,
scope=scope,
fileName=fileName, fileName=fileName,
mimeType=mimeType, mimeType=mimeType,
totalObjects=totalObjects, totalObjects=totalObjects,

View file

@ -7,9 +7,7 @@ All mandate-level notifications (subscription changes, billing warnings, etc.)
MUST go through notifyMandateAdmins() to ensure consistent recipient resolution MUST go through notifyMandateAdmins() to ensure consistent recipient resolution
and delivery. and delivery.
Recipients are the union of: Recipients: all users with the mandate-level "admin" RBAC role.
1. BillingSettings.notifyEmails for the mandate (configured contact addresses)
2. All users with the mandate-level "admin" RBAC role
""" """
from __future__ import annotations from __future__ import annotations
@ -96,10 +94,10 @@ def _resolveMandateAdminEmails(mandateId: str) -> List[str]:
def _resolveAllRecipients(mandateId: str) -> List[str]: def _resolveAllRecipients(mandateId: str) -> List[str]:
"""Union of BillingSettings.notifyEmails + all mandate admin user emails, deduplicated.""" """Mandate admin user emails only (RBAC-resolved), deduplicated."""
seen: Set[str] = set() seen: Set[str] = set()
result: List[str] = [] result: List[str] = []
for email in _resolveMandateContactEmails(mandateId) + _resolveMandateAdminEmails(mandateId): for email in _resolveMandateAdminEmails(mandateId):
if email and email not in seen: if email and email not in seen:
seen.add(email) seen.add(email)
result.append(email) result.append(email)
@ -233,7 +231,7 @@ def notifyMandateAdmins(
rawHtmlBlock: Optional[str] = None, rawHtmlBlock: Optional[str] = None,
) -> int: ) -> int:
""" """
Send a styled HTML notification to all mandate admins and configured contacts. Send a styled HTML notification to all mandate admins.
Args: Args:
mandateId: The mandate to notify admins for. mandateId: The mandate to notify admins for.

View file

@ -6,7 +6,7 @@ import time
from typing import Dict, Any from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -101,74 +101,6 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
# Pass operationId for hierarchical per-document progress logging # Pass operationId for hierarchical per-document progress logging
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
# Check if neutralization is enabled and should be applied automatically
neutralizationEnabled = False
try:
config = self.services.neutralization.getConfig()
neutralizationEnabled = config and config.enabled
except Exception as e:
logger.debug(f"Could not check neutralization config: {str(e)}")
# Neutralize extracted data if enabled (for dynamic mode: after extraction, before AI processing)
if neutralizationEnabled:
self.services.chat.progressLogUpdate(operationId, 0.7, "Neutralizing extracted data")
logger.info("Neutralization enabled - neutralizing extracted content data")
# Neutralize each ContentExtracted result
for extracted in extractedResults:
if extracted.parts:
neutralizedParts = []
for part in extracted.parts:
if not isinstance(part, ContentPart):
# Try to parse as ContentPart if it's a dict
if isinstance(part, dict):
try:
part = ContentPart(**part)
except Exception as e:
logger.warning(f"Could not parse ContentPart: {str(e)}")
neutralizedParts.append(part)
continue
else:
neutralizedParts.append(part)
continue
# Neutralize the data field if it contains text
if part.data:
try:
# Call neutralization service
neutralizationResult = self.services.neutralization.processText(part.data)
if neutralizationResult and 'neutralized_text' in neutralizationResult:
# Replace data with neutralized text
neutralizedData = neutralizationResult['neutralized_text']
# Create new ContentPart with neutralized data
neutralizedPart = ContentPart(
id=part.id,
parentId=part.parentId,
label=part.label,
typeGroup=part.typeGroup,
mimeType=part.mimeType,
data=neutralizedData,
metadata=part.metadata.copy() if part.metadata else {}
)
neutralizedParts.append(neutralizedPart)
else:
# Neutralization failed, use original part
logger.warning(f"Neutralization did not return neutralized_text for part {part.id}")
neutralizedParts.append(part)
except Exception as e:
logger.error(f"Error neutralizing part {part.id}: {str(e)}")
# On error, use original part
neutralizedParts.append(part)
else:
# No data to neutralize, keep original part
neutralizedParts.append(part)
# Update extracted result with neutralized parts
extracted.parts = neutralizedParts
logger.info(f"Neutralized {len(neutralizedParts)} content parts")
# Build ActionDocuments from ContentExtracted results # Build ActionDocuments from ContentExtracted results
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents") self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
actionDocuments = [] actionDocuments = []
@ -190,7 +122,6 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
"documentIndex": i, "documentIndex": i,
"extractedId": extracted.id, "extractedId": extracted.id,
"partCount": len(extracted.parts) if extracted.parts else 0, "partCount": len(extracted.parts) if extracted.parts else 0,
"neutralized": neutralizationEnabled,
"originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
} }
actionDoc = ActionDocument( actionDoc = ActionDocument(

View file

@ -16,7 +16,6 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_neutralize_{workflowId}_{int(time.time())}" operationId = f"context_neutralize_{workflowId}_{int(time.time())}"
# Check if neutralization is enabled
neutralizationEnabled = False neutralizationEnabled = False
try: try:
config = self.services.neutralization.getConfig() config = self.services.neutralization.getConfig()
@ -144,8 +143,25 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
neutralizedParts.append(part) neutralizedParts.append(part)
continue continue
# Neutralize the data field if it contains text # Neutralize the data field based on typeGroup
if part.data: _typeGroup = getattr(part, 'typeGroup', '') or ''
if _typeGroup == 'image' and part.data:
import base64 as _b64
try:
self.services.chat.progressLogUpdate(
operationId,
0.3 + (i / len(chatDocuments)) * 0.6,
f"Checking image part {len(neutralizedParts) + 1} of document {i+1}"
)
_imgBytes = _b64.b64decode(str(part.data))
_imgResult = await self.services.neutralization.processImageAsync(_imgBytes, f"part_{part.id}")
if _imgResult.get("status") == "ok":
neutralizedParts.append(part)
else:
logger.warning(f"Fail-Safe: Image part {part.id} blocked (PII detected), SKIPPING")
except Exception as _imgErr:
logger.error(f"Fail-Safe: Image check failed for part {part.id}: {_imgErr}, SKIPPING")
elif part.data:
try: try:
self.services.chat.progressLogUpdate( self.services.chat.progressLogUpdate(
operationId, operationId,
@ -153,14 +169,11 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}" f"Neutralizing part {len(neutralizedParts) + 1} of document {i+1}"
) )
# Call neutralization service
neutralizationResult = self.services.neutralization.processText(part.data) neutralizationResult = self.services.neutralization.processText(part.data)
if neutralizationResult and 'neutralized_text' in neutralizationResult: if neutralizationResult and 'neutralized_text' in neutralizationResult:
# Replace data with neutralized text
neutralizedData = neutralizationResult['neutralized_text'] neutralizedData = neutralizationResult['neutralized_text']
# Create new ContentPart with neutralized data
neutralizedPart = ContentPart( neutralizedPart = ContentPart(
id=part.id, id=part.id,
parentId=part.parentId, parentId=part.parentId,
@ -172,15 +185,12 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
) )
neutralizedParts.append(neutralizedPart) neutralizedParts.append(neutralizedPart)
else: else:
# Fail-Safe: neutralization incomplete, skip this part
logger.warning(f"Fail-Safe: Neutralization incomplete for part {part.id}, SKIPPING (not passing original)") logger.warning(f"Fail-Safe: Neutralization incomplete for part {part.id}, SKIPPING (not passing original)")
continue continue
except Exception as e: except Exception as e:
logger.error(f"Fail-Safe: Error neutralizing part {part.id}, SKIPPING document (not passing original): {str(e)}") logger.error(f"Fail-Safe: Error neutralizing part {part.id}, SKIPPING document (not passing original): {str(e)}")
# Fail-Safe: do NOT pass original data to AI
continue continue
else: else:
# No data to neutralize, keep original part
neutralizedParts.append(part) neutralizedParts.append(part)
# Create neutralized ContentExtracted object # Create neutralized ContentExtracted object
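The image branch above applies the same fail-safe as the indexing path: an image part is only forwarded when the check explicitly returns status "ok"; any failure or exception drops it. A minimal async sketch, assuming the processImageAsync result shape from this diff; filterImageParts is an illustrative name:

import asyncio, base64

async def filterImageParts(parts: list, checkImage) -> list:
    """Fail-safe image gate: keep an image part only when the PII check explicitly returns status 'ok'."""
    kept = []
    for part in parts:
        try:
            raw = base64.b64decode(part["data"])
            result = await checkImage(raw)
            if result.get("status") == "ok":
                kept.append(part)
        except Exception:
            pass  # blocked on any error: never forward unchecked images
    return kept

async def _demo():
    async def alwaysOk(_raw):
        return {"status": "ok"}
    parts = [{"data": base64.b64encode(b"\x89PNG...").decode()}]
    print(len(await filterImageParts(parts, alwaysOk)))  # 1

asyncio.run(_demo())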

View file

@ -352,12 +352,6 @@ class WorkflowManager:
for i, doc in enumerate(documents, 1): for i, doc in enumerate(documents, 1):
docListText += f"\n{i}. {doc.fileName} ({doc.mimeType}, {doc.fileSize} bytes)" docListText += f"\n{i}. {doc.fileName} ({doc.mimeType}, {doc.fileSize} bytes)"
_userId = getattr(getattr(self.services, 'user', None), 'id', '') or ''
_featureInstanceId = getattr(self.services, 'featureInstanceId', '') or ''
_promptForAnalysis, _wasNeutralized, _mappingId = await self._neutralizePromptIfRequired(
userPrompt, userId=_userId, featureInstanceId=_featureInstanceId
)
analysisPrompt = f"""You are an input analyzer. From the user's message, perform ALL of the following in one pass: analysisPrompt = f"""You are an input analyzer. From the user's message, perform ALL of the following in one pass:
1. detectedLanguage: Detect ISO 639-1 language code (e.g., de, en, fr, it) 1. detectedLanguage: Detect ISO 639-1 language code (e.g., de, en, fr, it)
@ -407,7 +401,7 @@ Return ONLY JSON (no markdown) with this exact structure:
The following is the user's original input message. Analyze intent, normalize the request, and determine complexity: The following is the user's original input message. Analyze intent, normalize the request, and determine complexity:
################ USER INPUT START ################# ################ USER INPUT START #################
{_promptForAnalysis.replace('{', '{{').replace('}', '}}') if _promptForAnalysis else ''} {userPrompt.replace('{', '{{').replace('}', '}}') if userPrompt else ''}
################ USER INPUT FINISH ################# ################ USER INPUT FINISH #################
""" """
@ -425,12 +419,6 @@ The following is the user's original input message. Analyze intent, normalize th
jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0 jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
if jsonStart != -1 and jsonEnd > jsonStart: if jsonStart != -1 and jsonEnd > jsonStart:
result = json.loads(aiResponse[jsonStart:jsonEnd]) result = json.loads(aiResponse[jsonStart:jsonEnd])
if _wasNeutralized:
for _field in ('normalizedRequest', 'intent', 'workflowName'):
if _field in result and result[_field]:
result[_field] = await self._rehydrateResponseIfNeeded(
result[_field], True, userId=_userId, featureInstanceId=_featureInstanceId
)
return result return result
else: else:
logger.warning("Could not parse combined analysis response, using defaults") logger.warning("Could not parse combined analysis response, using defaults")
@ -490,7 +478,6 @@ The following is the user's original input message. Analyze intent, normalize th
if userInput.prompt: if userInput.prompt:
try: try:
originalPromptBytes = userInput.prompt.encode('utf-8') originalPromptBytes = userInput.prompt.encode('utf-8')
originalPromptBytes = await self._neutralizeContentIfEnabled(originalPromptBytes, "text/markdown")
fileItem = self.services.interfaceDbComponent.createFile( fileItem = self.services.interfaceDbComponent.createFile(
name="user_prompt_original.md", name="user_prompt_original.md",
mimeType="text/markdown", mimeType="text/markdown",
@ -680,7 +667,6 @@ The following is the user's original input message. Analyze intent, normalize th
if userInput.prompt: if userInput.prompt:
try: try:
originalPromptBytes = userInput.prompt.encode('utf-8') originalPromptBytes = userInput.prompt.encode('utf-8')
originalPromptBytes = await self._neutralizeContentIfEnabled(originalPromptBytes, "text/markdown")
fileItem = self.services.interfaceDbComponent.createFile( fileItem = self.services.interfaceDbComponent.createFile(
name="user_prompt_original.md", name="user_prompt_original.md",
mimeType="text/markdown", mimeType="text/markdown",
@ -821,7 +807,6 @@ The following is the user's original input message. Analyze intent, normalize th
if userInput.prompt: if userInput.prompt:
try: try:
originalPromptBytes = userInput.prompt.encode('utf-8') originalPromptBytes = userInput.prompt.encode('utf-8')
originalPromptBytes = await self._neutralizeContentIfEnabled(originalPromptBytes, "text/markdown")
fileItem = self.services.interfaceDbComponent.createFile( fileItem = self.services.interfaceDbComponent.createFile(
name="user_prompt_original.md", name="user_prompt_original.md",
mimeType="text/markdown", mimeType="text/markdown",
@ -1365,82 +1350,3 @@ The following is the user's original input message. Analyze intent, normalize th
"""Set user language for the service center""" """Set user language for the service center"""
self.services.user.language = language self.services.user.language = language
async def _neutralizePromptIfRequired(self, prompt: str, userId: str, featureInstanceId: str) -> tuple:
"""Neutralize prompt text if the workflow context requires it.
Returns (processedPrompt, wasNeutralized, mappingId)."""
try:
_neutralSvc = getattr(self.services, 'neutralization', None)
if not _neutralSvc:
return prompt, False, None
_config = _neutralSvc.getConfig() if hasattr(_neutralSvc, 'getConfig') else None
if not _config or not getattr(_config, 'enabled', False):
return prompt, False, None
_result = _neutralSvc.processText(prompt, userId=userId, featureInstanceId=featureInstanceId)
if _result and _result.get("neutralized_text"):
return _result["neutralized_text"], True, _result.get("mappingId")
return prompt, False, None
except Exception as e:
logger.warning(f"Prompt neutralization failed: {e}")
return prompt, False, None
async def _rehydrateResponseIfNeeded(self, response: str, wasNeutralized: bool, userId: str, featureInstanceId: str) -> str:
"""Replace placeholders in AI response with original values."""
if not wasNeutralized or not response:
return response
try:
_neutralSvc = getattr(self.services, 'neutralization', None)
if not _neutralSvc:
return response
_rehydrated = _neutralSvc.resolveText(response, userId=userId, featureInstanceId=featureInstanceId)
return _rehydrated if _rehydrated else response
except Exception as e:
logger.warning(f"Response re-hydration failed: {e}")
return response
async def _neutralizeContentIfEnabled(self, contentBytes: bytes, mimeType: str) -> bytes:
"""Neutralize content if neutralization is enabled in user settings"""
try:
# Automation hub may not have neutralization service; skip if unavailable
neutralization = getattr(self.services, 'neutralization', None)
if not neutralization:
return contentBytes
# Check if neutralization is enabled
config = neutralization.getConfig()
if not config or not config.enabled:
return contentBytes
# Decode content to text for neutralization
try:
textContent = contentBytes.decode('utf-8')
except UnicodeDecodeError:
# Try alternative encodings
for enc in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
textContent = contentBytes.decode(enc)
break
except UnicodeDecodeError:
continue
else:
# If unable to decode, return original bytes (binary content)
logger.debug(f"Unable to decode content for neutralization, skipping: {mimeType}")
return contentBytes
# Neutralize the text content
# Note: The neutralization service should use names from config when processing
result = neutralization.processText(textContent)
if result and 'neutralized_text' in result:
neutralizedText = result['neutralized_text']
# Encode back to bytes using the same encoding
try:
return neutralizedText.encode('utf-8')
except Exception as e:
logger.warning(f"Error encoding neutralized text: {str(e)}")
return contentBytes
else:
logger.warning("Neutralization did not return neutralized_text")
return contentBytes
except Exception as e:
logger.error(f"Error during content neutralization: {str(e)}")
# Return original content on error
return contentBytes