Merge pull request #165 from valueonag/int
Some checks failed
Deploy Gateway / deploy (push) Failing after 3s

Int
This commit is contained in:
Patrick Motsch 2026-05-17 00:08:54 +02:00 committed by GitHub
commit a31e0dadc3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
92 changed files with 7449 additions and 877 deletions

View file

@ -56,7 +56,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5
- name: Determine environment
id: env

View file

@ -22,10 +22,10 @@ jobs:
contents: read #This is required for actions/checkout
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python version
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.11'
@ -49,7 +49,7 @@ jobs:
run: zip release.zip ./* -r
- name: Upload artifact for deployment jobs
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: python-app
path: |
@ -66,7 +66,7 @@ jobs:
steps:
- name: Download artifact from build job
uses: actions/download-artifact@v4
uses: actions/download-artifact@v7
with:
name: python-app

View file

@ -22,10 +22,10 @@ jobs:
contents: read #This is required for actions/checkout
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python version
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.11'
@ -49,7 +49,7 @@ jobs:
run: zip release.zip ./* -r
- name: Upload artifact for deployment jobs
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: python-app
path: |
@ -66,7 +66,7 @@ jobs:
steps:
- name: Download artifact from build job
uses: actions/download-artifact@v4
uses: actions/download-artifact@v7
with:
name: python-app

View file

@ -25,10 +25,10 @@ jobs:
contents: write # push requirements.lock
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.11'

8
app.py
View file

@ -404,8 +404,10 @@ async def lifespan(app: FastAPI):
try:
from modules.serviceCenter.services.serviceBackgroundJobs.mainBackgroundJobService import (
recoverInterruptedJobs,
registerZombieKillerScheduler,
)
recoverInterruptedJobs()
registerZombieKillerScheduler(intervalMinutes=5)
except Exception as e:
logger.warning(f"BackgroundJob recovery failed (non-critical): {e}")
@ -604,6 +606,12 @@ app.include_router(promptRouter)
from modules.routes.routeDataConnections import router as connectionsRouter
app.include_router(connectionsRouter)
from modules.routes.routeRagInventory import router as ragInventoryRouter
app.include_router(ragInventoryRouter)
from modules.routes.routeAdminSttBenchmark import router as sttBenchmarkRouter
app.include_router(sttBenchmarkRouter)
from modules.routes.routeTableViews import router as tableViewsRouter
app.include_router(tableViewsRouter)

View file

@ -0,0 +1,97 @@
# Development Environment Configuration
# System Configuration
APP_ENV_TYPE = dev
APP_ENV_LABEL = Development Instance Patrick
APP_API_URL = http://localhost:8000
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt
APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
# PostgreSQL DB Host
DB_HOST=localhost
DB_USER=poweron_dev
DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
# ClickUp OAuth (connections / automation). Create an app in ClickUp: Settings → Apps → API; set the redirect URL to exactly the value of Service_CLICKUP_OAUTH_REDIRECT_URI.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
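# AI configuration
# NOTE(review): several connector keys below are stored in plaintext rather than in the DEV_ENC: form used by the other *_SECRET values in this file. Encrypt them before committing and rotate any key that has already been pushed.
# NOTE(review): the Anthropic value starts with an unexpected "D" before "sk-ant-" — looks like a typo; verify against the key actually issued.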
Connector_AiOpenai_API_SECRET = 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
Connector_AiAnthropic_API_SECRET = D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
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = 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
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3
b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pO
MmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
# Teamsbot Browser Bot Service
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
# The bot will connect back to localhost:8000 via WebSocket
TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves.
# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av
# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a

View file

@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
# ClickUp OAuth (connections / automation). Create an app in ClickUp: Settings → Apps → API; set the redirect URL to exactly the value of Service_CLICKUP_OAUTH_REDIRECT_URI.
@ -61,12 +61,12 @@ STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
# AI configuration
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5ZmdDZ3hrSElrMnQzNFAtel9wX191VjVzN2g1LWZoa0V1YklubEdmMEJDdEZiR1RWeVZrM3V3enBHX3p6WUtTS0kwYkFyVEF0Nm8zX05CelVQcFJUc0lwVW5iNFczc1p1WWJ2WFBmd0lpLUxxWndEeUh0b2hGUHVpN19vb19nMTBnV1A1VmNpWERVX05lQ29VS20wTjZ3PT0=
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlRzI0aTZXSHhTbklJa1ROdTRJdW94b1p0ZUhRdW55Zzdza2RsUWduSDRhMlVCYnQ4MnA0VU9ZZi10M0tySGwxV2ZweGZQei1FaWU4QThnWVE2LUI2WGhSVWhLbnVkRU5uZDgyVV90TjZXd1czaDdUaXJBU0hNWndfOFZTNzZ4UnNjenlqRWw2T2lBZFphWjNYcmtXSVZhcTUwUGhiYmhMM2lNemM0UFZwUndZVUYtTFUxVUctSEtoWmRkSE1GamJuS0FLMkhmWWNVTmJHemJCRzdhQ0Z6UzVFb1l1b1JjQWliQkt6MlNHeFBHb0h5VlBZWW1NUDNlTWpPV0lqZldjeWs2MExiTHJRdGF4b214c191ZkswVHZCcGVPMzZHaElrbWFvSVlqOXRjTkU9
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlbXEzdGc2NFExb3AzVUw0cEhkZzlNRjZxTVlJMV81LTZhVXhoNXBpYUlMN0FxUUJHQlJnS0N3OV85Uk9sa1J3M1lyZExSMWVsbzVSdzdWQUVsUVp2dzhfLThmY2lNb1NhVGlvbnhLR0NnSmhsOVp2RkxfODc2SFpDYlBkcWp4aFFtNldtZGQ2LUhBZVM5VXk4RTNHNzQyV3FnMVNJMW9yOGpRRkQxUC1hZ3NOOHhqV3Y4LWJSNjFYQ3dwQmhrRWJRRzhaX1N4aFlWLTVsaEJmOWxkTjZMZz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlNV9felVPcHVyMU9kVGhGZEt0MG9iRzRrTVM4TFJvSHhGOVo0U1ROWkdEMzRSWjhtMnFrZUhHTHNXelpLZ014RzRkMlIxZDJwcjEwc1dRamY5ekJMR1VLb2w4eEZqZENBRnFaZlRhb1h5VE05Tml1ZlVBWHBaTkJaZUE5NWprVklva0ZFZnB4cFFudGdkalpmTlBhdV9nPT0=
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlY1R2WGpuazk5M05SeDIyLWd3bHpKN3lUdlVFdjhvZEJXdlM4bGlBdTB1TjRia051YllDQ2lwM0V3R3dPd2lKVWxoSm9BNWl1ZFFlVkZ5cXh4TFRVU0Z4NVU5WVRjSUJPc01La3JyaVZSNkhYWU9PR00yMENEb0dRT3l5enEwSFlWZVVzTVR0UWQ4eUxvRmZvWHl0c0xRPT0=
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE=
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlelh2T2hqNGcxV0hMV1FKbmFDZjVHUWF6T2FXbGlCSnQzSzNXLWJHeXBFWE1nUlh1b1NHY1JRSEVtTVEtc1MtUnZrX2ZCcURqQ2FYNmFWa2xudGJtS3g2eVo4MFZMd09nZTBNMmo1ZHU0bzBJdFRqLVhHSVZNb2Zrc0VkUXI0SVk=
Service_MSFT_TENANT_ID = common
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -0,0 +1,92 @@
# Integration Environment Configuration
# System Configuration
APP_ENV_TYPE = int
APP_ENV_LABEL = Integration Instance
APP_API_URL = https://gateway-int.poweron.swiss
# Force SameSite=None+Secure for auth cookies (cross-site UI on poweron-center.net). Optional if APP_API_URL is https://
APP_COOKIE_SECURE = true
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9
# PostgreSQL DB Host
DB_HOST=gateway-int-server.postgres.database.azure.com
DB_USER=heeshkdlby
DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (connections / automation). Create an app in ClickUp: Settings → Apps → API; set the redirect URL to exactly the value of Service_CLICKUP_OAUTH_REDIRECT_URI.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
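# AI configuration
# NOTE(review): several connector keys below are stored in plaintext rather than in the INT_ENC: form used by the other *_SECRET values in this file. Encrypt them before committing and rotate any key that has already been pushed.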
Connector_AiOpenai_API_SECRET = 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
Connector_AiAnthropic_API_SECRET = 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
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = 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
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBH
WUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhh
MVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -63,12 +63,12 @@ STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
# AI configuration
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnB5dkd6UkhtU3lhYmZMSlo0bklQZ2s3UTFBSkprZTNwWkg5Q2lVa0wtenhxWXpva21xVDVMRjdKSmhpTmxWS05IUTRoRHdCbktSRVVjcVFnY1RfV0N2S2dyV0dTMlhxQlRFVm41RkFTWVQzQThuVkZwdlNuVC05QlVRVXB6Qjk3akNpYmY1MFR6R1ByMzlIMllRZlRRYVVRN2ZBPT0=
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlYUZpRDFqLWhQajZxSElqMEMzdGZIRm5TeDBSSFlqenpZYVJEa1BtRXM1M21pd3hjTGZvSDJPcGJoY2gyQlNncWNwNkNIR0NFQnpjXzA5U2t6Zm1DWWNNVEZrTE5DVzRQVGdlZzRldGoyRWhaeTJfYjBHd0ludWpGcWdqd3hKTHJ5T0piVE15Tk1YZUZnSnE4OXdKOUhXd292dHpWMkxlR3dNclc1N2t0ckFoMmd5WTlBci11MXRGNV9UTlFCSmdOOE83bGJyODFUQ3E2NXJpRHJWZUM0cHFHekNJa0FlN3hjd2VFQ1Nqa1JFQ2NFdjlMWW1TbEV4TVZBeDFEZVVnUWlBVUV1Z0NUNHV0RE1fTEJaLTQxQksyVE1LSE1sSG0ycG9fTS1hNzh4dTQ9
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlRHFpNThJb3g3UU05cUw4SVJpOXBTblU5QzU1WFItZ2JkNXVILVN4VHp0Umh2RjJyZXJMNVp5OWFxLWhjRjhub3cxajkxMVRQMnZQdVBGT21obWN0Q0NlOU80MVhMMXRWb1l3cWNpR2Ytc1d0WnVlRUN1TTZ4NjFQcDd0Wll4cFN6dzk1OU5SZGNJck54WmNoeElITzEzejJrczVSQnp6ZTBINGtENHFiT3NnWjdUME9xXzJ5Y0N3dHk5QnpBRkpyVTgxOE0xTVllR2JMUC0yTkwyWWxHQT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFla1h1R1M3QlQ5XzJhS0x4eXFpTkZ3WHpLMWVZZldRMGpMX2psMFZ2RmpETTZMZ3ZXblo2MnhyemxYWXRsMHN1LXdZU3k5ampEMjMtdzcyb1J4Ri1rTmxPOWhJMF9MMEtzZ3d5dFZxSFY3TjNac3ZpTVJxUFFmUVpXeHEtbVBTUmtiR0lhQjhVcjM3U1NNX1ZHY1NxUFJ3PT0=
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlbmRSZVRjTzVKRklFbFgwdVZJaE5jNVoyX3dVTVlRUFVUenc4X1JOX2laOHRoTU9mN1lTUVRzb2xNZjJXVjhEYnVIaXdkSWN4NEpJbTFJZFN2cmkwUkJ0ZXNKT2NidktjdDFJX1BkZ3QwU3dQRzg0aG9aNmtxc1FZZ1ZBRjQyM3lOSS1EYkpqWmxoV0xWWE1Fc01uN3RnPT0=
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI=
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlU2tMLTFnQWhET2Nia2pTcVpBakRaSVFDdUpHRzZ1bkhGVVhMeEVlSnFZU3F3UFRBUkNMMU4tQU92OUdTeDlpM2VZbXJzLURQZ1lPLVB3azgxSDZabkhkSHJ5Y005aWhtcDJzajk3a2JDQUxCZlNKRGw5elJuSzJMUUpTZ2hiSlU=
Service_MSFT_TENANT_ID = common
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -32,19 +32,19 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY=
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk=
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI =
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI =
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
@ -62,12 +62,12 @@ STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
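# AI configuration — NOTE(review): every *_SECRET value in this section is expected to be stored PROD_ENC-encrypted like the other secrets in this file; encrypt any plaintext value and rotate the exposed key.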
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiOpenai_API_SECRET = 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
Connector_AiAnthropic_API_SECRET = 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
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = 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
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c
Service_MSFT_TENANT_ID = common
@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -0,0 +1,92 @@
# Production Environment Configuration
# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://gateway-prod.poweron.swiss
APP_COOKIE_SECURE = true
# PostgreSQL DB Host
DB_HOST=gateway-prod-server.postgres.database.azure.com
DB_USER=gzxxmcrdhn
DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
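# AI configuration — NOTE(review): every *_SECRET value in this section is expected to be stored PROD_ENC-encrypted like the other secrets in this file; encrypt any plaintext value and rotate the exposed key.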
Connector_AiOpenai_API_SECRET = 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
Connector_AiAnthropic_API_SECRET = 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
Connector_AiPerplexity_API_SECRET = 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
Connector_AiTavily_API_SECRET = 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
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzN
ISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd
2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -63,12 +63,12 @@ STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmcDVyOGNwbVkwWFJCWmFkZS12RkhLaFhLSF9kWWpEZ0d0NDBqV2FnWlpnYmpSckdLSGpjbmh6aHJXVUZxMElwY1MzcVg1MzBOdURUZXhnZ3pqNEZyQ1JWMVA0YmxhNWJlenNpa1A3TjZkYVZSclFONjU4MF9jMTJaS2d0ZDNnXzJKSmhSRVhyckJpTUlDa0RRWHN5cWVkOUJMTUp5aFRHcDV5Z1A1aWhSUnFNOHBJTDFPdzAzcVJ3bmhueTBmVkJDZTdJakhMOEFRdHBvWFduUzdRV2dNQVdpaXdFSVlHMDJ4NnZRUTBZZ3pOakxPLUdjNlNNQnJQMXpfSWR3NmFodDdDbkEtVmRjdVBhMjRWT1NOV1BYbU15VHRSWFR0UVBBMWtKRTRkS25KMFk9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmMGhla2xoZWowNjJzc1EzMWJYRXRTcGdWWWctU3hhcXNUbVVaOTJiRFJuSGM5S3ZGZ0M4RFotTGxOQ3loa3l4aVZ2T3FsRVVMck83RTlURFNOdWxHb0JfNVEtRGJ4X193dV9Bd0EtNlVGV0h4SWk2bldfWThxNVVnOGctSkNFR3FXa2pmY2ROcV9EVE1oMndFY1d4MjdLeWtUd0VEeW5CTlFwX2FOcW9DaWVXYWVfMy1ZUnFFUEZnanFOUGZILUpUZU8yUHNSODE3OXBSWVJFNlpBdTJtUT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmRm9saTZuR1VSZV9pQllKRGFURmN4cDNNanpsVFM3TVItdDNtNWdoWC1zVllrLUVPeGZDRXF1S3Rxd0tVUGV6bl9Ob0JMa3U5ZUNlRjRVQ1dRWXZDTXlsRU13b2o2R1paalU4RXB6SWxYVEJPa2NmaDRFdzExRXU1X2VnNDlhQzQ3cTE1RlJrSlB5elRMZ2w3NmxlV2l3PT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmZGdyWkJibS03akJtSjF0U2doYXZVVDM1em1kY2ZpRGJISmVCUURfVkw3c2Z3OEFQd1h1SzE0cTExSUtVejRPY3VmWF9XT1ZyS3RxRmVRYktJeDR6OWhYaEM0bkNLVEI1cl9VZ1VFOG9IRTFWc2FUemh0UmNHTGprQ0FweThlSGpSSDAyZmw2YmR0OFREQWxpNERHWm1nPT0=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmcEVpVmFuWkk4eTJTc3VtRFg4cE9QU3R5NVg0eVFIR29RSVhmXy1rR0pPTm4wbFhIVFFpckx5UmhvSGxqSWV4S0xoTzdESE55R2k5eHowZEprdGhrbEU3eG5JWGpaNWJIdDRqT05zZGNCQVpXd2xTek1teHRBS3NRU2FuUTlSQ2Q=
Service_MSFT_TENANT_ID = common
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -319,25 +319,24 @@ class AiOpenai(BaseConnectorAi):
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
),
AiModel(
name="dall-e-3",
displayName="OpenAI DALL-E 3",
name="gpt-image-1",
displayName="OpenAI GPT Image",
connectorType="openai",
apiUrl="https://api.openai.com/v1/images/generations",
temperature=0.0, # Image generation doesn't use temperature
maxTokens=0, # Image generation doesn't use tokens
temperature=0.0,
maxTokens=0,
contextLength=0,
costPer1kTokensInput=0.04,
costPer1kTokensOutput=0.0,
speedRating=5, # Slow for image generation
qualityRating=9, # High quality art generation
# capabilities removed (not used in business logic)
speedRating=5,
qualityRating=9,
functionCall=self.generateImage,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.IMAGE_GENERATE, 10)
),
version="dall-e-3",
version="gpt-image-1",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.04
)
]
@ -653,105 +652,82 @@ class AiOpenai(BaseConnectorAi):
)
async def generateImage(self, modelCall: AiModelCall) -> AiModelResponse:
"""
Generate an image using DALL-E 3 using standardized pattern.
Args:
modelCall: AiModelCall with messages and generation options
Returns:
AiModelResponse with generated image data
"""
"""Generate an image using GPT Image model (gpt-image-1)."""
try:
# Extract parameters from modelCall
messages = modelCall.messages
model = modelCall.model
options = modelCall.options
# Get prompt from messages
promptContent = messages[0]["content"] if messages else ""
# Parse prompt using AiCallPromptImage model
import json
messages = modelCall.messages
options = modelCall.options
promptContent = messages[0]["content"] if messages else ""
try:
# Try to parse as JSON
promptData = json.loads(promptContent)
promptModel = AiCallPromptImage(**promptData)
except:
# If not JSON, use plain text prompt
except Exception:
promptModel = AiCallPromptImage(
prompt=promptContent,
size=options.size if options and hasattr(options, 'size') else "1024x1024",
quality=options.quality if options and hasattr(options, 'quality') else "standard",
style=options.style if options and hasattr(options, 'style') else "vivid"
size=options.size if options and hasattr(options, "size") else "1024x1024",
quality=options.quality if options and hasattr(options, "quality") else "auto",
)
# Extract parameters from Pydantic model
prompt = promptModel.prompt
size = promptModel.size or "1024x1024"
quality = promptModel.quality or "standard"
style = promptModel.style or "vivid"
rawQuality = promptModel.quality or "auto"
quality = {"standard": "auto", "hd": "high"}.get(rawQuality, rawQuality)
logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")
# DALL-E 3 API endpoint
dalle_url = "https://api.openai.com/v1/images/generations"
payload = {
"model": "dall-e-3",
"model": "gpt-image-1",
"prompt": prompt,
"size": size,
"quality": quality,
"style": style,
"n": 1,
"response_format": "b64_json" # Get base64 data directly instead of URLs
}
# Use existing httpClient to benefit from connection pooling
# This avoids TLS connection issues that can occur with fresh clients
response = await self.httpClient.post(
dalle_url,
json=payload
"https://api.openai.com/v1/images/generations",
json=payload,
)
if response.status_code != 200:
logger.error(f"DALL-E API error: {response.status_code} - {response.text}")
logger.error(f"Image generation API error: {response.status_code} - {response.text}")
return AiModelResponse(
content="",
success=False,
error=f"DALL-E API error: {response.status_code} - {response.text}"
error=f"Image generation API error: {response.status_code} - {response.text}",
)
responseJson = response.json()
if "data" in responseJson and len(responseJson["data"]) > 0:
image_data = responseJson["data"][0]["b64_json"]
logger.info(f"Successfully generated image: {len(image_data)} characters")
imageData = responseJson["data"][0].get("b64_json", "")
if not imageData:
imageData = responseJson["data"][0].get("url", "")
logger.info(f"Successfully generated image: {len(imageData)} characters")
return AiModelResponse(
content=image_data,
content=imageData,
success=True,
modelId="dall-e-3",
modelId="gpt-image-1",
metadata={
"size": size,
"quality": quality,
"style": style,
"response_id": responseJson.get("id", "")
}
"response_id": responseJson.get("id", ""),
},
)
else:
logger.error("No image data in DALL-E response")
logger.error("No image data in generation response")
return AiModelResponse(
content="",
success=False,
error="No image data in DALL-E response"
error="No image data in generation response",
)
except Exception as e:
logger.error(f"Error during image generation: {str(e)}", exc_info=True)
return AiModelResponse(
content="",
success=False,
error=f"Error during image generation: {str(e)}"
error=f"Error during image generation: {str(e)}",
)

View file

@ -311,7 +311,10 @@ class DatabaseConnector:
# Establish connection to the database
self._connect()
logger.info("PostgreSQL database system initialized successfully")
logger.debug(
"PostgreSQL database system initialized (db=%s, host=%s, port=%s)",
self.dbDatabase, self.dbHost, self.dbPort,
)
except Exception as e:
logger.error(f"FATAL ERROR: Database system initialization failed: {e}")
raise

View file

@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
logger = logging.getLogger(__name__)
def _buildPrimarySttRecognitionFields(
*,
model: str,
lightweight: bool,
) -> Dict[str, Any]:
"""Shared fields for batch + streaming primary RecognitionConfig."""
base: Dict[str, Any] = {
"enable_automatic_punctuation": True,
"model": model,
}
if lightweight:
base["enable_word_time_offsets"] = False
base["enable_word_confidence"] = False
base["max_alternatives"] = 1
base["use_enhanced"] = False
else:
base["enable_word_time_offsets"] = True
base["enable_word_confidence"] = True
base["max_alternatives"] = 3
base["use_enhanced"] = True
return base
# Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
# SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
_GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
@ -73,7 +97,10 @@ class ConnectorGoogleSpeech:
sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False,
phraseHints: Optional[list] = None,
alternativeLanguages: Optional[list] = None) -> Dict:
alternativeLanguages: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
audioFormat: Optional[str] = None) -> Dict:
"""
Convert speech to text using Google Cloud Speech-to-Text API.
@ -82,6 +109,9 @@ class ConnectorGoogleSpeech:
language: Language code (e.g., 'de-DE', 'en-US')
sample_rate: Audio sample rate (auto-detected if None)
channels: Number of audio channels (auto-detected if None)
model: Google recognition model (e.g. latest_long, latest_short)
lightweight: If True, omit word timings/confidence, single alternative, no enhanced model
audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection
Returns:
Dict containing transcribed text, confidence, and metadata
@ -92,8 +122,24 @@ class ConnectorGoogleSpeech:
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
sampleRate = None
# Auto-detect audio format if not provided
if sampleRate is None or channels is None:
explicitFormat = (audioFormat or "").strip().lower() or None
if explicitFormat:
if channels is None:
channels = 1
if sampleRate is None:
if explicitFormat == "webm_opus":
sampleRate = 48000
elif explicitFormat == "linear16":
sampleRate = 16000
elif explicitFormat in ("mp3", "flac"):
sampleRate = 44100
elif explicitFormat == "wav":
sampleRate = 16000
else:
sampleRate = 16000
audioFormat = explicitFormat
logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch")
elif sampleRate is None or channels is None:
validation = self.validateAudioFormat(audioContent)
if not validation["valid"]:
return {
@ -156,12 +202,7 @@ class ConnectorGoogleSpeech:
"encoding": encoding,
"audio_channel_count": channels,
"language_code": language,
"enable_automatic_punctuation": True,
"model": "latest_long",
"enable_word_time_offsets": True,
"enable_word_confidence": True,
"max_alternatives": 3,
"use_enhanced": True,
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
}
if phraseHints:
@ -205,8 +246,7 @@ class ConnectorGoogleSpeech:
sample_rate_hertz=16000,
audio_channel_count=1,
language_code=language,
enable_automatic_punctuation=True,
model="latest_long"
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
)
try:
response = await asyncio.to_thread(
@ -343,7 +383,7 @@ class ConnectorGoogleSpeech:
"error": "No recognition results (silence or unclear audio)"
}
models = ["latest_long", "phone_call", "latest_short"]
models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"]))
for fallback_config in fallback_configs:
for model in models:
@ -419,6 +459,9 @@ class ConnectorGoogleSpeech:
audioQueue: asyncio.Queue,
language: str = "de-DE",
phraseHints: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
singleUtterance: bool = False,
) -> AsyncGenerator[Dict[str, Any], None]:
"""
Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
@ -429,9 +472,13 @@ class ConnectorGoogleSpeech:
Send (b"", True) to signal end of stream.
language: Language code
phraseHints: Optional boost phrases
model: Google recognition model (e.g. latest_long, latest_short)
lightweight: If True, use non-enhanced primary config (lower latency)
singleUtterance: If True, end stream after first utterance (client should reconnect)
Yields:
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec;
optionally endOfSingleUtterance, reconnectRequired
"""
STREAM_LIMIT_SEC = 290
streamStartTs = time.time()
@ -442,9 +489,7 @@ class ConnectorGoogleSpeech:
"sample_rate_hertz": 48000,
"audio_channel_count": 1,
"language_code": language,
"enable_automatic_punctuation": True,
"model": "latest_long",
"use_enhanced": True,
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
}
if phraseHints:
configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
@ -453,7 +498,7 @@ class ConnectorGoogleSpeech:
streamingConfig = speech.StreamingRecognitionConfig(
config=recognitionConfig,
interim_results=True,
single_utterance=False,
single_utterance=singleUtterance,
)
import queue as threadQueue
@ -490,7 +535,22 @@ class ConnectorGoogleSpeech:
)
for response in responseStream:
elapsed = time.time() - streamStartTs
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0
durationFromResults = 0.0
for result in response.results:
rt = getattr(result, "result_end_time", None)
if rt is None:
continue
if hasattr(rt, "total_seconds"):
durationFromResults = max(durationFromResults, float(rt.total_seconds()))
else:
durationFromResults = max(
durationFromResults,
float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9,
)
estimatedDurationSec = durationFromResults if durationFromResults > 0 else (
totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0
)
finalTexts = []
interimTexts = []
@ -524,6 +584,13 @@ class ConnectorGoogleSpeech:
"stabilityScore": 0.0,
"audioDurationSec": estimatedDurationSec,
}), loop)
speechEvt = getattr(response, "speech_event_type", None)
if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt):
asyncio.run_coroutine_threadsafe(resultOutQ.put({
"endOfSingleUtterance": True,
"audioDurationSec": estimatedDurationSec,
}), loop)
if elapsed >= STREAM_LIMIT_SEC:
logger.info("Streaming STT approaching 5-min limit, client should reconnect")
asyncio.run_coroutine_threadsafe(resultOutQ.put({

View file

@ -245,11 +245,10 @@ class AiCallPromptWebCrawl(BaseModel):
class AiCallPromptImage(BaseModel):
"""Structured prompt format for image generation."""
prompt: str = Field(description="Text description of the image to generate")
size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1792x1024, 1024x1792)")
quality: Optional[str] = Field(default="standard", description="Image quality (standard, hd)")
style: Optional[str] = Field(default="vivid", description="Image style (vivid, natural)")
size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1536x1024, 1024x1536)")
quality: Optional[str] = Field(default="auto", description="Image quality (auto, high, medium, low)")
class AiProcessParameters(BaseModel):

View file

@ -62,15 +62,15 @@ class DataSource(PowerOnModel):
description="Owner user ID",
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
)
autoSync: bool = Field(
ragIndexEnabled: bool = Field(
default=False,
description="Automatically sync on schedule",
json_schema_extra={"label": "Auto-Sync"},
description="When true this tree element is indexed into the RAG knowledge store",
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
)
lastSynced: Optional[float] = Field(
lastIndexed: Optional[float] = Field(
default=None,
description="Last sync timestamp",
json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"},
description="Timestamp of last successful RAG indexing run",
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
)
scope: str = Field(
default="personal",

View file

@ -115,7 +115,7 @@ class PaginationParams(BaseModel):
Omit or set to None for the default (ungrouped) view.
"""
page: int = Field(ge=1, description="Current page number (1-based)")
pageSize: int = Field(ge=1, le=1000, description="Number of items per page")
pageSize: int = Field(ge=1, le=10000, description="Number of items per page")
sort: List[SortField] = Field(default_factory=list, description="List of sort fields in priority order")
filters: Optional[Dict[str, Any]] = Field(
default=None,

View file

@ -484,10 +484,10 @@ class UserConnection(PowerOnModel):
default=None,
description=(
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
"clickupScope (titles|title_description|with_comments), "
"surfaceToggles (dict per authority), maxAgeDays (int)."
"mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), "
"filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), "
"clickupIndexAttachments (bool), maxAgeDays (int). "
"Neutralization is controlled per DataSource.neutralize (not here)."
),
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
)

View file

@ -1080,6 +1080,8 @@ class CommcoachService:
audioContent=audioContent,
language=language,
skipFallbacks=True,
model="latest_short",
lightweight=True,
)
transcribedText = ""

View file

@ -3,9 +3,147 @@
from modules.shared.i18nRegistry import t
# Data-picker entries for the loop node's second output: FLOW_NODES references
# this list as ``dataPickOptions`` on loop output port 1 (schema ``Transit``).
# Each entry is a dict with:
#   path        - list of keys addressing the value inside the port's output
#   pickerLabel - short label, passed through t() for translation
#   detail      - longer explanation, passed through t() for translation
#   recommended - True for the entry flagged as the suggested choice
#   type        - type of the value, given as a string
LOOP_DONE_DATA_PICK_OPTIONS = [
# bodyResults: per its detail text, the output of the last loop-body step,
# collected as a list with one entry per iteration.
{
"path": ["bodyResults"],
"pickerLabel": t("Alle Schleifen-Ergebnisse"),
"detail": t(
"Ausgabe des letzten Schrittes im Schleifen-Rumpf pro Iteration als Liste, "
"ein Eintrag pro Durchlauf. Ideal als Eingabe fuer Kontext zusammenfuehren."
),
"recommended": True,
"type": "List[Any]",
},
# items: per its detail text, the loop elements after the chosen iteration
# mode was applied (a filtered copy of the input list).
{
"path": ["items"],
"pickerLabel": t("Iterierte Elemente"),
"detail": t(
"Liste der Schleifen-Elemente nach gewähltem Iterationsmodus (Kopie der Eingabeliste, gefiltert)."
),
"recommended": False,
"type": "List[Any]",
},
# count: per its detail text, how many iterations the loop executed.
{
"path": ["count"],
"pickerLabel": t("Anzahl Durchläufe"),
"detail": t("Wie viele Iterationen die Schleife ausgeführt hat."),
"recommended": False,
"type": "int",
},
]
# Data-picker entries for the loop node's first output: FLOW_NODES references
# this list as ``dataPickOptions`` on loop output port 0 (schema ``LoopItem``).
# Each entry is a dict with ``path`` (list of keys), ``pickerLabel`` and
# ``detail`` (both passed through t() for translation), ``recommended`` (bool)
# and ``type`` (type of the value, given as a string).
LOOP_ITEM_DATA_PICK_OPTIONS = [
# currentItem: the element of the current iteration (the recommended pick).
{
"path": ["currentItem"],
"pickerLabel": t("Aktuelles Element"),
"detail": t("Das aktuelle Iterationselement."),
"recommended": True,
"type": "Any",
},
# currentIndex: per its detail text, the 0-based index of the current iteration.
{
"path": ["currentIndex"],
"pickerLabel": t("Aktueller Index"),
"detail": t("0-basierter Index der aktuellen Iteration."),
"recommended": False,
"type": "int",
},
# items: per its detail text, the complete source list.
{
"path": ["items"],
"pickerLabel": t("Alle Elemente"),
"detail": t("Die vollständige Quellliste."),
"recommended": False,
"type": "List[Any]",
},
# count: per its detail text, the number of elements in the loop.
{
"path": ["count"],
"pickerLabel": t("Gesamtanzahl"),
"detail": t("Anzahl der Elemente in der Schleife."),
"recommended": False,
"type": "int",
},
]
# Base paths when ``ActionResult.data`` uses envelope + ``_meta`` (context.extractContent-style clarity).
# Each entry is a dict with ``path`` (list of keys), ``pickerLabel`` and
# ``detail`` (both passed through t() for translation), ``recommended`` (bool)
# and ``type`` (type of the value, given as a string).
# This list is also spread into CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS.
CONTEXT_ENVELOPE_DATA_PICK_OPTIONS = [
# data: the whole envelope; per its detail text it carries ``schemaVersion``,
# ``kind``, the payload fields and ``_meta``.
{
"path": ["data"],
"pickerLabel": t("Vollständiges data-Objekt"),
"detail": t(
"Versionierter Kontext-Umschlag: ``schemaVersion``, ``kind``, Nutzdatenfelder, ``_meta``."
),
"recommended": True,
"type": "Dict",
},
# data._meta: technical metadata; per its detail text this covers the
# ``actionType`` and the payload schema version.
{
"path": ["data", "_meta"],
"pickerLabel": t("Technische Metadaten (_meta)"),
"detail": t(
"`actionType`, Payload-Schema-Version; bei Transform/Merge keine großen Payloads."
),
"recommended": False,
"type": "Any",
},
]
# Data-picker entries for the merge node: FLOW_NODES references this list as
# ``dataPickOptions`` on the merge output port 0 (schema ``MergeResult``), and
# it is also spread into CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS.
# Each entry is a dict with ``path`` (list of keys), ``pickerLabel`` and
# ``detail`` (both passed through t() for translation), ``recommended`` (bool)
# and ``type`` (type of the value, given as a string).
MERGE_RESULT_DATA_PICK_OPTIONS = [
# merged: per its detail text, the merged result (its form depends on the mode).
{
"path": ["merged"],
"pickerLabel": t("Zusammengeführt"),
"detail": t("Zusammengeführtes Ergebnis (je nach Modus)."),
"recommended": True,
"type": "Dict",
},
# first: per its detail text, the data from the first connected input
# (relevant for the "first" mode).
{
"path": ["first"],
"pickerLabel": t("Erster Zweig"),
"detail": t("Daten vom ersten verbundenen Eingang (Modus „first“)."),
"recommended": False,
"type": "Any",
},
# inputs: per its detail text, a dict of the input objects keyed by port index.
{
"path": ["inputs"],
"pickerLabel": t("Alle Eingänge"),
"detail": t("Dict der Eingabeobjekte nach Port-Index."),
"recommended": False,
"type": "Dict[int,Any]",
},
]
# Extended picker for ``context.mergeContext`` (ActionResult + ``surfaceDataAsTopLevel``): same
# merge keys as ``flow.merge`` plus ``count`` from the action payload.
# Built by unpacking the envelope entries first, then the merge entries, and
# finally appending one extra ``count`` entry; the unpacked entries are the
# same dict objects as in the source lists, not copies.
CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS = [
*CONTEXT_ENVELOPE_DATA_PICK_OPTIONS,
*MERGE_RESULT_DATA_PICK_OPTIONS,
# count: per its detail text, how many entries were merged.
{
"path": ["count"],
"pickerLabel": t("Anzahl Einträge"),
"detail": t("Wie viele Einträge zusammengeführt wurden."),
"recommended": False,
"type": "int",
},
]
# Data-picker entries for ``ContextBranch`` outputs: FLOW_NODES references this
# list as ``dataPickOptions`` on the switch node's output port 0.
# Each entry is a dict with ``path`` (list of keys), ``pickerLabel`` and
# ``detail`` (both passed through t() for translation), ``recommended`` (bool)
# and ``type`` (type of the value, given as a string).
_CONTEXT_BRANCH_DATA_PICK_OPTIONS = [
# items: the filtered elements; per its detail text this is the recommended
# input for loops (one pass per entry).
{
"path": ["items"],
"pickerLabel": t("Gefilterte Elemente"),
"detail": t("Empfohlen für Schleifen: je Eintrag ein Durchlauf (z. B. Bild-Slots)."),
"recommended": True,
"type": "List[Any]",
},
# data: per its detail text, the filtered presentation envelope, or the
# unchanged input on the else branch.
{
"path": ["data"],
"pickerLabel": t("Kontext (data)"),
"detail": t("Gefilterter Presentation-Umschlag oder unveränderter Eingang auf dem Sonst-Zweig."),
"recommended": False,
"type": "Dict",
},
]
# Ports that pass through typical step outputs (not just an empty Transit).
_FLOW_INPUT_SCHEMAS = [
"Transit",
"ContextBranch",
"FormPayload",
"AiResult",
"TextResult",
@ -31,12 +169,23 @@ FLOW_NODES = [
"Die Daten vom Eingangskanal werden an den gewählten Ausgang durchgereicht."
),
"parameters": [
{
"name": "Item",
"type": "Any",
"required": True,
"frontendType": "dataRef",
"description": t("Item, das auf die Bedingung getestet wird"),
},
{
"name": "condition",
"type": "json",
"required": True,
"frontendType": "condition",
"description": t("Bedingung: Feld aus einem vorherigen Schritt und Vergleich"),
"frontendOptions": {
"dependsOn": "Item",
"operatorCatalog": "condition",
},
"description": t("Bedingung auf das gewählte Item"),
},
],
"inputs": 1,
@ -52,8 +201,10 @@ FLOW_NODES = [
"category": "flow",
"label": t("Switch"),
"description": t(
"Mehrere Zweige nach einem Wert aus einem vorherigen Schritt (Data Picker). "
"Definiere Fälle mit Vergleichsoperator; der Eingang wird an den ersten passenden Zweig durchgereicht."
"Mehrere Zweige nach einem Wert aus einem vorherigen Schritt. "
"Jeder Fall hat einen eigenen Ausgang mit passend gefiltertem Inhalt in ``items``; "
"mehrere Kontext-Filter können gleichzeitig zutreffen (z. B. Text und Bilder). "
"Der letzte Ausgang (Sonst) reicht den unveränderten Eingang durch."
),
"parameters": [
{
@ -68,13 +219,22 @@ FLOW_NODES = [
"type": "array",
"required": False,
"frontendType": "caseList",
"description": t("Fälle: Operator und Vergleichswert"),
"frontendOptions": {
"dependsOn": "value",
"operatorCatalog": "condition",
},
"description": t("Fälle: Operator und Vergleichswert (abhängig vom gewählten Wert)"),
},
],
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": list(_FLOW_INPUT_SCHEMAS)}},
"outputPorts": {0: {"schema": "Transit"}},
"outputPorts": {
0: {
"schema": "ContextBranch",
"dataPickOptions": _CONTEXT_BRANCH_DATA_PICK_OPTIONS,
},
},
"executor": "flow",
"meta": {"icon": "mdi-swap-horizontal", "color": "#FF9800", "usesAi": False},
},
@ -83,8 +243,10 @@ FLOW_NODES = [
"category": "flow",
"label": t("Schleife / Für jedes"),
"description": t(
"Iteriert über ein Array aus einem vorherigen Schritt (z. B. documente, Zeilen, Listeneinträge). "
"Optional: UDM-Ebene für strukturierte Dokumente."
"Zwei Ausgänge: „Schleife“ verbindet den Rumpf (pro Element); optional führt der Rumpf "
"mit einem Rücklauf-Pfeil wieder zum **gleichen Eingang** wie der vorherige Schritt (wie in n8n). "
"„Fertig“ führt genau einmal fort, wenn alle Iterationen beendet sind. "
"Die zu durchlaufende Liste wählen Sie wie bisher; UDM-/Strukturdaten werden automatisch sinnvoll in Elemente aufgelöst."
),
"parameters": [
{
@ -95,13 +257,27 @@ FLOW_NODES = [
"description": t("Liste oder Sammlung zum Durchlaufen (im Data Picker wählen)"),
},
{
"name": "level",
"name": "iterationMode",
"type": "str",
"required": False,
"frontendType": "select",
"frontendOptions": {"options": ["auto", "documents", "structuralNodes", "contentBlocks"]},
"description": t("Nur bei UDM-Daten: welche Strukturebene als Elemente verwendet wird"),
"default": "auto",
"frontendOptions": {
"options": ["all", "first", "last", "every_second", "every_third", "every_nth"],
},
"description": t(
"Welche Elemente die Schleife besucht: alle, nur das erste/letzte, jedes zweite/dritte "
"oder jedes n-te (Schritt dann unter „Schrittweite“)."
),
"default": "all",
},
{
"name": "iterationStride",
"type": "int",
"required": False,
"frontendType": "number",
"frontendOptions": {"min": 2, "max": 100},
"description": t("Nur bei „jedes n-te“: Schrittweite (z. B. 5 = jedes 5. Element ab Index 0)."),
"default": 2,
},
{
"name": "concurrency",
@ -114,12 +290,18 @@ FLOW_NODES = [
},
],
"inputs": 1,
"outputs": 1,
"inputPorts": {0: {"accepts": [
"Transit", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList",
"ActionResult", "AiResult", "QueryResult", "FormPayload",
]}},
"outputPorts": {0: {"schema": "LoopItem"}},
"outputs": 2,
"outputLabels": [t("Schleife"), t("Fertig")],
"inputPorts": {
0: {"accepts": [
"Transit", "ContextBranch", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList",
"ActionResult", "AiResult", "QueryResult", "FormPayload", "LoopItem",
]},
},
"outputPorts": {
0: {"schema": "LoopItem", "dataPickOptions": LOOP_ITEM_DATA_PICK_OPTIONS},
1: {"schema": "Transit", "dataPickOptions": LOOP_DONE_DATA_PICK_OPTIONS},
},
"executor": "flow",
"meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False},
},
@ -151,13 +333,19 @@ FLOW_NODES = [
"default": 2,
},
],
# ``inputs: 2`` is the static minimum / default topology. ``inputCount`` is a
# frontend hint: the editor adds/removes input ports dynamically when the user
# changes the value. ``FlowExecutor._merge`` collects whatever ports exist in
# ``inputSources`` at runtime, so extra ports (35) work without further changes
# to this definition. ``inputPorts`` below only type-declares the two minimum
# ports; additional ports inherit the same ``_FLOW_INPUT_SCHEMAS`` accepts list.
"inputs": 2,
"outputs": 1,
"inputPorts": {
0: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
1: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
},
"outputPorts": {0: {"schema": "MergeResult"}},
"outputPorts": {0: {"schema": "MergeResult", "dataPickOptions": MERGE_RESULT_DATA_PICK_OPTIONS}},
"executor": "flow",
"meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False},
},

View file

@ -40,6 +40,8 @@ class BrowserBotConnector:
botAccountPassword: Optional[str] = None,
transferMode: str = "auto",
debugMode: bool = False,
avatarMediaData: Optional[str] = None,
avatarMediaType: Optional[str] = None,
) -> Dict[str, Any]:
"""
Send join command to the Browser Bot service.
@ -79,12 +81,16 @@ class BrowserBotConnector:
"debugMode": debugMode,
}
# Add authenticated join credentials if configured
if botAccountEmail and botAccountPassword:
payload["botAccountEmail"] = botAccountEmail
payload["botAccountPassword"] = botAccountPassword
logger.info(f"Bot will join authenticated as {botAccountEmail}")
if avatarMediaData and avatarMediaType:
payload["avatarMediaData"] = avatarMediaData
payload["avatarMediaType"] = avatarMediaType
logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars")
try:
async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:

View file

@ -111,6 +111,18 @@ class TeamsbotMeetingModule(PowerOnModel):
defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
goals: Optional[str] = Field(default=None, description="Free-text goals")
kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
defaultMeetingLink: Optional[str] = Field(
default=None,
description="Default Teams meeting URL for new sessions in this module (user can override)",
)
defaultBotName: Optional[str] = Field(
default=None,
description="Default display name for the bot when starting a session from this module",
)
defaultAvatarFileId: Optional[str] = Field(
default=None,
description="FileItem ID for the default avatar image/video shown in the meeting",
)
status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
@ -217,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel):
triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override")
# ============================================================================
@ -240,6 +253,7 @@ class TeamsbotConfig(BaseModel):
triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting")
def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
"""Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
@ -257,6 +271,7 @@ class TeamsbotStartSessionRequest(BaseModel):
"""Request to start a new Teams Bot session."""
meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to")
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
@ -277,6 +292,9 @@ class CreateMeetingModuleRequest(BaseModel):
defaultDirectorPrompts: Optional[str] = None
goals: Optional[str] = None
kpiTargets: Optional[str] = None
defaultMeetingLink: Optional[str] = None
defaultBotName: Optional[str] = None
defaultAvatarFileId: Optional[str] = None
class UpdateMeetingModuleRequest(BaseModel):
@ -287,6 +305,9 @@ class UpdateMeetingModuleRequest(BaseModel):
defaultDirectorPrompts: Optional[str] = None
goals: Optional[str] = None
kpiTargets: Optional[str] = None
defaultMeetingLink: Optional[str] = None
defaultBotName: Optional[str] = None
defaultAvatarFileId: Optional[str] = None
status: Optional[TeamsbotModuleStatus] = None
@ -304,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
triggerCooldownSeconds: Optional[int] = None
contextWindowSegments: Optional[int] = None
debugMode: Optional[bool] = None
avatarFileId: Optional[str] = None
# ============================================================================

View file

@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
TeamsbotDirectorPromptStatus,
TeamsbotDirectorPromptMode,
TeamsbotMeetingModule,
TeamsbotModuleStatus,
)
logger = logging.getLogger(__name__)
@ -338,6 +339,8 @@ class TeamsbotObjects:
def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
    """Return all meeting modules of a feature instance, newest first.

    Args:
        instanceId: Feature instance whose ``TeamsbotMeetingModule`` records
            are fetched via ``self.db.getRecordset``.

    Returns:
        The fetched records, sorted descending by ``sysCreatedAt``. Records
        whose ``status`` is missing or ``None`` are reported with
        ``TeamsbotModuleStatus.ACTIVE``; the default is applied to the
        returned records in memory only.
    """
    records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
    defaultStatus = TeamsbotModuleStatus.ACTIVE.value
    for record in records:
        # dict.setdefault() only fills a *missing* key; a record that carries
        # the key with a None value would otherwise keep status=None.
        if record.get("status") is None:
            record["status"] = defaultStatus
    records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True)
    return records

View file

@ -290,6 +290,19 @@ def _runMigrations():
migrated = False
# M2: MeetingModule default meeting link / bot name (additive columns)
if _tableExists("TeamsbotMeetingModule"):
for col, sqlType in (
("defaultMeetingLink", "TEXT"),
("defaultBotName", "TEXT"),
):
if not _columnExists("TeamsbotMeetingModule", col):
cur.execute(
f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL',
)
logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}")
migrated = True
# M1: Create default Adhoc modules for orphaned sessions
# (only runs if TeamsbotSession table exists with moduleId column
# and there are sessions without a moduleId)

View file

@ -40,6 +40,7 @@ from .datamodelTeamsbot import (
TeamsbotDirectorPromptMode,
TeamsbotDirectorPromptStatus,
TeamsbotMeetingModule,
TeamsbotModuleStatus,
CreateMeetingModuleRequest,
UpdateMeetingModuleRequest,
DIRECTOR_PROMPT_FILE_LIMIT,
@ -203,6 +204,7 @@ async def createModule(
data["instanceId"] = instanceId
data["mandateId"] = mandateId
data["ownerUserId"] = str(context.user.id)
data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
module = interface.createModule(data)
return {"module": module}
@ -280,6 +282,11 @@ async def startSession(
mandateId = _validateInstanceAccess(instanceId, context)
interface = _getInterface(context, instanceId)
config = _getInstanceConfig(instanceId)
if body.moduleId:
mod = interface.getModule(body.moduleId)
if not mod or str(mod.get("instanceId") or "") != str(instanceId):
raise HTTPException(status_code=400, detail="Invalid moduleId for this instance")
# Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
@ -288,6 +295,7 @@ async def startSession(
sessionData = TeamsbotSession(
instanceId=instanceId,
mandateId=mandateId,
moduleId=body.moduleId,
meetingLink=cleanMeetingUrl,
botName=body.botName or config.botName,
sessionContext=body.sessionContext,
@ -426,6 +434,54 @@ async def listSessions(
return {"sessions": sessions}
@router.get("/{instanceId}/dashboard/stream")
@limiter.limit("60/minute")
async def streamDashboard(
    request: Request,
    instanceId: str,
    context: RequestContext = Depends(getRequestContext),
):
    """
    SSE channel for the Teamsbot dashboard: repeated snapshots of sessions and meeting modules.
    Push interval: 3s while any own session is pending/joining/active, otherwise 20s.
    Same session visibility rules as GET /sessions (own sessions unless platform admin).
    """
    _validateInstanceAccess(instanceId, context)
    interface = _getInterface(context, instanceId)
    # Platform admins are not restricted to a single user (userId=None).
    if context.isPlatformAdmin:
        userId = None
    else:
        userId = str(context.user.id)
    # Session states that switch the stream to the short push interval.
    liveStatuses = {
        TeamsbotSessionStatus.PENDING.value,
        TeamsbotSessionStatus.JOINING.value,
        TeamsbotSessionStatus.ACTIVE.value,
    }

    async def eventGenerator():
        # Endless loop: one SSE frame per tick, then sleep until the next tick.
        while True:
            # Reset every tick so a failed tick falls back to the long interval.
            sessionRows = []
            try:
                sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId)
                snapshot = {
                    "type": "dashboardState",
                    "sessions": sessionRows,
                    "modules": interface.getModules(instanceId),
                }
                yield f"data: {json.dumps(snapshot, default=str)}\n\n"
            except asyncio.CancelledError:
                # Cancellation must propagate; never convert it into an error frame.
                raise
            except Exception as ex:
                logger.warning("dashboard stream tick failed: %s", ex)
                errorFrame = json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})
                yield f"data: {errorFrame}\n\n"
            pollDelay = 20.0
            for row in sessionRows:
                if row.get("status") in liveStatuses:
                    pollDelay = 3.0
                    break
            await asyncio.sleep(pollDelay)

    sseHeaders = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        eventGenerator(),
        media_type="text/event-stream",
        headers=sseHeaders,
    )
@router.get("/{instanceId}/sessions/{sessionId}")
@limiter.limit("30/minute")
async def getSession(
@ -634,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf
if not userSettings:
return baseConfig
# Merge: user settings override instance defaults (only non-None values)
# Merge: user settings override instance defaults (only non-None values).
# Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically.
overrides = {}
for field in ["botName", "aiSystemPrompt", "responseMode",
"responseChannel", "transferMode", "language", "voiceId",
"triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments",
"debugMode"]:
for field in TeamsbotConfig.model_fields:
value = userSettings.get(field)
if value is not None:
overrides[field] = value

View file

@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
),
"agentRound": (
"One short sentence (max ~14 words) the assistant says BETWEEN rounds "
"of a longer agent task to signal that work is still in progress. "
"Include the placeholder tokens '{round}' and '{maxRounds}' so the "
"caller can substitute the actual numbers — e.g. 'Step {round} of "
"{maxRounds}, still working.'"
"of a longer agent task to update the audience on what it is doing. "
"Include the placeholder token '{activity}' which will be filled with "
"the current activity — e.g. 'I am {activity}, one moment...' or "
"'Currently {activity}, almost there...'. Do NOT include step numbers."
),
}
@ -602,6 +602,13 @@ class TeamsbotService:
self._lastTranscriptText: Optional[str] = None
self._lastTranscriptId: Optional[str] = None
self._lastSttTime: float = 0.0
# Audio chunk aggregation: collect chunks and send to STT only
# after a speech pause or when the buffer reaches a target duration.
self._audioBuffer: bytes = b""
self._audioBufferStartTime: float = 0.0
self._audioBufferLastChunkTime: float = 0.0
self._audioBufferSampleRate: int = 16000
self._lastBotResponseText: Optional[str] = None
self._lastBotResponseTs: float = 0.0
@ -732,6 +739,12 @@ class TeamsbotService:
hasAuth = bool(botAccountEmail and botAccountPassword)
logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
avatarMediaData = None
avatarMediaType = None
avatarFileId = self._resolveAvatarFileId(session, interface)
if avatarFileId:
avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
result = await self.browserBotConnector.joinMeeting(
sessionId=sessionId,
meetingUrl=meetingLink,
@ -743,6 +756,8 @@ class TeamsbotService:
botAccountPassword=botAccountPassword,
transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
avatarMediaData=avatarMediaData,
avatarMediaType=avatarMediaType,
)
if result.get("success"):
@ -767,6 +782,37 @@ class TeamsbotService:
})
await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
def _resolveAvatarFileId(self, session, interface):
"""Resolve avatarFileId: module override > config default."""
moduleId = session.get("moduleId")
if moduleId:
module = interface.getModule(moduleId)
if module and module.get("defaultAvatarFileId"):
return module["defaultAvatarFileId"]
return getattr(self.config, "avatarFileId", None)
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
    """Fetch an avatar file and return it base64-encoded with its mime type.

    The file is read through the management DB interface obtained for
    ``self.currentUser`` / ``self.mandateId``; ``_teamsbotInterface`` is not
    used by this method.

    Returns:
        ``(base64Text, mimeType)`` on success. ``(None, None)`` when the file
        record is missing, the file has no data, or any exception occurs
        while loading (the exception is logged, not raised).
    """
    import base64
    from modules.interfaces import interfaceDbManagement

    nothing = (None, None)
    try:
        management = interfaceDbManagement.getInterface(self.currentUser, self.mandateId)
        record = management.getFile(fileId)
        if not record:
            logger.warning(f"Avatar file {fileId} not found")
            return nothing
        # Records without a mime type are treated as PNG images.
        mimeType = getattr(record, "mimeType", None) or "image/png"
        payload = management.getFileData(fileId)
        if not payload:
            logger.warning(f"Avatar file {fileId} has no data")
            return nothing
        b64 = base64.b64encode(payload).decode("ascii")
        logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64")
        return b64, mimeType
    except Exception as e:
        # Best effort: a broken avatar only produces a log entry and (None, None).
        logger.error(f"Failed to load avatar file {fileId}: {e}")
        return nothing
async def leaveMeeting(self, sessionId: str):
"""Send leave command to the Browser Bot service."""
from . import interfaceFeatureTeamsbot as interfaceDb
@ -1164,6 +1210,14 @@ class TeamsbotService:
interface.updateSession(sessionId, updates)
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
# Discard any audio still buffered (it is dropped, not sent to STT) before generating summary
if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
if self._audioBuffer:
logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)")
self._audioBuffer = b""
self._audioBufferStartTime = 0.0
self._audioBufferLastChunkTime = 0.0
# Generate summary when session ends
if dbStatus == TeamsbotSessionStatus.ENDED.value:
asyncio.create_task(self._generateMeetingSummary(sessionId))
@ -1178,11 +1232,18 @@ class TeamsbotService:
voiceInterface,
websocket: WebSocket,
):
"""Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline."""
"""Process an audio chunk from WebRTC capture. The bot-side VAD
(AudioWorklet / ScriptProcessor) already segments speech into 1-8s
voiced chunks. Here we apply a minimum-duration safety net: very short
chunks (<1s) are buffered until they reach 1s; everything else goes
straight to STT. A wall-clock timeout flushes stale buffers."""
import base64
_MIN_CHUNK_SEC = 1.0
_STALE_TIMEOUT_SEC = 3.0
try:
audioBytes = base64.b64decode(audioBase64)
if len(audioBytes) < 1000:
if len(audioBytes) < 500:
return
if captureDiagnostics:
@ -1195,14 +1256,12 @@ class TeamsbotService:
f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
)
# Use RMS from capture diagnostics to skip real silence.
# Byte-variation heuristics produced false positives and dropped valid speech.
isSilent = False
if captureDiagnostics and captureDiagnostics.get("rms") is not None:
try:
rmsVal = float(captureDiagnostics.get("rms"))
if rmsVal < 0.0003:
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})")
return
isSilent = True
except Exception:
pass
@ -1210,21 +1269,51 @@ class TeamsbotService:
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
return
# Treat sampleRate=0 as unknown (triggers auto-detection)
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
now = time.time()
effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000
if not isSilent:
if not self._audioBuffer:
self._audioBufferStartTime = now
self._audioBuffer += audioBytes
self._audioBufferLastChunkTime = now
self._audioBufferSampleRate = effectiveRate
bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0
bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0
shouldFlush = (
self._audioBuffer
and (
bufferDuration >= _MIN_CHUNK_SEC
or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
)
)
if not shouldFlush:
return
flushBytes = self._audioBuffer
flushRate = self._audioBufferSampleRate
self._audioBuffer = b""
self._audioBufferStartTime = 0.0
self._audioBufferLastChunkTime = 0.0
flushDuration = len(flushBytes) / (flushRate * 2)
logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")
phraseHints = list(self._knownSpeakers)
if self.config.botName:
phraseHints.append(self.config.botName)
sttResult = await voiceInterface.speechToText(
audioContent=audioBytes,
audioContent=flushBytes,
language=self.config.language or "de-DE",
sampleRate=effectiveSampleRate,
sampleRate=flushRate,
channels=1,
skipFallbacks=True,
phraseHints=phraseHints if phraseHints else None,
alternativeLanguages=["en-US"],
audioFormat="linear16",
)
if sttResult and sttResult.get("success") and sttResult.get("text"):
@ -1252,19 +1341,18 @@ class TeamsbotService:
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
"""Track current speaker from captions for STT attribution.
When the first non-bot caption arrives, retroactively attributes
any STT segments that were created before a speaker was known."""
Retroactively attributes any unattributed STT segments whenever a
new non-bot caption speaker arrives (not just the first time)."""
if not speaker:
return
normalizedSpeaker = speaker.strip()
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
return
prevSpeaker = self._lastCaptionSpeaker
self._lastCaptionSpeaker = normalizedSpeaker
self._knownSpeakers.add(normalizedSpeaker)
if prevSpeaker is None and self._unattributedTranscriptIds:
if self._unattributedTranscriptIds:
from . import interfaceFeatureTeamsbot as interfaceDb
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
for tid in self._unattributedTranscriptIds:
@ -3243,17 +3331,53 @@ class TeamsbotService:
return await self._pickEphemeralPhrase("agentBusy")
async def _interimAgentRoundMessage(
self, roundNum: int, maxRounds: int
self, lastToolLabel: Optional[str] = None
) -> Optional[str]:
"""Per-round progress notice for long agent runs (meeting voice /
chat, ephemeral). Phrasing is AI-localised once per session;
``{round}`` and ``{maxRounds}`` placeholders are substituted at
render time. Returns ``None`` if generation failed."""
return await self._pickEphemeralPhrase(
"agentRound",
substitutions={"round": roundNum, "maxRounds": maxRounds},
chat, ephemeral). Generates a single short phrase in the bot's
configured language that describes the current activity. Unlike
the cached ephemeral phrases, this is a per-call AI generation
to avoid mixing English displayLabels into non-English speech."""
targetLang = (self.config.language or "").strip() or "en-US"
botName = (self.config.botName or "the assistant").strip()
activityHint = lastToolLabel or "working on the task"
prompt = (
f"You are a meeting assistant named '{botName}'.\n"
f"Target spoken language (BCP-47): {targetLang}\n\n"
f"The assistant is currently busy with: {activityHint}\n\n"
f"Generate ONE short sentence (max 12 words) in {targetLang} "
f"that tells the audience what the assistant is doing right now. "
f"Natural, spoken style. No step numbers. No quotes around the output.\n"
f"Output ONLY the sentence, nothing else."
)
try:
aiService = createAiService(
self.currentUser, self.mandateId, self.instanceId
)
await aiService.ensureAiObjectsInitialized()
request = AiCallRequest(
prompt=prompt,
context="",
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.SPEED,
),
)
response = await aiService.callAi(request)
except Exception as aiErr:
logger.debug(f"Agent round phrase generation failed: {aiErr}")
return None
if not response or response.errorCount != 0 or not response.content:
return None
result = response.content.strip().strip('"').strip("'")
if len(result) > 200:
result = result[:200]
return result
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
"""Deliver a short line to the meeting (TTS + chat per config) without
persisting botResponses/transcripts, so the main agent answer stays the
@ -3370,6 +3494,7 @@ class TeamsbotService:
finalText: str = ""
rounds = 0
lastToolLabel: Optional[str] = None
try:
async for event in agentService.runAgent(
prompt=taskText,
@ -3390,11 +3515,9 @@ class TeamsbotService:
"round": roundNum,
"maxRounds": maxR,
})
# Round 1 is already covered by the general start notice; report to the meeting from round 2 on.
# Director prompts stay silent: no interim updates are sent to the meeting.
if roundNum >= 2 and not directorPromptMode:
try:
roundText = await self._interimAgentRoundMessage(roundNum, maxR)
roundText = await self._interimAgentRoundMessage(lastToolLabel)
if roundText:
await self._notifyMeetingEphemeral(sessionId, roundText)
except Exception as roundNoticeErr:
@ -3402,12 +3525,26 @@ class TeamsbotService:
f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
)
elif event.type == AgentEventTypeEnum.TOOL_CALL:
toolName = (event.data or {}).get("toolName") if event.data else None
evtData = event.data or {}
toolName = evtData.get("toolName")
lastToolLabel = evtData.get("displayLabel")
await _emitSessionEvent(sessionId, "agentRun", {
"source": sourceLabel,
"promptId": promptId,
"status": "toolCall",
"toolName": toolName,
"displayLabel": lastToolLabel,
})
elif event.type == AgentEventTypeEnum.TOOL_RESULT:
evtData = event.data or {}
resultSnippet = (evtData.get("data") or "")[:200]
await _emitSessionEvent(sessionId, "agentRun", {
"source": sourceLabel,
"promptId": promptId,
"status": "toolResult",
"toolName": evtData.get("toolName", ""),
"success": evtData.get("success", True),
"summary": resultSnippet,
})
elif event.type == AgentEventTypeEnum.FILE_CREATED:
await _emitSessionEvent(sessionId, "documentCreated", event.data or {})

View file

@ -754,14 +754,35 @@ ANTI-PATTERNS (do NOT do this):
"""
# Parked for one release as a fallback while the ontology-based path rolls
# out (see `trusteeOntology.getTrusteeOntology()`). Remove together with the
# legacy ``_loadFeatureDomainHints`` path once Phase 2 is the only supplier
# of the trustee prompt block.
_AGENT_DOMAIN_HINTS_LEGACY = _AGENT_DOMAIN_HINTS
def getAgentDomainHints() -> str:
"""Return Trustee-specific guidance for the Feature Data Sub-Agent.
The text is appended verbatim to the sub-agent's system prompt by
``featureDataAgent._buildSchemaContext``. Keep it concise and
pattern-driven every line costs tokens on every sub-agent call.
Deprecated as of Phase 2 (2026-05). Prefer ``getAgentOntology()`` ->
``ontologyToPromptCompiler.compileOntologyToPrompt(...)``. The legacy
text remains available so callers that still go through
``_buildSchemaContext()`` keep working during the migration window.
"""
return _AGENT_DOMAIN_HINTS
return _AGENT_DOMAIN_HINTS_LEGACY
def getAgentOntology():
    """Expose the trustee ontology to the Feature Data Sub-Agent.

    Hook for the Phase 2 path of ``featureDataAgent._buildSchemaContext``:
    when this function exists, the agent builds its domain block from the
    ontology rather than from the legacy free-text hints. The validator's
    NEVER_AGGREGATE constraints come from the same descriptor, which keeps
    prompt and validator aligned.

    Returns:
        Whatever ``trusteeOntology.getTrusteeOntology()`` returns.
    """
    # Imported at call time rather than at module import.
    from modules.features.trustee.trusteeOntology import getTrusteeOntology as loadOntology
    return loadOntology()
def registerFeature(catalogService) -> bool:

View file

@ -0,0 +1,295 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Trustee feature ontology (Phase 2 pilot).
Replaces the hand-written ``_AGENT_DOMAIN_HINTS`` block with a structured
ontology so the Feature Data Sub-Agent's QueryValidator AND the prompt
compiler share the same source of truth: account-group conventions,
period-bucket semantics, the NEVER_AGGREGATE constraints on already-
aggregated columns, and canonical tool-call templates for the most
frequent user intents.
Both the validator (deterministic enforcement) and the prompt compiler
(LLM steering) read from this descriptor, so an LLM that follows the
prompt patterns will never trigger a validator failure -- and one that
ignores them gets a structured repair hint pointing back at the same
constraint.
The legacy ``_AGENT_DOMAIN_HINTS_LEGACY`` block stays parked in
``mainTrustee.py`` for one release as a fallback during rollout.
"""
from __future__ import annotations
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
CanonicalQueryPattern,
Cardinality,
Constraint,
ConstraintRule,
Entity,
Invariant,
OntologyDescriptor,
Relation,
SemanticType,
)
# ---------------------------------------------------------------------------
# Entities
# ---------------------------------------------------------------------------
_ENTITIES = [
Entity(
name="Account",
pythonClass="TrusteeDataAccount",
semanticType=SemanticType.ACCOUNT,
description=(
"Chart-of-accounts row (Konto). One row per accountNumber per "
"mandate. Identifies the account, never holds balances."
),
invariants=[
Invariant(description="accountNumber is a stable string identifier (e.g. '1020', '5400')."),
Invariant(description="accountType is one of: asset / liability / revenue / expense."),
],
),
Entity(
name="BankAccount",
pythonClass="TrusteeDataAccount",
semanticType=SemanticType.ACCOUNT,
parentEntity="Account",
description="Account subgroup with accountNumber LIKE '102%' (ZKB, PostFinance, UBS, ...).",
),
Entity(
name="CashAccount",
pythonClass="TrusteeDataAccount",
semanticType=SemanticType.ACCOUNT,
parentEntity="Account",
description="Account subgroup with accountNumber LIKE '100%' (Hauptkasse, Nebenkassen).",
),
Entity(
name="AccountBalance",
pythonClass="TrusteeDataAccountBalance",
semanticType=SemanticType.BALANCE_SNAPSHOT,
description=(
"Period-bucketed snapshot: one row per (account, year, month). "
"closingBalance is THE balance at end of period -- already aggregated."
),
invariants=[
Invariant(description="periodMonth=0 means annual total of periodYear (use for 'per 31.12.YYYY')."),
Invariant(description="periodMonth in 1..12 means month-end snapshot."),
Invariant(description="closingBalance is the balance at period end; openingBalance at period start."),
Invariant(description="debitTotal/creditTotal are turnovers for the period, NOT balances."),
],
),
Entity(
name="JournalEntry",
pythonClass="TrusteeDataJournalEntry",
semanticType=SemanticType.TRANSACTION,
description="One booking header (Beleg). Has a bookingDate (unix seconds float) and totalAmount.",
invariants=[
Invariant(description="bookingDate is a UTC unix-seconds float; never compare against ISO strings."),
],
),
Entity(
name="JournalLine",
pythonClass="TrusteeDataJournalLine",
semanticType=SemanticType.TRANSACTION,
description="One booking line of a JournalEntry. Each line debits or credits exactly one account.",
invariants=[
Invariant(description="Per line either debitAmount > 0 (Soll) or creditAmount > 0 (Haben), not both."),
],
),
]
# ---------------------------------------------------------------------------
# Relations
# ---------------------------------------------------------------------------
_RELATIONS = [
Relation(fromEntity="AccountBalance", toEntity="Account", cardinality=Cardinality.MANY_TO_ONE, via="accountNumber"),
Relation(fromEntity="JournalLine", toEntity="JournalEntry", cardinality=Cardinality.MANY_TO_ONE, via="journalEntryId"),
Relation(fromEntity="JournalLine", toEntity="Account", cardinality=Cardinality.MANY_TO_ONE, via="accountNumber"),
]
# ---------------------------------------------------------------------------
# Constraints (validator-enforced)
# ---------------------------------------------------------------------------
_CONSTRAINTS = [
# closingBalance is the single biggest hallucination magnet -- it's a
# balance per period, summing it across periods or accounts is meaningless.
Constraint(
appliesTo="TrusteeDataAccountBalance.closingBalance",
rule=ConstraintRule.NEVER_AGGREGATE,
message=(
"closingBalance is per-period already; query with periodYear+periodMonth, never SUM/AVG it."
),
),
Constraint(
appliesTo="TrusteeDataAccountBalance.openingBalance",
rule=ConstraintRule.NEVER_AGGREGATE,
message="openingBalance is already a balance per period; do not SUM/AVG it across rows.",
),
Constraint(
appliesTo="TrusteeDataAccountBalance.debitTotal",
rule=ConstraintRule.NEVER_AGGREGATE,
message=(
"debitTotal is the period's debit TURNOVER; do not SUM it without an explicit period filter."
),
),
Constraint(
appliesTo="TrusteeDataAccountBalance.creditTotal",
rule=ConstraintRule.NEVER_AGGREGATE,
message="creditTotal is a per-period turnover; do not SUM it across periods without an explicit period filter.",
),
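# AccountBalance queries without a period filter are almost always wrong --
# they conflate annual and monthly snapshots. Validator enforcement of
# REQUIRES_FILTER_ON is planned for a later iteration; for now this rule is
# only rendered into the prompt by the prompt compiler so the LLM sees it
# explicitly.
# NOTE(review): the BALANCE_HISTORY_PER_YEAR canonical pattern in this module
# filters on periodMonth only (no periodYear), so it would not satisfy this
# rule's requiredFields once enforcement lands -- reconcile before wiring it up.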
Constraint(
appliesTo="TrusteeDataAccountBalance",
rule=ConstraintRule.REQUIRES_FILTER_ON,
message=(
"Always filter on periodYear AND periodMonth (use periodMonth=0 for end-of-year)."
),
params={"requiredFields": ["periodYear", "periodMonth"]},
),
Constraint(
appliesTo="TrusteeDataAccountBalance",
rule=ConstraintRule.PREFERRED_TABLE_FOR_INTENT,
message="For 'Saldo per <date>' and 'Stand <year>' questions, prefer AccountBalance over JournalLine.",
params={"intents": ["BANK_BALANCE_AT_DATE", "BALANCE_AT_YEAR_END"]},
),
]
# ---------------------------------------------------------------------------
# Canonical query patterns (worked examples for the LLM)
# ---------------------------------------------------------------------------
_CANONICAL_PATTERNS = [
CanonicalQueryPattern(
intent="BANK_BALANCE_AT_DATE",
description="Saldo eines Bankkontos per Jahresende.",
pattern={
"tool": "queryTable",
"tableName": "TrusteeDataAccountBalance",
"filters": [
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
{"field": "periodYear", "op": "=", "value": "<year>"},
{"field": "periodMonth", "op": "=", "value": 0},
],
"fields": ["closingBalance", "currency"],
},
),
CanonicalQueryPattern(
intent="BANK_GROUP_TOTAL_AT_DATE",
description="Summe einer Kontogruppe (z. B. alle Bankkonten 102%) per Jahresende.",
pattern={
"tool": "queryTable",
"tableName": "TrusteeDataAccountBalance",
"filters": [
{"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
{"field": "periodYear", "op": "=", "value": "<year>"},
{"field": "periodMonth", "op": "=", "value": 0},
],
"fields": ["accountNumber", "closingBalance", "currency"],
"_postProcessing": "Sum closingBalance values in your final answer; do NOT SUM via aggregateTable.",
},
),
CanonicalQueryPattern(
intent="BALANCE_HISTORY_PER_YEAR",
description="Saldo-Verlauf eines Kontos ueber mehrere Jahre.",
pattern={
"tool": "queryTable",
"tableName": "TrusteeDataAccountBalance",
"filters": [
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
{"field": "periodMonth", "op": "=", "value": 0},
],
"fields": ["periodYear", "closingBalance", "currency"],
"orderBy": "periodYear",
},
),
CanonicalQueryPattern(
intent="MONTHLY_BALANCE_SNAPSHOT",
description="Saldo per Ende eines bestimmten Monats.",
pattern={
"tool": "queryTable",
"tableName": "TrusteeDataAccountBalance",
"filters": [
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
{"field": "periodYear", "op": "=", "value": "<year>"},
{"field": "periodMonth", "op": "=", "value": "<month 1..12>"},
],
"fields": ["closingBalance", "currency"],
},
),
CanonicalQueryPattern(
intent="ACCOUNT_LIST_BY_TYPE_OR_PREFIX",
description="Welche Konten gehoeren zu einer Gruppe (Typ oder Nummern-Prefix)?",
pattern={
"tool": "queryTable",
"tableName": "TrusteeDataAccount",
"filters": [
{"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
],
"fields": ["accountNumber", "label", "accountType"],
},
),
CanonicalQueryPattern(
intent="JOURNAL_SUM_AT_ACCOUNT",
description="Summe der Soll- oder Haben-Buchungen auf einem Konto.",
pattern={
"tool": "aggregateTable",
"tableName": "TrusteeDataJournalLine",
"aggregate": "SUM",
"field": "debitAmount",
"filters": [
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
],
},
),
CanonicalQueryPattern(
intent="COUNT_ROWS",
description="Anzahl Buchungen / Buchungszeilen / Konten.",
pattern={
"tool": "aggregateTable",
"tableName": "<table>",
"aggregate": "COUNT",
"field": "id",
},
),
CanonicalQueryPattern(
intent="JOURNAL_LINES_BY_AMOUNT",
description="Buchungszeilen mit einem Betrag groesser/kleiner als einer Schwelle.",
pattern={
"tool": "queryTable",
"tableName": "TrusteeDataJournalLine",
"filters": [
{"field": "debitAmount", "op": ">", "value": "<amount>"},
],
"fields": ["accountNumber", "debitAmount", "description"],
},
),
]
_TRUSTEE_ONTOLOGY = OntologyDescriptor(
featureCode="trustee",
entities=_ENTITIES,
relations=_RELATIONS,
constraints=_CONSTRAINTS,
canonicalPatterns=_CANONICAL_PATTERNS,
)
def getTrusteeOntology() -> OntologyDescriptor:
    """Return the trustee ontology descriptor.

    The descriptor is built once at module import and kept in the
    module-level ``_TRUSTEE_ONTOLOGY``; every call hands back that same
    object, so no per-call work is done here.
    """
    return _TRUSTEE_ONTOLOGY

View file

@ -33,11 +33,6 @@ UI_OBJECTS = [
"label": t("Einstellungen", context="UI"),
"meta": {"area": "settings"}
},
{
"objectKey": "ui.feature.workspace.rag-insights",
"label": t("Wissens-Insights", context="UI"),
"meta": {"area": "rag-insights"},
},
]
RESOURCE_OBJECTS = [
@ -86,7 +81,6 @@ TEMPLATE_ROLES = [
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
]
},
@ -97,7 +91,6 @@ TEMPLATE_ROLES = [
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},

View file

@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings(
# =========================================================================
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
# =========================================================================
def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]:
    """Collect the ids of all FileItem rows of a feature instance (any user).

    Knowledge rows are often stored without featureInstanceId, so callers
    correlate them by file id taken from the Management DB.

    Args:
        instanceId: Feature instance used as ``featureInstanceId`` filter.
        mandateId: When truthy, rows that carry a *different* mandateId are
            skipped; rows without a mandateId are kept.

    Returns:
        File ids as strings, in the order the recordset returned them.
    """
    from modules.datamodels.datamodelFiles import FileItem
    from modules.interfaces.interfaceDbManagement import ComponentObjects

    def _field(row, name):
        # Rows may be plain dicts or attribute-style objects.
        return row.get(name) if isinstance(row, dict) else getattr(row, name, None)

    wantedMandate = str(mandateId) if mandateId else ""
    co = ComponentObjects()
    rows = co.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId})
    fileIds: List[str] = []
    for row in rows or []:
        fileId = _field(row, "id")
        if not fileId:
            continue
        if wantedMandate:
            rowMandate = _field(row, "mandateId")
            # Compare string to string: the wanted id was stringified above, so
            # a non-str row value would otherwise never compare equal.
            if rowMandate and str(rowMandate) != wantedMandate:
                continue
        fileIds.append(str(fileId))
    return fileIds
@router.get("/{instanceId}/rag-statistics")
@limiter.limit("60/minute")
async def getRagStatistics(
    request: Request,
    instanceId: str = Path(...),
    days: int = Query(90, ge=7, le=365, description="Timeline window in days"),
    context: RequestContext = Depends(getRequestContext),
):
    """Return aggregated, non-identifying knowledge-store metrics for one workspace instance.

    Access is validated first; the statistics are then computed by the knowledge
    interface for the instance, its mandate and the file ids resolved from the
    Management DB. When the result is a dict, the number of resolved file ids is
    added under ``scope.workspaceFileIdsResolved``.
    """
    mandateId, _ = _validateInstanceAccess(instanceId, context)
    fileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId)
    knowledge = getKnowledgeInterface(context.user)
    payload = knowledge.getRagStatisticsForInstance(
        featureInstanceId=instanceId,
        mandateId=str(mandateId) if mandateId else "",
        timelineDays=days,
        workspaceFileIds=fileIds,
    )
    if isinstance(payload, dict):
        # setdefault hands back the existing (or freshly created) scope mapping.
        payload.setdefault("scope", {})["workspaceFileIdsResolved"] = len(fileIds)
    return JSONResponse(payload)

View file

@ -133,6 +133,60 @@ class KnowledgeObjects:
return {"indexRows": indexCount, "chunks": chunkCount}
def deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]:
    """Delete all FileContentIndex rows whose provenance.dataSourceId matches.

    Used when a user disables ragIndexEnabled on a DataSource to purge
    only those chunks that were ingested from that specific tree element.

    For every matching index row its ContentChunk rows are deleted first, then
    the index row itself. Afterwards storage billing is reconciled once per
    affected mandate (best effort: failures are logged, not raised).

    Args:
        dataSourceId: Id of the DataSource whose index rows are purged.

    Returns:
        ``{"indexRows": <deleted index rows>, "chunks": <deleted chunks>}``;
        both are 0 when *dataSourceId* is empty.
    """
    if not dataSourceId:
        return {"indexRows": 0, "chunks": 0}

    def _field(row, name):
        # Rows are handled both as plain dicts and as objects.
        return row.get(name) if isinstance(row, dict) else getattr(row, name, None)

    # NOTE(review): the RAG inventory route reads dataSourceId from
    # structure["_ingestion"]["provenance"]; confirm FileContentIndex also exposes
    # it as a top-level `provenance`, otherwise this filter matches nothing.
    matchedRows = []
    for row in self.db.getRecordset(FileContentIndex) or []:
        prov = _field(row, "provenance")
        if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
            matchedRows.append(row)

    mandateIds: set = set()
    chunkCount = 0
    indexCount = 0
    for row in matchedRows:
        fid = _field(row, "id")
        mid = _field(row, "mandateId")
        if not fid:
            continue
        for chunk in self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid}) or []:
            # Same dict/object tolerance as for index rows; skip chunks without an id.
            chunkId = _field(chunk, "id")
            if chunkId and self.db.recordDelete(ContentChunk, chunkId):
                chunkCount += 1
        if self.db.recordDelete(FileContentIndex, fid):
            indexCount += 1
        if mid:
            mandateIds.add(str(mid))

    for mid in mandateIds:
        # Per-mandate try so one failing reconciliation does not block the others.
        try:
            from modules.interfaces.interfaceDbBilling import _getRootInterface
            _getRootInterface().reconcileMandateStorageBilling(mid)
        except Exception as ex:
            logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex)
    return {"indexRows": indexCount, "chunks": chunkCount}
def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]:
    """Return every FileContentIndex row whose provenance.dataSourceId equals *dataSourceId*.

    Rows that are not already dicts are converted with ``dict(row)``. An empty
    *dataSourceId* yields an empty list without touching the database.
    """
    if not dataSourceId:
        return []

    def _belongsToSource(candidate) -> bool:
        if isinstance(candidate, dict):
            provenance = candidate.get("provenance")
        else:
            provenance = getattr(candidate, "provenance", None)
        return isinstance(provenance, dict) and provenance.get("dataSourceId") == dataSourceId

    return [
        entry if isinstance(entry, dict) else dict(entry)
        for entry in self.db.getRecordset(FileContentIndex)
        if _belongsToSource(entry)
    ]
def deleteFileContentIndex(self, fileId: str) -> bool:
"""Delete a FileContentIndex and all associated ContentChunks."""
existing = self.getFileContentIndex(fileId)

View file

@ -1274,17 +1274,20 @@ class ComponentObjects:
if getattr(permissions, "update", None) != AccessLevel.ALL:
raise PermissionError("Setting global scope requires ALL permission")
self.db.recordModify(FileFolder, folderId, {"scope": scope})
allFolderIds = self._collectChildFolderIds(folderId)
for fid in allFolderIds:
self.db.recordModify(FileFolder, fid, {"scope": scope})
filesUpdated = 0
if cascadeToFiles:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"scope": scope})
filesUpdated += 1
for fid in allFolderIds:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"scope": scope})
filesUpdated += 1
return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
@ -1294,16 +1297,19 @@ class ComponentObjects:
raise FileNotFoundError(f"Folder {folderId} not found")
self._requireFolderWriteAccess(folder, folderId, "update")
self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize})
allFolderIds = self._collectChildFolderIds(folderId)
for fid in allFolderIds:
self.db.recordModify(FileFolder, fid, {"neutralize": neutralize})
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
filesUpdated = 0
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
filesUpdated += 1
for fid in allFolderIds:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
filesUpdated += 1
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}

View file

@ -69,7 +69,10 @@ class VoiceObjects:
sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False,
phraseHints: list = None,
alternativeLanguages: list = None) -> Dict[str, Any]:
alternativeLanguages: list = None,
model: str = "latest_long",
lightweight: bool = False,
audioFormat: Optional[str] = None) -> Dict[str, Any]:
"""
Convert speech to text using Google Cloud Speech-to-Text API.
@ -81,6 +84,9 @@ class VoiceObjects:
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
phraseHints: Optional list of phrases to boost recognition (names, terms)
alternativeLanguages: Optional list of additional language codes for multi-language
model: Google STT model (e.g. latest_long, latest_short)
lightweight: If True, omit word-level features and enhanced model
audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection
Returns:
Dict containing transcribed text, confidence, and metadata
@ -97,6 +103,9 @@ class VoiceObjects:
skipFallbacks=skipFallbacks,
phraseHints=phraseHints,
alternativeLanguages=alternativeLanguages,
model=model,
lightweight=lightweight,
audioFormat=audioFormat,
)
if result["success"]:
@ -120,13 +129,23 @@ class VoiceObjects:
audioQueue: asyncio.Queue,
language: str = "de-DE",
phraseHints: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
singleUtterance: bool = False,
) -> AsyncGenerator[Dict[str, Any], None]:
"""
Stream audio to Google Streaming STT and yield interim/final results.
Billing is recorded for each final result.
"""
connector = self._getGoogleSpeechConnector()
async for event in connector.streamingRecognize(audioQueue, language, phraseHints):
async for event in connector.streamingRecognize(
audioQueue,
language,
phraseHints,
model=model,
lightweight=lightweight,
singleUtterance=singleUtterance,
):
if event.get("isFinal") and self.billingCallback:
durationSec = event.get("audioDurationSec", 0)
priceCHF = connector.calculateSttCostCHF(durationSec)

View file

@ -0,0 +1,217 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""STT Benchmark route — compare Speech-to-Text v1 (latest_long) vs v2 (Chirp 2).
Sysadmin-only page for evaluating STT model quality and latency.
"""
import json
import time
import logging
from typing import Any, Dict
from fastapi import APIRouter, HTTPException, Depends, Request, UploadFile, File, Form
from modules.auth import limiter, getCurrentUser
from modules.datamodels.datamodelUam import User
from modules.shared.configuration import APP_CONFIG
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Router for the STT benchmark endpoints, mounted under /api/admin/stt-benchmark.
router = APIRouter(
    prefix="/api/admin/stt-benchmark",
    tags=["Admin STT Benchmark"],
    responses={401: {"description": "Unauthorized"}, 403: {"description": "Forbidden"}},
)
def _requireSysAdmin(currentUser: User = Depends(getCurrentUser)) -> User:
    """FastAPI dependency: pass the current user through only for admins.

    Raises:
        HTTPException: 403 when neither ``isSysAdmin`` nor ``isPlatformAdmin``
            is truthy on the user.
    """
    hasAdminFlag = getattr(currentUser, "isSysAdmin", False) or getattr(currentUser, "isPlatformAdmin", False)
    if hasAdminFlag:
        return currentUser
    raise HTTPException(status_code=403, detail="SysAdmin required")
def _getCredentials():
    """Build Google service-account credentials from the configured key.

    Reads ``Connector_GoogleSpeech_API_KEY_SECRET`` from APP_CONFIG, parses it
    as JSON and hands it to ``Credentials.from_service_account_info``.

    Returns:
        The service-account credentials object.

    Raises:
        HTTPException: 500 when the key is missing, still a ``YOUR_...``
            placeholder, or is not usable service-account JSON.
    """
    apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
    if not apiKey or apiKey.startswith("YOUR_"):
        raise HTTPException(status_code=500, detail="Google Speech API key not configured")
    from google.oauth2 import service_account
    try:
        return service_account.Credentials.from_service_account_info(json.loads(apiKey))
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass; report a clear
        # configuration error instead of leaking a raw parser message.
        raise HTTPException(
            status_code=500,
            detail="Google Speech API key is not valid service-account JSON",
        ) from err
def _runV1(audioBytes: bytes, language: str, model: str) -> Dict[str, Any]:
    """Transcribe *audioBytes* with Speech-to-Text v1 and time the recognize call.

    Returns a dict with ``api``, ``model``, ``latencyMs`` (recognize call only),
    ``results`` (one entry per alternative: transcript, confidence rounded to
    4 digits, word count) and ``resultCount`` (number of response results).
    """
    from google.cloud import speech

    creds = _getCredentials()
    sttClient = speech.SpeechClient(credentials=creds)
    recognitionConfig = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        language_code=language,
        model=model,
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
        enable_word_confidence=True,
        max_alternatives=3,
        use_enhanced=True,
    )
    payload = speech.RecognitionAudio(content=audioBytes)

    # Only the remote call itself is measured, not client/config construction.
    startedAt = time.perf_counter()
    response = sttClient.recognize(config=recognitionConfig, audio=payload)
    elapsedSec = time.perf_counter() - startedAt

    flattened = [
        {
            "transcript": alternative.transcript,
            "confidence": round(alternative.confidence, 4),
            "words": len(alternative.words) if alternative.words else 0,
        }
        for result in response.results
        for alternative in result.alternatives
    ]
    return {
        "api": "v1",
        "model": model,
        "latencyMs": round(elapsedSec * 1000, 1),
        "results": flattened,
        "resultCount": len(response.results),
    }
def _runV2(audioBytes: bytes, language: str, model: str, location: str) -> Dict[str, Any]:
    """Run Speech-to-Text v2 recognition (e.g. Chirp 2) and time the recognize call.

    Args:
        audioBytes: Raw audio; the decoding is auto-detected by the API.
        language: Language code passed as the single entry of ``language_codes``.
        model: v2 model name.
        location: Location id used for both the API endpoint and the
            recognizer path (``global`` uses the default endpoint).

    Returns:
        Dict with ``api``, ``model``, ``location``, ``latencyMs`` (recognize call
        only), ``results`` (one entry per alternative) and ``resultCount``.

    Raises:
        ValueError: If *location* is not a plain location id, or the configured
            service-account key carries no project id.
    """
    import re
    from google.cloud.speech_v2 import SpeechClient
    from google.cloud.speech_v2.types import cloud_speech

    # location comes from a form field and is interpolated into the API host
    # name and the recognizer path, so only plain location ids are accepted.
    if not re.fullmatch(r"[a-z][a-z0-9-]*", location or ""):
        raise ValueError(f"Invalid STT v2 location: {location!r}")

    credentials = _getCredentials()
    # The credentials were built from the same service-account JSON, so the
    # project id is read from them instead of parsing the config a second time.
    projectId = getattr(credentials, "project_id", None) or ""
    if not projectId:
        raise ValueError("Google Speech service-account key has no project_id")

    # The global location is served by the default endpoint without a prefix.
    endpoint = "speech.googleapis.com" if location == "global" else f"{location}-speech.googleapis.com"
    client = SpeechClient(
        credentials=credentials,
        client_options={"api_endpoint": endpoint},
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=[language],
        model=model,
        features=cloud_speech.RecognitionFeatures(
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
            enable_word_confidence=True,
        ),
    )
    # "_" addresses the implicit default recognizer of the project/location.
    recognizer = f"projects/{projectId}/locations/{location}/recognizers/_"
    request = cloud_speech.RecognizeRequest(
        recognizer=recognizer,
        config=config,
        content=audioBytes,
    )

    t0 = time.perf_counter()
    response = client.recognize(request=request)
    elapsed = time.perf_counter() - t0

    results = [
        {
            "transcript": alt.transcript,
            "confidence": round(alt.confidence, 4),
            "words": len(alt.words) if alt.words else 0,
        }
        for r in response.results
        for alt in r.alternatives
    ]
    return {
        "api": "v2",
        "model": model,
        "location": location,
        "latencyMs": round(elapsed * 1000, 1),
        "results": results,
        "resultCount": len(response.results),
    }
@router.post("/run")
@limiter.limit("10/minute")
async def runBenchmark(
    request: Request,
    file: UploadFile = File(...),
    language: str = Form(default="de-DE"),
    v1Model: str = Form(default="latest_long"),
    v2Model: str = Form(default="chirp_2"),
    v2Location: str = Form(default="europe-west4"),
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Upload audio and compare v1 vs v2 STT results.

    Both recognitions run one after the other so their latencies are measured
    independently. A failure of one API does not abort the other; its entry
    then carries ``{"error": <message>}`` instead of a result.

    Raises:
        HTTPException: 400 when the upload is larger than 10 MB or smaller
            than 100 bytes.
    """
    import asyncio

    maxBytes = 10 * 1024 * 1024
    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without copying an arbitrarily large body into memory.
    audioBytes = await file.read(maxBytes + 1)
    if len(audioBytes) > maxBytes:
        raise HTTPException(status_code=400, detail="Audio file too large (max 10 MB)")
    if len(audioBytes) < 100:
        raise HTTPException(status_code=400, detail="Audio file too small")
    logger.info("STT benchmark: %s, %d bytes, language=%s, v1=%s, v2=%s@%s",
                file.filename, len(audioBytes), language, v1Model, v2Model, v2Location)

    # _runV1/_runV2 make blocking network calls; run them in a worker thread so
    # the event loop keeps serving other requests while recognition is running.
    v1Result = None
    v1Error = None
    try:
        v1Result = await asyncio.to_thread(_runV1, audioBytes, language, v1Model)
    except Exception as e:
        v1Error = str(e)
        logger.warning("STT v1 benchmark failed: %s", e)

    v2Result = None
    v2Error = None
    try:
        v2Result = await asyncio.to_thread(_runV2, audioBytes, language, v2Model, v2Location)
    except Exception as e:
        v2Error = str(e)
        logger.warning("STT v2 benchmark failed: %s", e)

    return {
        "filename": file.filename,
        "fileSizeBytes": len(audioBytes),
        "language": language,
        "v1": v1Result or {"error": v1Error},
        "v2": v2Result or {"error": v2Error},
    }
@router.get("/models")
@limiter.limit("30/minute")
async def getAvailableModels(
    request: Request,
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Return the static option lists (models, locations, languages) for the benchmark UI."""

    def _options(*pairs):
        # Each (value, label) pair becomes one select option.
        return [{"value": value, "label": label} for value, label in pairs]

    return {
        "v1Models": _options(
            ("latest_long", "latest_long (default)"),
            ("latest_short", "latest_short"),
            ("phone_call", "phone_call"),
            ("video", "video"),
            ("command_and_search", "command_and_search"),
        ),
        "v2Models": _options(
            ("chirp_2", "Chirp 2 (recommended)"),
            ("chirp", "Chirp (original)"),
            ("long", "long"),
            ("short", "short"),
        ),
        "locations": _options(
            ("europe-west4", "Europe West (NL)"),
            ("us-central1", "US Central"),
            ("asia-southeast1", "Asia Southeast"),
        ),
        "languages": _options(
            ("de-DE", "Deutsch (DE)"),
            ("de-CH", "Deutsch (CH)"),
            ("en-US", "English (US)"),
            ("en-GB", "English (GB)"),
            ("fr-FR", "Francais (FR)"),
            ("it-IT", "Italiano (IT)"),
        ),
    }

View file

@ -1986,10 +1986,10 @@ def getUserViewTransactions(
if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
import json as _json
from collections import defaultdict
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
from modules.routes.routeHelpers import (
applyViewToParams,
build_group_summary_groups,
effective_group_by_levels,
resolveView,
)
@ -2018,28 +2018,7 @@ def getUserViewTransactions(
summary_params,
ctx.user,
)
counts: Dict[str, int] = defaultdict(int)
labels: Dict[str, str] = {}
null_key = "\x00NULL"
for item in all_rows:
raw = item.get(field)
if raw is None or raw == "":
nk = null_key
labels[nk] = null_label
else:
nk = str(raw)
if nk not in labels:
labels[nk] = nk
counts[nk] += 1
groups_out: List[Dict[str, Any]] = []
for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())):
groups_out.append(
{
"value": None if nk == null_key else nk,
"label": labels.get(nk, nk),
"totalCount": counts[nk],
}
)
groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels)
return JSONResponse(content={"groups": groups_out})
paginationParams = None

View file

@ -130,7 +130,7 @@ def get_auth_authority_options(
# ============================================================================
@router.get("/")
@limiter.limit("30/minute")
@limiter.limit("60/minute")
async def get_connections(
request: Request,
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
@ -197,7 +197,9 @@ async def get_connections(
"lastChecked": connection.lastChecked,
"expiresAt": connection.expiresAt,
"tokenStatus": tokenStatus,
"tokenExpiresAt": tokenExpiresAt
"tokenExpiresAt": tokenExpiresAt,
"knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False),
"knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {},
})
return items
@ -264,7 +266,7 @@ async def get_connections(
})
enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
try:
@ -724,4 +726,161 @@ def delete_connection(
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to delete connection: {str(e)}"
)
)
# =========================================================================
# Knowledge Consent & Control Endpoints
# =========================================================================
def _findOwnConnection(interface, userId: str, connectionId: str):
    """Find a connection owned by the user. Returns None if not found.

    Args:
        interface: Object providing ``getUserConnections(userId)``.
        userId: Owner whose connections are searched.
        connectionId: Id of the wanted connection (a string from the URL path).

    Returns:
        The first connection whose id matches, otherwise None.
    """
    wanted = str(connectionId)
    # Ids are compared as strings so a non-str id on the connection object still
    # matches the path parameter; a missing connection list counts as empty.
    for conn in interface.getUserConnections(userId) or []:
        if str(conn.id) == wanted:
            return conn
    return None
@router.patch("/{connectionId}/knowledge-consent")
@limiter.limit("10/minute")
async def _updateKnowledgeConsent(
    request: Request,
    connectionId: str = Path(..., description="Connection ID"),
    enabled: bool = Body(..., embed=True),
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Master switch: can PowerOn ingest data from this connection into the RAG knowledge store?

    enabled=False: purge ALL chunks for this connection + cancel running jobs.
    enabled=True: set flag; enqueue bootstrap only if rag-enabled DataSources exist.

    Returns:
        Dict with ``connectionId``, ``knowledgeIngestionEnabled``, ``purged``
        (purge result, None when enabling), ``cancelledJobs`` and
        ``bootstrapEnqueued``.

    Raises:
        HTTPException: 404 when the connection is not one of the user's own;
            500 for any other failure.
    """
    try:
        interface = getInterface(currentUser)
        # Only connections returned for the current user can be changed.
        connection = _findOwnConnection(interface, currentUser.id, connectionId)
        if not connection:
            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))

        from modules.interfaces.interfaceDbApp import getRootInterface
        rootIf = getRootInterface()
        # The flag is persisted first; purge / bootstrap follow afterwards.
        rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled})

        purged = None
        cancelled = 0
        bootstrapEnqueued = False
        if not enabled:
            # Consent withdrawn: remove indexed content and stop pending work.
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            purged = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
            from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
            cancelled = cancelJobsByConnection(connectionId)
        else:
            from modules.datamodels.datamodelDataSource import DataSource
            dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
            if dataSources:
                from modules.serviceCenter.services.serviceBackgroundJobs import startJob
                # authority may be an enum (use .value) or a plain value.
                authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
                await startJob(
                    "connection.bootstrap",
                    {"connectionId": connectionId, "authority": authority.lower()},
                    triggeredBy=str(currentUser.id),
                )
                bootstrapEnqueued = True

        # Every consent change is written to the audit log, for both directions.
        import json as _json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        audit_logger.logEvent(
            userId=str(currentUser.id),
            mandateId=str(getattr(connection, "mandateId", "") or ""),
            category=AuditCategory.PERMISSION.value,
            action="knowledge_consent_changed",
            details=_json.dumps({"connectionId": connectionId, "enabled": enabled}),
        )
        logger.info("Knowledge consent %s for connection %s by user %s",
                    "enabled" if enabled else "disabled", connectionId, currentUser.id)
        return {
            "connectionId": connectionId,
            "knowledgeIngestionEnabled": enabled,
            "purged": purged,
            "cancelledJobs": cancelled,
            "bootstrapEnqueued": bootstrapEnqueued,
        }
    except HTTPException:
        # Pass deliberate HTTP errors (e.g. the 404 above) through unchanged.
        raise
    except Exception as e:
        logger.error("Error updating knowledge consent: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{connectionId}/knowledge-preferences")
@limiter.limit("20/minute")
def _updateKnowledgePreferences(
    request: Request,
    connectionId: str = Path(..., description="Connection ID"),
    preferences: Dict[str, Any] = Body(..., embed=True),
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Merge new knowledge-ingestion preferences into one of the user's own connections.

    Only whitelisted keys from *preferences* are applied on top of the stored
    preferences; ``schemaVersion`` is always set to 1. Responds with 404 when the
    connection is not the user's own and with 500 on any other failure.
    """
    _ALLOWED_KEYS = {"mailContentDepth", "mailIndexAttachments", "filesIndexBinaries",
                     "clickupScope", "clickupIndexAttachments", "maxAgeDays"}
    try:
        ownConnection = _findOwnConnection(getInterface(currentUser), currentUser.id, connectionId)
        if not ownConnection:
            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))

        stored = getattr(ownConnection, "knowledgePreferences", None) or {}
        merged = {**stored}
        # Unknown keys from the request body are silently dropped.
        merged.update({key: value for key, value in preferences.items() if key in _ALLOWED_KEYS})
        merged["schemaVersion"] = 1

        from modules.interfaces.interfaceDbApp import getRootInterface
        rootDb = getRootInterface().db
        rootDb.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged})
        logger.info("Knowledge preferences updated for connection %s", connectionId)
        return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error updating knowledge preferences: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/{connectionId}/knowledge-stop")
@limiter.limit("10/minute")
def _stopKnowledgeJobs(
    request: Request,
    connectionId: str = Path(..., description="Connection ID"),
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Cancel the background jobs of one of the user's own connections and audit it.

    Responds with 404 when the connection is not the user's own and with 500 on
    any other failure; otherwise returns the connection id and the cancel count.
    """
    try:
        ownConnection = _findOwnConnection(getInterface(currentUser), currentUser.id, connectionId)
        if not ownConnection:
            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))

        from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
        cancelledCount = cancelJobsByConnection(connectionId)

        import json as _json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        auditDetails = _json.dumps({"connectionId": connectionId, "cancelledCount": cancelledCount})
        audit_logger.logEvent(
            userId=str(currentUser.id),
            mandateId=str(getattr(ownConnection, "mandateId", "") or ""),
            category=AuditCategory.PERMISSION.value,
            action="knowledge_jobs_stopped",
            details=auditDetails,
        )
        logger.info("Stopped %d knowledge jobs for connection %s", cancelledCount, connectionId)
        return {"connectionId": connectionId, "cancelled": cancelledCount}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error stopping knowledge jobs: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

View file

@ -413,7 +413,7 @@ def patch_folder_scope(
scope = body.get("scope")
if not scope:
raise HTTPException(status_code=400, detail="scope is required")
cascadeToFiles = body.get("cascadeToFiles", False)
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
@ -543,7 +543,7 @@ def get_files(
FileItem,
)
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
if mode == "filterValues":

View file

@ -100,7 +100,7 @@ def get_prompts(
result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
)
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
if mode == "filterValues":

View file

@ -1,6 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging."""
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging."""
import logging
from typing import Any, Dict, List, Optional
@ -125,3 +125,69 @@ def _updateNeutralizeFields(
except Exception as e:
logger.error("Error updating neutralizeFields: %s", e)
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{sourceId}/rag-index")
@limiter.limit("30/minute")
async def _updateDataSourceRagIndex(
    request: Request,
    sourceId: str = Path(..., description="ID of the DataSource"),
    ragIndexEnabled: bool = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Toggle RAG indexing for a DataSource.

    true: sets the flag and enqueues a mini-bootstrap for this DataSource only.
    false: sets the flag and synchronously purges all chunks from this DataSource.

    Must be `async def` so that `await startJob(...)` registers `_runJob` in the
    main event loop. In a sync route the handler runs in a worker thread whose
    temporary loop closes before the task runs, so the job stays stuck forever.

    Raises:
        HTTPException: 404 when the DataSource does not exist; 500 for any
            other failure.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        rootIf = getRootInterface()
        # NOTE(review): the record is read and modified through the root interface
        # and no ownership check against context.user is visible here — confirm
        # that access control is enforced elsewhere.
        rec = rootIf.db.getRecord(DataSource, sourceId)
        if not rec:
            raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
        rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
        logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)

        if ragIndexEnabled:
            from modules.serviceCenter.services.serviceBackgroundJobs import startJob
            # rec is accessed with .get(), i.e. it is treated as dict-like here.
            connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
            conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
            # authority stays "" when the connection cannot be resolved.
            authority = ""
            if conn:
                authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
            # The job is restricted to this DataSource via dataSourceIds.
            await startJob(
                "connection.bootstrap",
                {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
                triggeredBy=str(context.user.id),
            )
        else:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
            logger.info("Purged %d index rows / %d chunks for DataSource %s",
                        purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)

        # The toggle is audited for both directions.
        import json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        audit_logger.logEvent(
            userId=str(context.user.id),
            mandateId=context.mandateId,
            category=AuditCategory.PERMISSION.value,
            action="rag_index_toggled",
            details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
        )
        return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
    except HTTPException:
        # Pass deliberate HTTP errors (e.g. the 404 above) through unchanged.
        raise
    except Exception as e:
        logger.error("Error updating datasource ragIndexEnabled: %s", e)
        raise HTTPException(status_code=500, detail=str(e))

View file

@ -825,45 +825,106 @@ def build_group_summary_groups(
items: List[Dict[str, Any]],
field: str,
null_label: str = "",
groupByLevels: List[Dict[str, Any]] | None = None,
) -> List[Dict[str, Any]]:
"""
Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level).
Build {"value", "label", "totalCount"} summaries for mode=groupSummary.
When *groupByLevels* contains more than one level the function produces one
entry per unique combination of all level values (flat permutations).
``value`` becomes a ``///``-joined composite key and ``label`` the ``/``-joined
human-readable label so the frontend can split them back.
"""
from collections import defaultdict
counts: Dict[str, int] = defaultdict(int)
display_by_key: Dict[str, str] = {}
null_key = "\x00NULL"
label_attr = f"{field}Label"
fields: list[dict] = []
if groupByLevels and len(groupByLevels) > 1:
for lvl in groupByLevels:
f = lvl.get("field", "")
nl = str(lvl.get("nullLabel") or null_label)
if f:
fields.append({"field": f, "nullLabel": nl})
if not fields:
fields = [{"field": field, "nullLabel": null_label}]
nullKey = "\x00NULL"
if len(fields) == 1:
f = fields[0]["field"]
nl = fields[0]["nullLabel"]
counts: Dict[str, int] = defaultdict(int)
displayByKey: Dict[str, str] = {}
labelAttr = f"{f}Label"
for item in items:
raw = item.get(f)
if raw is None or raw == "":
nk = nullKey
display = nl
else:
nk = str(raw)
display = None
lbl = item.get(labelAttr)
if lbl is not None and lbl != "":
display = str(lbl)
if display is None:
display = nk
counts[nk] += 1
if nk not in displayByKey:
displayByKey[nk] = display
orderedKeys = sorted(
counts.keys(),
key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()),
)
return [
{
"value": None if nk == nullKey else nk,
"label": displayByKey.get(nk, nk),
"totalCount": counts[nk],
}
for nk in orderedKeys
]
counts = defaultdict(int)
displayByComposite: Dict[str, list] = {}
filtersByComposite: Dict[str, dict] = {}
for item in items:
raw = item.get(field)
if raw is None or raw == "":
nk = null_key
display = null_label
else:
nk = str(raw)
display = None
lbl = item.get(label_attr)
if lbl is not None and lbl != "":
display = str(lbl)
if display is None:
display = nk
counts[nk] += 1
if nk not in display_by_key:
display_by_key[nk] = display
parts: list[str] = []
labels: list[str] = []
filterMap: dict = {}
for fd in fields:
f = fd["field"]
nl = fd["nullLabel"]
labelAttr = f"{f}Label"
raw = item.get(f)
if raw is None or raw == "":
parts.append(nullKey)
labels.append(nl)
filterMap[f] = None
else:
parts.append(str(raw))
lbl = item.get(labelAttr)
labels.append(str(lbl) if lbl not in (None, "") else str(raw))
filterMap[f] = str(raw)
compositeKey = "///".join(parts)
counts[compositeKey] += 1
if compositeKey not in displayByComposite:
displayByComposite[compositeKey] = labels
filtersByComposite[compositeKey] = filterMap
ordered_keys = sorted(
orderedKeys = sorted(
counts.keys(),
key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()),
key=lambda x: tuple(
(seg == nullKey, seg.lower()) for seg in x.split("///")
),
)
return [
{
"value": None if nk == null_key else nk,
"label": display_by_key.get(nk, nk),
"totalCount": counts[nk],
"value": ck.replace(nullKey, "__null__") if nullKey in ck else ck,
"label": " / ".join(displayByComposite[ck]),
"totalCount": counts[ck],
"filters": filtersByComposite[ck],
}
for nk in ordered_keys
for ck in orderedKeys
]

View file

@ -0,0 +1,302 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""RAG Inventory API — global knowledge-store visibility for users, admins, platform."""
import logging
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException, Depends, Request
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
from modules.datamodels.datamodelUam import User
from modules.shared.i18nRegistry import apiRouteContext
# NOTE(review): presumably a message helper scoped to this route module for
# translatable API texts — confirm against modules.shared.i18nRegistry.
routeApiMsg = apiRouteContext("routeRagInventory")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Router for the RAG inventory endpoints, mounted under /api/rag/inventory.
router = APIRouter(
    prefix="/api/rag/inventory",
    tags=["RAG Inventory"],
    responses={
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"},
    },
)
def _recordField(record: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a recordset row that may be a dict or an object.

    Dict rows are read with ``dict.get`` (a missing key yields ``None``);
    any other row is read with ``getattr`` and falls back to ``default``.
    """
    if isinstance(record, dict):
        return record.get(key)
    return getattr(record, key, default)


def _countChunksByDataSource(indexRows) -> tuple:
    """Return ``(chunksByDs, unassigned)`` for the index rows of one connection.

    A row is attributed to the id found at
    ``structure["_ingestion"]["provenance"]["dataSourceId"]``. Rows where that
    path is missing, empty or not made of dicts are counted as unassigned.
    """
    chunksByDs: Dict[str, int] = {}
    unassigned = 0
    for row in indexRows:
        struct = _recordField(row, "structure") or {}
        ingestion = (struct.get("_ingestion") or {}) if isinstance(struct, dict) else {}
        provenance = (ingestion.get("provenance") or {}) if isinstance(ingestion, dict) else {}
        dsIdRef = provenance.get("dataSourceId", "") if isinstance(provenance, dict) else ""
        if dsIdRef:
            chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
        else:
            unassigned += 1
    return chunksByDs, unassigned


def _summarizeConnectionJobs(connJobs) -> tuple:
    """Return ``(runningJobs, lastError, lastSuccess)`` for one connection's jobs.

    ``lastError`` / ``lastSuccess`` are built from the first ERROR / SUCCESS
    job encountered in list order.
    NOTE(review): "last" is only accurate if the job list is newest-first;
    confirm against ``listJobs``.
    """
    runningJobs = [
        {"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
        for j in connJobs
        if j.get("status") in ("PENDING", "RUNNING")
    ]
    lastError: Optional[Dict[str, Any]] = None
    lastSuccess: Optional[Dict[str, Any]] = None
    for j in connJobs:
        status = j.get("status")
        if status == "ERROR" and lastError is None:
            lastError = {
                "jobId": j["id"],
                "errorMessage": j.get("errorMessage", ""),
                "finishedAt": j.get("finishedAt"),
            }
        elif status == "SUCCESS" and lastSuccess is None:
            result = j.get("result") or {}
            lastSuccess = {
                "jobId": j["id"],
                "finishedAt": j.get("finishedAt"),
                "indexed": result.get("indexed", 0),
                "skippedDuplicate": result.get("skippedDuplicate", 0),
                "skippedPolicy": result.get("skippedPolicy", 0),
                "failed": result.get("failed", 0),
                "durationMs": result.get("durationMs", 0),
            }
        if lastError and lastSuccess:
            # Both summaries found; nothing further down the list can change them.
            break
    return runningJobs, lastError, lastSuccess


def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
    """Build one inventory entry per connection.

    Args:
        connections: Iterable of connection objects exposing ``id`` and
            ``authority`` (other attributes are read with ``getattr`` defaults).
        rootIf: Interface whose ``db.getRecordset`` returns the DataSource rows.
        knowledgeIf: Interface whose ``db.getRecordset`` returns the
            FileContentIndex rows.
        jobService: Object exposing ``listJobs(jobType=..., limit=...)``.

    Returns:
        A list of dicts with the connection's data sources (deduplicated by
        path), chunk counts, running bootstrap jobs and the most recent
        error / success summaries found in the job window.
    """
    from modules.datamodels.datamodelDataSource import DataSource
    from modules.datamodels.datamodelKnowledge import FileContentIndex

    out: List[Dict[str, Any]] = []
    # The job listing does not depend on the connection, so it is fetched at
    # most once (lazily, on the first connection) instead of once per
    # connection as before.
    bootstrapJobs = None
    for conn in connections:
        connectionId = str(conn.id)
        dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
        connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
        connChunkTotal = len(connIndexRows)
        chunksByDs, unassigned = _countChunksByDataSource(connIndexRows)

        seenPaths = set()
        dsItems = []
        for ds in dataSources:
            dsId = _recordField(ds, "id", "")
            dsPath = _recordField(ds, "path", "")
            if dsPath in seenPaths:
                # Only the first data source per path is reported.
                continue
            seenPaths.add(dsPath)
            dsItems.append({
                "id": dsId,
                "label": _recordField(ds, "label", ""),
                "path": dsPath,
                "sourceType": _recordField(ds, "sourceType", ""),
                "ragIndexEnabled": _recordField(ds, "ragIndexEnabled", False),
                "neutralize": _recordField(ds, "neutralize", False),
                "lastIndexed": _recordField(ds, "lastIndexed", None),
                "chunkCount": chunksByDs.get(dsId, 0),
            })

        if unassigned > 0 and dsItems:
            # Chunks without provenance are spread evenly over the listed data
            # sources; the first `remainder` items receive one extra chunk.
            perDs, remainder = divmod(unassigned, len(dsItems))
            for i, item in enumerate(dsItems):
                item["chunkCount"] += perDs + (1 if i < remainder else 0)

        if bootstrapJobs is None:
            # Pull a wider window than the previous 5 so the "last successful
            # sync" is found even if a connection has many recent jobs queued.
            bootstrapJobs = jobService.listJobs(jobType="connection.bootstrap", limit=50)
        connJobs = [j for j in bootstrapJobs if (j.get("payload") or {}).get("connectionId") == connectionId]
        runningJobs, lastError, lastSuccess = _summarizeConnectionJobs(connJobs)

        out.append({
            "id": connectionId,
            "authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority),
            "externalEmail": getattr(conn, "externalEmail", ""),
            "knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
            "preferences": getattr(conn, "knowledgePreferences", None) or {},
            "dataSources": dsItems,
            "totalChunks": connChunkTotal,
            "runningJobs": runningJobs,
            "lastError": lastError,
            "lastSuccess": lastSuccess,
        })
    return out
@router.get("/me")
@limiter.limit("30/minute")
def _getInventoryMe(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Return the calling user's RAG inventory.

    The response holds the inventory entries of the user's own connections
    under ``connections`` and the summed chunk count under ``totals.chunks``.
    Any failure is logged and reported as HTTP 500 carrying the exception text.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService

        appInterface = getRootInterface()
        knowledgeInterface = getKnowledgeInterface(None)
        ownConnections = appInterface.getUserConnections(currentUser.id)
        inventory = _buildConnectionInventory(ownConnections, appInterface, knowledgeInterface, jobService)

        chunkSum = 0
        for entry in inventory:
            chunkSum += entry.get("totalChunks", 0)
        return {"connections": inventory, "totals": {"chunks": chunkSum}}
    except Exception as err:
        logger.error("Error in RAG inventory /me: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))
@router.get("/mandate")
@limiter.limit("20/minute")
def _getInventoryMandate(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Return the RAG inventory aggregated over one mandate.

    Requests whose context carries no mandate id are rejected with HTTP 403.
    Otherwise every UserConnection row filtered by that mandate id is
    inventoried, and the totals report the summed chunk count plus the byte
    total from ``aggregateMandateRagTotalBytes``. Unexpected failures are
    logged and reported as HTTP 500 carrying the exception text.
    """
    if not context.mandateId:
        raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required"))
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
        from modules.datamodels.datamodelUam import UserConnection

        appInterface = getRootInterface()
        knowledgeInterface = getKnowledgeInterface(None)
        mandateId = str(context.mandateId) if context.mandateId else ""

        connectionObjects = []
        for row in appInterface.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId}):
            if isinstance(row, dict):
                # Wrap dict rows in a throwaway class so the inventory builder
                # can use attribute access (conn.id, conn.authority, ...).
                row = type("C", (), row)()
            connectionObjects.append(row)

        inventory = _buildConnectionInventory(connectionObjects, appInterface, knowledgeInterface, jobService)
        chunkSum = 0
        for entry in inventory:
            chunkSum += entry.get("totalChunks", 0)
        byteSum = aggregateMandateRagTotalBytes(mandateId)
        return {"connections": inventory, "totals": {"chunks": chunkSum, "bytes": byteSum}}
    except HTTPException:
        raise
    except Exception as err:
        logger.error("Error in RAG inventory /mandate: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))
@router.get("/platform")
@limiter.limit("10/minute")
def _getInventoryPlatform(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Return the RAG inventory across every UserConnection row.

    Callers whose context does not have ``isSysAdmin`` set are rejected with
    HTTP 403. Unexpected failures are logged and reported as HTTP 500
    carrying the exception text.
    """
    if not context.isSysAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Platform admin required"))
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
        from modules.datamodels.datamodelUam import UserConnection

        appInterface = getRootInterface()
        knowledgeInterface = getKnowledgeInterface(None)

        connectionObjects = []
        for row in appInterface.db.getRecordset(UserConnection):
            if isinstance(row, dict):
                # Wrap dict rows in a throwaway class so the inventory builder
                # can use attribute access (conn.id, conn.authority, ...).
                row = type("C", (), row)()
            connectionObjects.append(row)

        inventory = _buildConnectionInventory(connectionObjects, appInterface, knowledgeInterface, jobService)
        chunkSum = 0
        for entry in inventory:
            chunkSum += entry.get("totalChunks", 0)
        return {"connections": inventory, "totals": {"chunks": chunkSum}}
    except HTTPException:
        raise
    except Exception as err:
        logger.error("Error in RAG inventory /platform: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))
@router.post("/reindex/{connectionId}")
@limiter.limit("10/minute")
async def _reindexConnection(
    request: Request,
    connectionId: str,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Queue a fresh ``connection.bootstrap`` job for one of the caller's connections.

    Only data sources with ``ragIndexEnabled`` set are passed to the job; when
    there are none the call returns a ``skipped`` status without queuing
    anything. A new job is submitted regardless of previous failures.

    Responds with 404 when the connection does not exist and 403 when it
    belongs to another user.

    This handler has to stay ``async def``: ``await startJob(...)`` must
    register the ``_runJob`` task in FastAPI's main event loop. A sync route
    would land in the worker threadpool, where ``asyncio.run`` tears down its
    temporary loop right after ``create_task`` and the job stays in PENDING
    forever.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import startJob
        from modules.datamodels.datamodelDataSource import DataSource

        appInterface = getRootInterface()
        connection = appInterface.getUserConnectionById(connectionId)
        if connection is None:
            raise HTTPException(status_code=404, detail="Connection not found")
        if str(connection.userId) != str(currentUser.id):
            raise HTTPException(status_code=403, detail="Not your connection")

        # Keep only the data sources that opted into RAG indexing.
        enabledSources = []
        for source in appInterface.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId}):
            if isinstance(source, dict):
                isEnabled = source.get("ragIndexEnabled")
            else:
                isEnabled = getattr(source, "ragIndexEnabled", False)
            if isEnabled:
                enabledSources.append(source)
        if not enabledSources:
            return {"status": "skipped", "reason": "no_rag_enabled_datasources"}

        # The authority may be an enum-like object (has .value) or a plain value.
        if hasattr(connection.authority, "value"):
            authority = connection.authority.value
        else:
            authority = str(connection.authority or "")

        sourceIds = [
            source.get("id") if isinstance(source, dict) else getattr(source, "id", "")
            for source in enabledSources
        ]
        jobPayload = {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": sourceIds}
        jobId = await startJob(
            "connection.bootstrap",
            jobPayload,
            triggeredBy=str(currentUser.id),
        )
        logger.info("Reindex triggered for connection %s (%d DataSources, jobId=%s)", connectionId, len(sourceIds), jobId)
        return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(sourceIds), "jobId": jobId}
    except HTTPException:
        raise
    except Exception as err:
        logger.error("Error triggering reindex: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))
@router.get("/jobs")
@limiter.limit("60/minute")
def _getActiveJobs(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> List[Dict[str, Any]]:
    """List PENDING/RUNNING bootstrap jobs that target the caller's connections.

    Only the 50 ``connection.bootstrap`` jobs returned by ``listJobs`` are
    inspected. Each result carries the job id, connection id, a display label,
    the job type and progress information. Used by the header badge.
    Failures are logged and reported as HTTP 500 carrying the exception text.
    """
    try:
        from modules.serviceCenter.services.serviceBackgroundJobs import listJobs
        from modules.interfaces.interfaceDbApp import getRootInterface

        appInterface = getRootInterface()
        ownConnections = {}
        for connection in appInterface.getUserConnections(currentUser.id):
            ownConnections[str(connection.id)] = connection

        activeJobs = []
        for job in listJobs(jobType="connection.bootstrap", limit=50):
            if job.get("status") not in ("PENDING", "RUNNING"):
                continue
            targetId = (job.get("payload") or {}).get("connectionId")
            if targetId not in ownConnections:
                # Job belongs to a connection the caller does not own.
                continue
            owner = ownConnections[targetId]
            # Prefer the display label; fall back to the authority, then the id.
            label = getattr(owner, "displayLabel", None) or getattr(owner, "authority", targetId)
            activeJobs.append({
                "jobId": job["id"],
                "connectionId": targetId,
                "connectionLabel": label,
                "jobType": job.get("jobType", "connection.bootstrap"),
                "progress": job.get("progress", 0),
                "progressMessage": job.get("progressMessage", ""),
            })
        return activeJobs
    except Exception as err:
        logger.error("Error in RAG inventory /jobs: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))

View file

@ -155,12 +155,13 @@ async def sttStream(
Protocol:
Client sends JSON:
{"type": "open", "language": "de-DE"}
{"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true}
{"type": "audio", "chunk": "<base64>"}
{"type": "close"}
Server sends JSON:
{"type": "interim", "text": "..."}
{"type": "final", "text": "...", "confidence": 0.95}
{"type": "end_of_single_utterance", "audioDurationSec": 0.0}
{"type": "error", "message": "..."}
{"type": "closed"}
"""
@ -205,7 +206,12 @@ async def sttStream(
logger.warning(f"STT billing pre-flight skipped: {e}")
audioQueue: asyncio.Queue = asyncio.Queue()
language = "de-DE"
sttOpenOptions: Dict[str, Any] = {
"language": "de-DE",
"model": "latest_long",
"lightweight": False,
"singleUtterance": False,
}
streamingTask: Optional[asyncio.Task] = None
voiceInterface: Optional[VoiceObjects] = None
@ -233,10 +239,23 @@ async def sttStream(
voiceInterface.billingCallback = _billingCb
try:
async for event in voiceInterface.streamingSpeechToText(audioQueue, language):
async for event in voiceInterface.streamingSpeechToText(
audioQueue,
sttOpenOptions["language"],
phraseHints=None,
model=sttOpenOptions["model"],
lightweight=sttOpenOptions["lightweight"],
singleUtterance=sttOpenOptions["singleUtterance"],
):
if event.get("reconnectRequired"):
await _sendJson({"type": "reconnect_required"})
return
if event.get("endOfSingleUtterance"):
await _sendJson({
"type": "end_of_single_utterance",
"audioDurationSec": event.get("audioDurationSec", 0.0),
})
continue
if event.get("isFinal"):
if event.get("transcript"):
await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
@ -258,7 +277,10 @@ async def sttStream(
msgType = (msg.get("type") or "").strip()
if msgType == "open":
language = msg.get("language") or "de-DE"
sttOpenOptions["language"] = msg.get("language") or "de-DE"
sttOpenOptions["model"] = msg.get("model") or "latest_long"
sttOpenOptions["lightweight"] = bool(msg.get("lightweight"))
sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance"))
if streamingTask and not streamingTask.done():
await audioQueue.put((b"", True))
streamingTask.cancel()

View file

@ -7,7 +7,7 @@ import logging
import time
import json
import re
from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable
from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable, Tuple
from modules.datamodels.datamodelAi import (
AiCallRequest, AiCallOptions, AiCallResponse, OperationTypeEnum
@ -335,9 +335,14 @@ async def runAgentLoop(
# Execute tool calls
for tc in toolCalls:
toolDef = toolRegistry.getTool(tc.name)
yield AgentEvent(
type=AgentEventTypeEnum.TOOL_CALL,
data={"toolName": tc.name, "args": tc.args}
data={
"toolName": tc.name,
"displayLabel": toolDef.displayLabel if toolDef else None,
"args": tc.args,
}
)
results = await _executeToolCalls(toolCalls, toolRegistry, {
@ -355,12 +360,18 @@ async def runAgentLoop(
state.totalToolCalls += len(results)
for result in results:
validationCode = None
if isinstance(result.errorDetails, dict):
code = result.errorDetails.get("code")
if isinstance(code, str):
validationCode = code
roundLog.toolCalls.append(ToolCallLog(
toolName=result.toolName,
args=next((tc.args for tc in toolCalls if tc.id == result.toolCallId), {}),
success=result.success,
durationMs=result.durationMs,
error=result.error,
validationFailureCode=validationCode,
resultData=result.data[:300] if result.data else "",
))
if not result.success:
@ -438,6 +449,11 @@ async def runAgentLoop(
trace.totalCostCHF = state.totalCostCHF
trace.abortReason = state.abortReason
validationFailures, repairAttempts, successAfterRepair = _computeRepairCounters(trace.rounds)
trace.validationFailures = validationFailures
trace.repairAttempts = repairAttempts
trace.successAfterRepair = successAfterRepair
artifactSummary = _buildArtifactSummary(trace.rounds)
yield AgentEvent(
@ -451,6 +467,9 @@ async def runAgentLoop(
"status": state.status.value,
"abortReason": state.abortReason,
"artifacts": artifactSummary,
"validationFailures": validationFailures,
"repairAttempts": repairAttempts,
"successAfterRepair": successAfterRepair,
}
)
@ -715,6 +734,41 @@ def classifyToolResult(
return None
def _computeRepairCounters(rounds: List[AgentRoundLog]) -> Tuple[int, int, int]:
"""Aggregate repair-loop telemetry across all rounds.
Returns ``(validationFailures, repairAttempts, successAfterRepair)``.
* `validationFailures` -- total tool calls rejected by a pre-execute
validator (any round, counts every occurrence).
* `repairAttempts` -- tool calls in **later** rounds whose `toolName`
had been rejected in some **earlier** round. Multiple retries of the
same tool count multiple times. We intentionally do not count
sibling calls within the same round, since the LLM has not yet seen
the first one's result when emitting the second.
* `successAfterRepair` -- the subset of `repairAttempts` that passed
the validator (``validationFailureCode is None``).
"""
validationFailures = 0
repairAttempts = 0
successAfterRepair = 0
rejectedTools: set = set()
for roundLog in rounds:
rejectedFromPriorRounds = set(rejectedTools)
for tc in roundLog.toolCalls:
wasRejectedBefore = tc.toolName in rejectedFromPriorRounds
if tc.validationFailureCode is not None:
validationFailures += 1
if wasRejectedBefore:
repairAttempts += 1
rejectedTools.add(tc.toolName)
elif wasRejectedBefore:
repairAttempts += 1
successAfterRepair += 1
return validationFailures, repairAttempts, successAfterRepair
_ARTIFACT_TOOLS = {"writeFile", "replaceInFile", "deleteFile", "renameFile", "copyFile",
"createFolder", "deleteFolder", "renderDocument", "generateImage"}

View file

@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services):
"required": ["connectionId", "to", "subject", "body"],
},
readOnly=False,
displayLabel="composing an email",
)

View file

@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
},
},
readOnly=False,
displayLabel="creating a document",
)
# ── textToSpeech tool ──────────────────────────────────────────────
@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
"required": ["prompt"],
},
readOnly=False,
displayLabel="generating an image",
)
# ── createChart tool ─────────────────────────────────────────────────
@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
"required": ["datasets"],
},
readOnly=False,
displayLabel="creating a chart",
)
# ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
},
"required": ["code"]
},
readOnly=True
readOnly=True,
displayLabel="running calculations",
)

View file

@ -19,6 +19,20 @@ from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
logger = logging.getLogger(__name__)
_STALE_EXTRACTION_PATTERNS = (
"requires the extract-msg package",
"extraction requires the",
"will be treated as binary",
)
def _isStaleExtractionResult(text: str) -> bool:
"""Detect cached extraction results that are just error/warning placeholders."""
if len(text) > 500:
return False
textLower = text.lower()
return any(p in textLower for p in _STALE_EXTRACTION_PATTERNS)
import uuid as _uuid
@ -62,15 +76,16 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
]
if textChunks:
assembled = "\n\n".join(c["data"] for c in textChunks)
chunked = _applyOffsetLimit(assembled, offset, limit)
if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
if len(assembled) > _MAX_TOOL_RESULT_CHARS:
assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]"
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=assembled,
)
if not _isStaleExtractionResult(assembled):
chunked = _applyOffsetLimit(assembled, offset, limit)
if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
if len(assembled) > _MAX_TOOL_RESULT_CHARS:
assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]"
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=assembled,
)
elif fileStatus in ("processing", "embedding", "extracted"):
return ToolResult(
toolCallId="", toolName="readFile", success=True,
@ -101,12 +116,31 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
isBinary = _looksLikeBinary(rawBytes)
if isBinary:
extractionService = services.getService("extraction") if hasattr(services, "getService") else None
if extractionService:
try:
extracted = extractionService.extractContentFromBytes(
rawBytes, fileName, mimeType, documentId=fileId,
)
textParts = [
p.data for p in (extracted.parts or [])
if getattr(p, "contentType", "") != "image" and getattr(p, "data", None)
]
if textParts:
assembled = "\n\n".join(textParts)
chunked = _applyOffsetLimit(assembled, offset, limit)
if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
if len(assembled) > _MAX_TOOL_RESULT_CHARS:
assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]"
return ToolResult(toolCallId="", toolName="readFile", success=True, data=assembled)
except Exception as extractErr:
logger.warning("readFile: inline extraction failed for %s: %s", fileId, extractErr)
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=(
f"[File '{fileName}' ({mimeType}) is not yet indexed "
f"(status: {fileStatus or 'unknown'}). Indexing runs automatically "
f"on upload. Please wait a few seconds and retry, or re-upload the file. "
f"[File '{fileName}' ({mimeType}) is binary and could not be extracted "
f"(status: {fileStatus or 'unknown'}). "
f"For visual content use describeImage(fileId='{fileId}').]"
),
)
@ -310,11 +344,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
updateFields: Dict[str, Any] = {}
if fiId:
dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId})
# File group tree removed — groupId arg and instance-group assignment no longer apply
updateFields["featureInstanceId"] = fiId
if args.get("folderId"):
updateFields["folderId"] = args["folderId"]
if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
updateFields["tags"] = args["tags"]
if updateFields:
dbMgmt.updateFile(fileItem.id, updateFields)
chatDocId = _attachFileAsChatDocument(
services, fileItem,
@ -359,7 +397,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
},
"required": ["fileId"]
},
readOnly=True
readOnly=True,
displayLabel="reviewing a document",
)
registry.register(
@ -406,7 +445,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"properties": {"query": {"type": "string", "description": "Search query"}},
"required": ["query"]
},
readOnly=True
readOnly=True,
displayLabel="researching on the web",
)
registry.register(
@ -427,7 +467,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"writeFile", _writeFile,
description=(
"Create, append, or overwrite a file. Modes:\n"
"- create (default): create a new file (name required).\n"
"- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n"
"- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required).\n"
@ -443,7 +483,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"content": {"type": "string", "description": "Content to write/append"},
"mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
"fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
"groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."},
"folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."},
"tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
},
"required": ["content"]
@ -581,7 +621,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
},
"required": ["url"]
},
readOnly=True
readOnly=True,
displayLabel="reading a webpage",
)
registry.register(
@ -701,7 +742,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
readOnly=False
)
# Group tree tools removed — file grouping now uses view-based display grouping (TableListView)
# ---- Folder management tools ----
async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
    """Tool handler: create a folder, optionally nested under ``parentId``.

    Reads ``name`` (required) and ``parentId`` (optional, empty values are
    treated as no parent) from ``args``. On success the result text names the
    new folder and its id, and a ``folderCreated`` side event is attached.
    Any exception is reported as a failed ToolResult carrying its message.
    """
    name = args.get("name", "")
    parentId = args.get("parentId") or None
    if not name:
        return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")
    try:
        folder = services.chat.interfaceDbComponent.createFolder(name, parentId=parentId)
        # The created folder may come back as a dict or as an object.
        if isinstance(folder, dict):
            folderId = folder.get("id")
            folderName = folder.get("name")
        else:
            folderId = getattr(folder, "id", None)
            folderName = getattr(folder, "name", name)

        summary = f"Folder '{folderName}' created (id: {folderId})"
        if parentId:
            summary += f" inside parent {parentId}"
        createdEvent = {
            "type": "folderCreated",
            "data": {"folderId": folderId, "folderName": folderName, "parentId": parentId},
        }
        return ToolResult(
            toolCallId="", toolName="createFolder", success=True,
            data=summary,
            sideEvents=[createdEvent],
        )
    except Exception as err:
        return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(err))
async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
    """Tool handler: render the folders from ``getOwnFolderTree`` as an indented list.

    Folders are grouped by ``parentId`` and printed depth-first starting at
    the entries without a parent; siblings are ordered by lower-cased name.
    Each line has the form ``- <name> (id: <id>)``, prefixed with one space
    per nesting level. Any exception is reported as a failed ToolResult
    carrying its message.
    """
    def _attr(record, key, default):
        # Folder rows may be dicts or objects; dict rows ignore `default`.
        return record.get(key) if isinstance(record, dict) else getattr(record, key, default)

    try:
        folders = services.chat.interfaceDbComponent.getOwnFolderTree()
        if not folders:
            return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.")

        childrenByParent: Dict[Optional[str], List] = {}
        for folder in folders:
            childrenByParent.setdefault(_attr(folder, "parentId", None), []).append(folder)

        lines = []

        def _emit(parentId: Optional[str], depth: int):
            siblings = sorted(
                childrenByParent.get(parentId, []),
                key=lambda item: _attr(item, "name", "").lower(),
            )
            for folder in siblings:
                folderId = _attr(folder, "id", "")
                folderName = _attr(folder, "name", "")
                lines.append(" " * depth + f"- {folderName} (id: {folderId})")
                _emit(folderId, depth + 1)

        _emit(None, 0)
        return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines))
    except Exception as err:
        return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(err))
async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
    """Tool handler: set a file's ``folderId``; an empty value moves it to root.

    Reads ``fileId`` (required) and ``folderId`` (optional) from ``args``.
    Fails when the file id is missing or the file cannot be found. On success
    a ``fileUpdated`` side event is attached. Any exception is reported as a
    failed ToolResult carrying its message.
    """
    fileId = args.get("fileId", "")
    folderId = args.get("folderId")
    if not fileId:
        return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")
    try:
        dbMgmt = services.chat.interfaceDbComponent
        file = dbMgmt.getFile(fileId)
        if not file:
            return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found")

        # Falsy folder ids (missing, None, "") are stored as None, i.e. root.
        dbMgmt.updateFile(fileId, {"folderId": folderId or None})
        if folderId:
            targetLabel = f"folder {folderId}"
        else:
            targetLabel = "root"
        updatedEvent = {"type": "fileUpdated", "data": {"fileId": fileId, "fileName": file.fileName}}
        return ToolResult(
            toolCallId="", toolName="moveFile", success=True,
            data=f"File '{file.fileName}' (id: {fileId}) moved to {targetLabel}",
            sideEvents=[updatedEvent],
        )
    except Exception as err:
        return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(err))
registry.register(
"createFolder", _createFolder,
description=(
"Create a new folder in the workspace file tree. "
"Use parentId to create nested folders. Returns the new folder ID."
),
parameters={
"type": "object",
"properties": {
"name": {"type": "string", "description": "Folder name"},
"parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."},
},
"required": ["name"]
},
readOnly=False
)
registry.register(
"listFolders", _listFolders,
description=(
"List all folders in the workspace as an indented tree. "
"Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile."
),
parameters={"type": "object", "properties": {}},
readOnly=True
)
async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
    """Tool handler: rename the folder ``folderId`` to ``newName``.

    Both arguments are required. On success a ``folderUpdated`` side event is
    attached. Any exception is reported as a failed ToolResult carrying its
    message.
    """
    folderId = args.get("folderId", "")
    newName = args.get("newName", "")
    if not (folderId and newName):
        return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
    try:
        # The return value of renameFolder is not needed for the response.
        services.chat.interfaceDbComponent.renameFolder(folderId, newName)
        updatedEvent = {"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}
        return ToolResult(
            toolCallId="", toolName="renameFolder", success=True,
            data=f"Folder {folderId} renamed to '{newName}'",
            sideEvents=[updatedEvent],
        )
    except Exception as err:
        return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(err))
registry.register(
"renameFolder", _renameFolder,
description="Rename an existing folder in the workspace file tree.",
parameters={
"type": "object",
"properties": {
"folderId": {"type": "string", "description": "The folder ID to rename"},
"newName": {"type": "string", "description": "New folder name"},
},
"required": ["folderId", "newName"]
},
readOnly=False
)
registry.register(
"moveFile", _moveFile,
description=(
"Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level."
),
parameters={
"type": "object",
"properties": {
"fileId": {"type": "string", "description": "The file ID to move"},
"folderId": {"type": "string", "description": "Target folder ID. Omit or null to move to root."},
},
"required": ["fileId"]
},
readOnly=False
)
registry.register(
"replaceInFile", _replaceInFile,

View file

@ -41,6 +41,12 @@ class ToolDefinition(BaseModel):
"""Schema for a tool available to the agent."""
name: str = Field(description="Unique tool name")
description: str = Field(description="What this tool does")
displayLabel: Optional[str] = Field(
default=None,
description="Short human-readable activity phrase (e.g. 'researching on the web'). "
"Used for live progress messages in meetings. English gerund phrase; "
"localised by the caller."
)
parameters: Dict[str, Any] = Field(
default_factory=dict,
description="JSON Schema for tool parameters"
@ -73,6 +79,14 @@ class ToolResult(BaseModel):
success: bool = True
data: str = ""
error: Optional[str] = None
errorDetails: Optional[Dict[str, Any]] = Field(
default=None,
description=(
"Structured, machine-readable error payload for the LLM (e.g. validation "
"repair hints with code/field/suggestion/hint). `error` remains the short "
"human-readable text for logs and audit."
),
)
durationMs: int = 0
sideEvents: Optional[List[Dict[str, Any]]] = None
@ -135,6 +149,14 @@ class ToolCallLog(BaseModel):
success: bool = True
durationMs: int = 0
error: Optional[str] = None
validationFailureCode: Optional[str] = Field(
default=None,
description=(
"If the tool call was rejected by a pre-execute validator (e.g. "
"QueryValidator), the structured error code (e.g. FIELD_NOT_FOUND). "
"None when the call ran cleanly or failed for other reasons."
),
)
resultData: str = Field(default="", description="Short result summary for artifact tracking")
@ -161,6 +183,24 @@ class AgentTrace(BaseModel):
totalToolCalls: int = 0
totalCostCHF: float = 0.0
abortReason: Optional[str] = None
validationFailures: int = Field(
default=0,
description="Total tool calls rejected by a pre-execute validator across the run.",
)
repairAttempts: int = Field(
default=0,
description=(
"Number of times the LLM retried a previously rejected tool (same toolName) "
"in a later round. Counted by `agentLoop` from per-round ToolCallLog entries."
),
)
successAfterRepair: int = Field(
default=0,
description=(
"Number of repair attempts that produced a clean (validationFailureCode=None) "
"result. Combined with `repairAttempts` this gives the repair conversion rate."
),
)
rounds: List[AgentRoundLog] = Field(default_factory=list)

View file

@ -0,0 +1,203 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Ontology data model for feature data sub-agents.
This module defines the data structures that describe a feature's data
ontology -- entities, relations, constraints, canonical query patterns --
plus the validation error payload used by the QueryValidator.
Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`,
`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/
`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology
pilot) can plug in without a second data-model change.
See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`.
"""
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class ValidationErrorCode(str, Enum):
    """Stable codes for validator failures.

    The LLM sees these codes verbatim in `ToolResult.errorDetails["code"]`
    and is expected to react to them deterministically (e.g. inspect the
    schema via browseTable when FIELD_NOT_FOUND, drop the SUM when
    INVALID_AGGREGATE_TARGET, add a period filter when MISSING_REQUIRED_FILTER).

    The ``str`` mix-in makes each member compare equal to its plain string
    value. Member names and values are part of the contract with the prompt
    text, so they must not be renamed.
    """

    # A referenced column (field list, filter, aggregate target, groupBy) is
    # not part of the table's model.
    FIELD_NOT_FOUND = "FIELD_NOT_FOUND"
    # SUM/AVG requested on a column that must not be aggregated across rows.
    INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET"
    # NOTE(review): presumably for ontology-driven table selection rules;
    # confirm which component emits it.
    WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE"
    # The requested operation does not fit the column's type
    # (e.g. SUM on a non-numeric column).
    TYPE_MISMATCH = "TYPE_MISMATCH"
    # A filter operator does not fit the column's type (e.g. LIKE on a number).
    OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE"
    # NOTE(review): presumably tied to ConstraintRule.REQUIRES_FILTER_ON;
    # confirm which component emits it.
    MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER"
    # orderBy names a column the table does not have.
    ORDER_BY_INVALID = "ORDER_BY_INVALID"
class QueryValidationError(BaseModel):
    """Structured description of a query that was rejected before execution.

    Two views are derived from one instance: ``toErrorDetails()`` is the
    machine-readable payload for `ToolResult.errorDetails`, and
    ``toShortError()`` is the one-line text for `ToolResult.error`.
    """

    code: ValidationErrorCode
    field: Optional[str] = Field(
        default=None,
        description="The offending field name (when applicable).",
    )
    suggestion: Optional[str] = Field(
        default=None,
        description=(
            "Best-effort suggestion (e.g. fuzzy-matched valid field name). "
            "None when no useful suggestion exists."
        ),
    )
    hint: str = Field(
        description="Short corrective hint, max ~80 chars. Surfaced to the LLM verbatim.",
        max_length=160,
    )

    def toShortError(self) -> str:
        """Return the short `error` string for logs/audit.

        The result is ``<CODE>: <hint>``, or ``<CODE>: <field>: <hint>``
        when a (non-empty) field name is set.
        """
        segments = [self.code.value]
        if self.field:
            segments.append(self.field)
        segments.append(self.hint)
        return ": ".join(segments)

    def toErrorDetails(self) -> Dict[str, Any]:
        """Return the dict stored in `ToolResult.errorDetails`.

        Keys are always present; ``field`` and ``suggestion`` may be None.
        """
        return dict(
            code=self.code.value,
            field=self.field,
            suggestion=self.suggestion,
            hint=self.hint,
        )
class ConstraintRule(str, Enum):
    """High-level rule kinds that can be attached to a field or table.

    The rule kind decides how ``Constraint.appliesTo`` and
    ``Constraint.params`` are to be read.
    """

    # The targeted column must not be summed/averaged across rows.
    NEVER_AGGREGATE = "NEVER_AGGREGATE"
    # Queries on the target need filters on the columns listed in
    # params["requiredFields"].
    REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON"
    # NOTE(review): exact semantics are not visible in this module; confirm.
    TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD"
    # NOTE(review): presumably paired with params["intents"]; confirm.
    PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT"
class Constraint(BaseModel):
    """A single rule the validator and the prompt compiler both consume.

    Phase 1 uses constraints declared inline by the validator (defaults
    derived from naming conventions like ``*Balance`` / ``*Total``).
    Phase 2 sources them from feature ontologies, replacing the
    convention-based defaults.
    """

    # Target of the rule; `OntologyDescriptor.constraintsForTable` matches on
    # the exact table name or on the "<Table>." prefix of this value.
    appliesTo: str = Field(
        description=(
            "Target identifier, format depends on rule: `<Table>.<field>` for "
            "field-level constraints, `<Table>` for table-level."
        ),
    )
    # Which kind of rule this is; see ConstraintRule.
    rule: ConstraintRule
    # Text shown next to the rule in the compiled prompt block.
    message: str = Field(
        description="Short hint forwarded to the LLM if the constraint fires.",
        max_length=160,
    )
    # Free-form extras; defaults to a fresh empty dict per instance.
    params: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} "
            "for REQUIRES_FILTER_ON."
        ),
    )
class SemanticType(str, Enum):
    """High-level semantic category an entity belongs to.

    Coarser than the underlying Pydantic type -- used so the prompt compiler
    can group entities ("here are your ACCOUNT-like tables") without the LLM
    having to read the full schema.
    """

    ACCOUNT = "ACCOUNT"
    BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT"
    TRANSACTION = "TRANSACTION"
    DOCUMENT = "DOCUMENT"
    PARTY = "PARTY"
    PERIOD = "PERIOD"
    # Default category of `Entity.semanticType` when none is given.
    OTHER = "OTHER"
class Cardinality(str, Enum):
    """Multiplicity of a `Relation` between two entities.

    NOTE(review): presumably read in the direction fromEntity -> toEntity
    (the prompt compiler prints relations in that order) -- confirm.
    """

    ONE_TO_ONE = "ONE_TO_ONE"
    ONE_TO_MANY = "ONE_TO_MANY"
    MANY_TO_ONE = "MANY_TO_ONE"
    MANY_TO_MANY = "MANY_TO_MANY"
class Invariant(BaseModel):
    """Free-form invariant attached to an entity.

    Phase 1 leaves these as opaque text consumed by the prompt compiler.
    Future phases may add a structured rule kind.
    """

    # Plain-text statement of the invariant; capped at 200 characters.
    description: str = Field(max_length=200)
class Entity(BaseModel):
    """One semantic entity in the ontology (often backed by a Pydantic table).

    Only ``name`` is required; every other attribute has a default.
    """

    # Label of the entity as shown in the compiled prompt block.
    name: str
    # Backing table model, if any; None for purely conceptual entities.
    pythonClass: Optional[str] = Field(
        default=None,
        description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').",
    )
    # Coarse category; falls back to OTHER when not specified.
    semanticType: SemanticType = SemanticType.OTHER
    # Optional "is-a" link, given by entity name rather than by reference.
    parentEntity: Optional[str] = Field(
        default=None,
        description="Name of a broader entity this one specializes (e.g. 'BankAccount' parentEntity 'Account').",
    )
    # Optional free text; the prompt compiler skips it when empty.
    description: str = ""
    # Free-form invariants; a fresh empty list per instance by default.
    invariants: List[Invariant] = Field(default_factory=list)
class Relation(BaseModel):
    """Directed link between two entities of the ontology.

    Both ends are referenced by entity name (plain strings), not by object.
    """

    # Name of the entity the relation starts at.
    fromEntity: str
    # Name of the entity the relation points to.
    toEntity: str
    # Multiplicity of the link.
    cardinality: Cardinality
    # Foreign-key field name on the fromEntity side (e.g. 'journalEntryId');
    # None when no such field is declared.
    via: Optional[str] = Field(
        default=None,
        description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').",
    )
class CanonicalQueryPattern(BaseModel):
    """Tool-call skeleton for a recurring user intent.

    The prompt compiler renders these as worked examples so the LLM has a
    template to mimic instead of inventing a query shape.
    """

    # Identifier of the intent; printed as ``intent=<value>`` in the prompt.
    intent: str = Field(description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.")
    # Optional explanation printed next to the intent.
    description: str = Field(default="", description="Human-readable when to use this pattern.")
    # Required. The prompt compiler reads the keys "tool", "tableName",
    # "aggregate", "field", "groupBy", "orderBy", "fields", "filters" and the
    # optional string "_postProcessing" from this dict.
    pattern: Dict[str, Any] = Field(
        description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}",
    )
class OntologyDescriptor(BaseModel):
    """Root object of a feature's ontology, as returned by `getAgentOntology()`.

    Only ``featureCode`` is mandatory; all collections default to empty lists.
    """

    featureCode: str
    entities: List[Entity] = Field(default_factory=list)
    relations: List[Relation] = Field(default_factory=list)
    constraints: List[Constraint] = Field(default_factory=list)
    canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list)

    def constraintsForTable(self, tableName: str) -> List[Constraint]:
        """Collect the constraints that target *tableName* or one of its fields.

        A constraint matches when its ``appliesTo`` equals the table name
        (table-level) or starts with ``"<tableName>."`` (field-level).
        Declaration order is preserved.
        """
        fieldPrefix = f"{tableName}."
        matching: List[Constraint] = []
        for constraint in self.constraints:
            target = constraint.appliesTo
            if target == tableName or target.startswith(fieldPrefix):
                matching.append(constraint)
        return matching

View file

@ -15,6 +15,7 @@ invoked outside an agent loop (e.g. in tests).
import json
import logging
import os
from typing import Any, Callable, Awaitable, Dict, List, Optional
from modules.datamodels.datamodelAi import (
@ -25,6 +26,10 @@ from modules.serviceCenter.services.serviceAgent.agentLoop import runAgentLoop
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
AgentConfig, AgentEvent, AgentEventTypeEnum, ToolResult,
)
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
QueryValidationError,
)
from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.featureDataProvider import FeatureDataProvider
from modules.shared.i18nRegistry import resolveText
@ -83,7 +88,8 @@ async def runFeatureDataAgent(
"""
provider = FeatureDataProvider(dbConnector, neutralizeFields=neutralizeFields)
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {})
validator = _buildValidatorForFeature(featureCode)
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator)
for tbl in selectedTables:
meta = tbl.get("meta", {})
@ -153,10 +159,19 @@ def _buildSubAgentTools(
featureInstanceId: str,
mandateId: str,
tableFilters: Dict[str, Dict[str, str]] = None,
validator: Optional[QueryValidator] = None,
) -> ToolRegistry:
"""Register browseTable and queryTable as sub-agent tools."""
"""Register browseTable and queryTable as sub-agent tools.
The optional ``validator`` runs **before** the provider on every call.
When it returns a structured error, the tool result carries
``errorDetails`` (machine-readable repair hint for the LLM) plus the
short ``error`` string for logs/audit. No provider call happens in that
case, so the database is never reached with a known-bad query.
"""
registry = ToolRegistry()
_tableFilters = tableFilters or {}
_validator = validator or QueryValidator()
def _recordFilterToList(tableName: str) -> Optional[List[Dict[str, Any]]]:
"""Convert a recordFilter dict to a list of {field, op, value} filter dicts."""
@ -165,6 +180,14 @@ def _buildSubAgentTools(
return None
return [{"field": k, "op": "=", "value": v} for k, v in rf.items()]
def _validationToolResult(toolName: str, err: QueryValidationError) -> ToolResult:
return ToolResult(
toolCallId="", toolName=toolName,
success=False,
error=err.toShortError(),
errorDetails=err.toErrorDetails(),
)
async def _browseTable(args: Dict[str, Any], context: Dict[str, Any]):
tableName = args.get("tableName", "")
limit = args.get("limit", 50)
@ -172,6 +195,9 @@ def _buildSubAgentTools(
fields = args.get("fields")
if not tableName:
return ToolResult(toolCallId="", toolName="browseTable", success=False, error="tableName required")
validationErr = _validator.validateBrowseQuery(tableName, args)
if validationErr is not None:
return _validationToolResult("browseTable", validationErr)
result = provider.browseTable(
tableName=tableName,
featureInstanceId=featureInstanceId,
@ -197,6 +223,9 @@ def _buildSubAgentTools(
offset = args.get("offset", 0)
if not tableName:
return ToolResult(toolCallId="", toolName="queryTable", success=False, error="tableName required")
validationErr = _validator.validateQueryTable(tableName, args)
if validationErr is not None:
return _validationToolResult("queryTable", validationErr)
result = provider.queryTable(
tableName=tableName,
featureInstanceId=featureInstanceId,
@ -220,12 +249,19 @@ def _buildSubAgentTools(
aggregate = args.get("aggregate", "")
field = args.get("field", "")
groupBy = args.get("groupBy")
filters = args.get("filters") or []
if not tableName:
return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="tableName required")
if not aggregate:
return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="aggregate required (SUM, COUNT, AVG, MIN, MAX)")
if not field:
return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="field required")
validationErr = _validator.validateAggregateQuery(tableName, args)
if validationErr is not None:
return _validationToolResult("aggregateTable", validationErr)
combinedFilters = list(filters)
recordFilters = _recordFilterToList(tableName) or []
combinedFilters.extend(recordFilters)
result = provider.aggregateTable(
tableName=tableName,
featureInstanceId=featureInstanceId,
@ -233,7 +269,7 @@ def _buildSubAgentTools(
aggregate=aggregate,
field=field,
groupBy=groupBy,
extraFilters=_recordFilterToList(tableName),
extraFilters=combinedFilters or None,
)
return ToolResult(
toolCallId="", toolName="aggregateTable",
@ -246,8 +282,12 @@ def _buildSubAgentTools(
"aggregateTable", _aggregateTable,
description=(
"Run an aggregate query on a feature data table. "
"Supports SUM, COUNT, AVG, MIN, MAX with optional GROUP BY. "
"Example: aggregateTable(tableName='TrusteeDataJournalLine', aggregate='SUM', field='debitAmount', groupBy='costCenter')"
"Supports SUM, COUNT, AVG, MIN, MAX with optional GROUP BY and filters. "
"Example: aggregateTable(tableName='TrusteeDataJournalLine', aggregate='SUM', "
"field='debitAmount', filters=[{'field':'accountNumber','op':'=','value':'5400'}]). "
"On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- "
"read errorDetails and correct the next call (e.g. drop the SUM, switch to queryTable with period filters, "
"or use the suggested field name)."
),
parameters={
"type": "object",
@ -256,6 +296,22 @@ def _buildSubAgentTools(
"aggregate": {"type": "string", "enum": ["SUM", "COUNT", "AVG", "MIN", "MAX"], "description": "Aggregate function"},
"field": {"type": "string", "description": "Field to aggregate (e.g. debitAmount, creditAmount)"},
"groupBy": {"type": "string", "description": "Optional field to group by (e.g. costCenter, accountNumber)"},
"filters": {
"type": "array",
"items": {
"type": "object",
"properties": {
"field": {"type": "string"},
"op": {"type": "string"},
"value": {},
},
},
"description": (
"Optional filter conditions applied before the aggregate. Same shape as queryTable's "
"filters. Required whenever you want to aggregate only a subset (e.g. SUM debits on "
"ONE account, COUNT rows in ONE year)."
),
},
},
"required": ["tableName", "aggregate", "field"],
},
@ -264,7 +320,11 @@ def _buildSubAgentTools(
registry.register(
"browseTable", _browseTable,
description="List rows from a feature data table with pagination.",
description=(
"List rows from a feature data table with pagination. "
"On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- "
"use errorDetails to correct the next call."
),
parameters={
"type": "object",
"properties": {
@ -286,7 +346,10 @@ def _buildSubAgentTools(
description=(
"Query a feature data table with filters, field selection, and ordering. "
"Filters: [{\"field\": \"status\", \"op\": \"=\", \"value\": \"active\"}]. "
"Operators: =, !=, >, <, >=, <=, LIKE, ILIKE, IS NULL, IS NOT NULL."
"Operators: =, !=, >, <, >=, <=, LIKE, ILIKE, IS NULL, IS NOT NULL. "
"On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- "
"common codes: FIELD_NOT_FOUND (use the suggestion or call browseTable), OPERATOR_INCOMPATIBLE "
"(switch to a compatible operator for that field type), ORDER_BY_INVALID."
),
parameters={
"type": "object",
@ -410,13 +473,94 @@ def _buildSchemaContext(
"- Keep your answer SHORT. The caller is a machine, not a human.",
]
domainHints = _loadFeatureDomainHints(featureCode)
if domainHints:
parts.extend(["", domainHints.strip()])
domainBlock = ""
if not _isOntologyDisabled():
domainBlock = _loadFeatureOntologyBlock(featureCode)
if not domainBlock:
domainBlock = _loadFeatureDomainHints(featureCode)
if domainBlock:
parts.extend(["", domainBlock.strip()])
return "\n".join(parts)
def _isOntologyDisabled() -> bool:
"""Eval-only escape hatch.
Set ``POWERON_DISABLE_FEATURE_ONTOLOGY=1`` in the environment to force
``_buildSchemaContext`` back onto the legacy ``getAgentDomainHints()``
path. Used by the Phase 1.5 benchmark to measure ``baseline`` and
``phase1`` accuracy WITHOUT the ontology-driven prompt block. Never
set this flag in production.
"""
return os.environ.get("POWERON_DISABLE_FEATURE_ONTOLOGY", "").strip() in ("1", "true", "TRUE", "yes")
def _buildValidatorForFeature(featureCode: str) -> QueryValidator:
    """Create the QueryValidator used by the sub-agent tools of one feature.

    The feature's ontology is looked up via ``_loadFeatureOntology`` and
    handed to the validator. When no ontology is available the validator is
    built with ``ontology=None`` and relies on its convention-based checks
    (``*Balance`` / ``*Total`` are NEVER_AGGREGATE). With an ontology the
    validator additionally honours the descriptor's NEVER_AGGREGATE
    constraints, so validator and prompt block share one source of truth.
    """
    return QueryValidator(ontology=_loadFeatureOntology(featureCode))
def _loadFeatureOntology(featureCode: str):
"""Return the feature's OntologyDescriptor or None when no hook is exposed."""
if not featureCode:
return None
try:
from modules.system.registry import loadFeatureMainModules
except Exception:
return None
try:
mainModules = loadFeatureMainModules() or {}
except Exception as exc:
logger.debug("Ontology lookup: cannot load main modules (%s)", exc)
return None
module = mainModules.get(featureCode) or mainModules.get(featureCode.lower())
if module is None:
return None
hook = getattr(module, "getAgentOntology", None)
if not callable(hook):
return None
try:
return hook()
except Exception as exc:
logger.warning("Feature '%s' getAgentOntology() raised: %s", featureCode, exc)
return None
def _loadFeatureOntologyBlock(featureCode: str) -> str:
    """Compile the feature's ontology into a prompt block, or return "".

    A feature may expose ``getAgentOntology() -> OntologyDescriptor`` in its
    main module. If it does, the descriptor is rendered with
    :func:`ontologyToPromptCompiler.compileOntologyToPrompt` and that text
    takes the place of the legacy ``getAgentDomainHints()`` block, so the
    validator and the prompt are fed from the same descriptor.

    An empty string is returned when there is no ontology or when importing
    or running the compiler fails (the failure is logged as a warning), which
    lets the caller fall back to the legacy domain hints.
    """
    descriptor = _loadFeatureOntology(featureCode)
    if descriptor is not None:
        try:
            from modules.serviceCenter.services.serviceAgent.ontologyToPromptCompiler import (
                compileOntologyToPrompt,
            )
            return compileOntologyToPrompt(descriptor)
        except Exception as exc:
            logger.warning("Ontology compile failed for '%s': %s", featureCode, exc)
    return ""
def _loadFeatureDomainHints(featureCode: str) -> str:
"""Pull optional domain-specific hints from the feature's main module.

View file

@ -0,0 +1,140 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Deterministic compiler: OntologyDescriptor -> sub-agent prompt block.
Phase 2 replaces a feature's hand-written ``_AGENT_DOMAIN_HINTS`` text
with a structured :class:`OntologyDescriptor`. This compiler renders the
descriptor into a stable, terse Markdown-ish block that the sub-agent
appends to its system prompt -- the same source of truth the
:class:`QueryValidator` consults.
The output is intentionally:
* short (every token costs every call)
* deterministic (no f-string ordering bugs, no Python dict iteration)
* free of internal jargon ('canonicalQueryPattern' is rendered as
'CANONICAL PATTERN' for the LLM)
"""
from __future__ import annotations
from typing import Iterable, List
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
CanonicalQueryPattern,
Constraint,
ConstraintRule,
Entity,
OntologyDescriptor,
Relation,
)
def compileOntologyToPrompt(ontology: OntologyDescriptor) -> str:
    """Turn *ontology* into the text block appended to the sub-agent prompt.

    Layout: a ``DOMAIN ONTOLOGY (<featureCode>):`` marker line, a blank line,
    the entity section, then the relation, constraint and pattern sections
    in that order. Each of the last three is preceded by a blank line and is
    left out entirely when it has nothing to render. Trailing whitespace is
    removed and the result ends with exactly one newline.
    """
    rendered: List[str] = [f"DOMAIN ONTOLOGY ({ontology.featureCode}):", ""]
    rendered += _renderEntities(ontology.entities)

    optionalSections = (
        _renderRelations(ontology.relations),
        _renderConstraints(ontology.constraints),
        _renderPatterns(ontology.canonicalPatterns),
    )
    for section in optionalSections:
        if section:
            rendered.append("")
            rendered.extend(section)

    return "\n".join(rendered).rstrip() + "\n"
def _renderEntities(entities: Iterable[Entity]) -> List[str]:
out: List[str] = ["ENTITIES:"]
for e in entities:
head = f"- {e.name}"
if e.parentEntity:
head += f" (specializes {e.parentEntity})"
if e.pythonClass:
head += f" [table: {e.pythonClass}]"
out.append(head)
if e.description:
out.append(f" {e.description}")
for inv in e.invariants:
out.append(f" * {inv.description}")
return out
def _renderRelations(relations: Iterable[Relation]) -> List[str]:
rels = list(relations)
if not rels:
return []
out: List[str] = ["RELATIONS:"]
for r in rels:
line = f"- {r.fromEntity} -> {r.toEntity} ({r.cardinality.value}"
if r.via:
line += f" via {r.via}"
line += ")"
out.append(line)
return out
def _renderConstraints(constraints: Iterable[Constraint]) -> List[str]:
    """Render the ``CONSTRAINTS (validator-enforced):`` section.

    Returns ``[]`` when there are no constraints. Each constraint yields one
    bullet ``- <rule label> on <appliesTo>: <message>``, optionally followed
    by ``(required filters: ...)`` and ``(intents: ...)`` when the matching
    ``params`` entry is a non-empty list.

    ``params`` is an untyped dict, so list items are converted with ``str()``
    before joining. A stray non-string item therefore no longer raises, which
    previously made the caller discard the whole ontology block.
    """
    cons = list(constraints)
    if not cons:
        return []

    out: List[str] = ["CONSTRAINTS (validator-enforced):"]
    for c in cons:
        line = f"- {_ruleLabel(c.rule)} on {c.appliesTo}: {c.message}"
        params = c.params or {}
        # Fixed order keeps the output deterministic.
        for paramKey, label in (("requiredFields", "required filters"), ("intents", "intents")):
            values = params.get(paramKey)
            if isinstance(values, list) and values:
                line += f" ({label}: {', '.join(str(v) for v in values)})"
        out.append(line)
    return out
def _ruleLabel(rule: ConstraintRule) -> str:
return rule.value.replace("_", " ").lower()
def _renderPatterns(patterns: Iterable[CanonicalQueryPattern]) -> List[str]:
    """Render the canonical query patterns as numbered worked examples.

    Returns ``[]`` when there are no patterns. Each pattern yields an
    ``<n>) intent=...`` line and a ``call:`` line; a ``note:`` line follows
    when the pattern dict carries a string under ``"_postProcessing"``.
    Numbering starts at 1.
    """
    examples: List[str] = []
    for position, pat in enumerate(patterns, start=1):
        examples.append(f"{position}) intent={pat.intent}: {pat.description}")
        examples.append(f" call: {_renderPatternCall(pat.pattern)}")
        note = pat.pattern.get("_postProcessing") if isinstance(pat.pattern, dict) else None
        if isinstance(note, str):
            examples.append(f" note: {note}")
    if not examples:
        return []
    return ["CANONICAL QUERY PATTERNS (mimic these tool calls):"] + examples
def _renderPatternCall(pattern: dict) -> str:
"""Render the pattern as a compact one-line tool call signature."""
tool = pattern.get("tool", "?")
parts: List[str] = []
for key in ("tableName", "aggregate", "field", "groupBy", "orderBy"):
if key in pattern and pattern[key] is not None and not str(key).startswith("_"):
parts.append(f"{key}={pattern[key]!r}")
if "fields" in pattern and pattern["fields"]:
parts.append(f"fields={pattern['fields']}")
if "filters" in pattern and pattern["filters"]:
compact = ", ".join(
f"{f.get('field')}{f.get('op','=')}{f.get('value')!r}"
for f in pattern["filters"]
if isinstance(f, dict)
)
parts.append(f"filters=[{compact}]")
return f"{tool}({', '.join(parts)})"

View file

@ -0,0 +1,311 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Pre-execute query validator for the Feature Data Sub-Agent.
Sits between the LLM tool call and `FeatureDataProvider`. Catches the four
high-impact hallucination classes deterministically so the LLM gets an
actionable repair hint instead of a raw SQL exception:
* invented field names -> FIELD_NOT_FOUND (+ fuzzy suggestion)
* operator/type mismatches -> OPERATOR_INCOMPATIBLE
* SUM/AVG on already-aggregated -> INVALID_AGGREGATE_TARGET
balance/total columns
* orderBy on invented fields -> ORDER_BY_INVALID
The validator reads the canonical schema from
`modules.datamodels.datamodelBase.MODEL_REGISTRY`. When an
`OntologyDescriptor` is provided (Phase 2), its constraints override the
convention-based defaults (e.g. NEVER_AGGREGATE on closingBalance).
"""
from __future__ import annotations
import difflib
import logging
import re
import typing
from typing import Any, Dict, List, Optional, Tuple
from modules.datamodels.datamodelBase import MODEL_REGISTRY
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
Constraint,
ConstraintRule,
OntologyDescriptor,
QueryValidationError,
ValidationErrorCode,
)
logger = logging.getLogger(__name__)
_STRING_ONLY_OPERATORS = {"LIKE", "ILIKE"}
_COMPARISON_OPERATORS = {">", "<", ">=", "<="}
_VALUELESS_OPERATORS = {"IS NULL", "IS NOT NULL"}
_AGGREGATES_THAT_SUM = {"SUM", "AVG"}
_AGGREGATE_BLACKLIST_SUFFIXES_DEFAULT: Tuple[str, ...] = ("Balance", "Total")
class QueryValidator:
    """Validate sub-agent tool arguments against the schema (+ optional ontology).

    Stateless per call -- holding only the optional ontology. Each
    `validateXxx` method returns ``None`` on success or a
    :class:`QueryValidationError` (the first problem found) to be surfaced
    to the LLM.

    The validator is a best-effort layer fed with raw LLM arguments:

    * tables unknown to the model registry are never rejected;
    * argument values of an unexpected type (e.g. a list or dict where a
      field name is expected) are passed through untouched instead of
      raising, so the tool wrapper / provider decides what to do with them.
    """

    def __init__(self, ontology: Optional[OntologyDescriptor] = None):
        """Store the optional ontology consulted by the aggregate check."""
        self._ontology = ontology

    # ------------------------------------------------------------------
    # public API: one method per sub-agent tool
    # ------------------------------------------------------------------

    def validateBrowseQuery(
        self, tableName: str, args: Dict[str, Any]
    ) -> Optional[QueryValidationError]:
        """Validate browseTable arguments.

        Phase 1 scope: only `fields` (whitelist) is LLM-driven; `limit`/`offset`
        are sanitized by the tool wrapper.
        """
        modelFields = _getModelFields(tableName)
        if modelFields is None:
            return None
        return self._validateFieldList(args.get("fields"), modelFields)

    def validateQueryTable(
        self, tableName: str, args: Dict[str, Any]
    ) -> Optional[QueryValidationError]:
        """Validate queryTable arguments (fields, then filters, then orderBy)."""
        modelFields = _getModelFields(tableName)
        if modelFields is None:
            return None

        fieldsErr = self._validateFieldList(args.get("fields"), modelFields)
        if fieldsErr is not None:
            return fieldsErr

        for f in args.get("filters") or []:
            filterErr = self._validateFilter(f, modelFields)
            if filterErr is not None:
                return filterErr

        orderBy = args.get("orderBy")
        # Only string values can be checked against the schema. Anything else
        # (list, dict, ...) would raise on the membership test or on building
        # the error payload, so it is left to the provider.
        if isinstance(orderBy, str) and not _isPlainNone(orderBy):
            if orderBy not in modelFields:
                return QueryValidationError(
                    code=ValidationErrorCode.ORDER_BY_INVALID,
                    field=orderBy,
                    suggestion=_suggestFieldName(orderBy, modelFields),
                    hint="orderBy must be a real field of this table.",
                )
        return None

    def validateAggregateQuery(
        self, tableName: str, args: Dict[str, Any]
    ) -> Optional[QueryValidationError]:
        """Validate aggregateTable arguments.

        Catches the highest-impact hallucination in the codebase:
        ``SUM(closingBalance)`` (and friends) across periods -- closing
        balances are already per-period, summing them produces nonsense.

        Check order: aggregate field exists, field is not blacklisted for
        SUM/AVG, field is numeric for SUM/AVG, groupBy exists, filters.
        """
        modelFields = _getModelFields(tableName)
        if modelFields is None:
            return None

        field = args.get("field")
        if not isinstance(field, str) or not field:
            # Empty field is rejected by the tool wrapper already; non-string
            # values cannot be looked up and are passed through.
            return None
        # str() guards against a non-string aggregate coming from the LLM.
        aggregate = str(args.get("aggregate") or "").upper()

        if field not in modelFields:
            return QueryValidationError(
                code=ValidationErrorCode.FIELD_NOT_FOUND,
                field=field,
                suggestion=_suggestFieldName(field, modelFields),
                hint="Use browseTable to inspect this table's columns.",
            )

        if aggregate in _AGGREGATES_THAT_SUM and self._isAggregateBlacklisted(tableName, field):
            return QueryValidationError(
                code=ValidationErrorCode.INVALID_AGGREGATE_TARGET,
                field=field,
                suggestion=None,
                hint=(
                    f"{field} is already aggregated per period; do not {aggregate} it "
                    "across rows. Use queryTable with period filters instead."
                ),
            )

        if aggregate in _AGGREGATES_THAT_SUM and not _isNumericAnnotation(modelFields[field]):
            return QueryValidationError(
                code=ValidationErrorCode.TYPE_MISMATCH,
                field=field,
                suggestion=None,
                hint=f"{aggregate} requires a numeric field; {field} is not numeric.",
            )

        groupBy = args.get("groupBy")
        # Same pass-through rule as for orderBy: only strings are checked.
        if isinstance(groupBy, str) and not _isPlainNone(groupBy):
            if groupBy not in modelFields:
                return QueryValidationError(
                    code=ValidationErrorCode.FIELD_NOT_FOUND,
                    field=groupBy,
                    suggestion=_suggestFieldName(groupBy, modelFields),
                    hint="groupBy must be a real field of this table.",
                )

        # filters validation matches queryTable so the LLM gets consistent
        # repair hints regardless of which tool it picked.
        for f in args.get("filters") or []:
            filterErr = self._validateFilter(f, modelFields)
            if filterErr is not None:
                return filterErr
        return None

    # ------------------------------------------------------------------
    # internals
    # ------------------------------------------------------------------

    def _validateFieldList(
        self, fields: Optional[List[str]], modelFields: Dict[str, Any]
    ) -> Optional[QueryValidationError]:
        """Return FIELD_NOT_FOUND for the first unknown name in *fields*.

        An empty/missing list passes; non-string items are skipped.
        """
        if not fields:
            return None
        for f in fields:
            if not isinstance(f, str):
                continue
            if f not in modelFields:
                return QueryValidationError(
                    code=ValidationErrorCode.FIELD_NOT_FOUND,
                    field=f,
                    suggestion=_suggestFieldName(f, modelFields),
                    hint="Use browseTable to inspect this table's columns.",
                )
        return None

    def _validateFilter(
        self, filterEntry: Any, modelFields: Dict[str, Any]
    ) -> Optional[QueryValidationError]:
        """Check one ``{field, op, value}`` filter entry.

        Entries that are not dicts, or whose ``field`` is missing or not a
        string, are passed through. A missing ``op`` counts as ``=``.
        """
        if not isinstance(filterEntry, dict):
            return None
        field = filterEntry.get("field")
        if not isinstance(field, str) or not field:
            return None  # tool wrapper passes these straight through
        # str() guards against a non-string operator; strip() lets " like "
        # match the operator sets.
        op = str(filterEntry.get("op") or "=").strip().upper()

        if field not in modelFields:
            return QueryValidationError(
                code=ValidationErrorCode.FIELD_NOT_FOUND,
                field=field,
                suggestion=_suggestFieldName(field, modelFields),
                hint="Use browseTable to inspect this table's columns.",
            )

        annotation = modelFields[field]
        if op in _STRING_ONLY_OPERATORS and not _isStringAnnotation(annotation):
            return QueryValidationError(
                code=ValidationErrorCode.OPERATOR_INCOMPATIBLE,
                field=field,
                suggestion=None,
                hint=f"{op} only works on string fields; {field} is not a string.",
            )
        if op in _COMPARISON_OPERATORS and not _isComparableAnnotation(annotation):
            return QueryValidationError(
                code=ValidationErrorCode.OPERATOR_INCOMPATIBLE,
                field=field,
                suggestion=None,
                hint=f"{op} requires a numeric or date field; {field} is not comparable.",
            )
        return None

    def _isAggregateBlacklisted(self, tableName: str, fieldName: str) -> bool:
        """Check whether a field must not be summed/averaged.

        A field is blacklisted when the ontology (if present) declares a
        NEVER_AGGREGATE constraint on exactly ``<tableName>.<fieldName>``,
        or when its name ends with one of the default suffixes
        (``*Balance`` / ``*Total``). The suffix convention is applied in
        both cases, i.e. the ontology adds to it and does not switch it off.
        """
        if self._ontology is not None:
            target = f"{tableName}.{fieldName}"
            for c in self._ontology.constraintsForTable(tableName):
                if c.rule == ConstraintRule.NEVER_AGGREGATE and c.appliesTo == target:
                    return True
        # str.endswith accepts a tuple, so one call covers every suffix.
        return fieldName.endswith(_AGGREGATE_BLACKLIST_SUFFIXES_DEFAULT)
# ------------------------------------------------------------------
# helpers
# ------------------------------------------------------------------
def _getModelFields(tableName: str) -> Optional[Dict[str, Any]]:
    """Look up *tableName* in MODEL_REGISTRY and map field names to annotations.

    Returns None when the table is not registered (e.g. pure UDB tables in
    early-startup contexts). Callers treat None as "schema unknown" and let
    the request through, relying on the downstream SQL layer for safety.
    """
    modelClass = MODEL_REGISTRY.get(tableName)
    if modelClass is None:
        return None
    annotations: Dict[str, Any] = {}
    for fieldName, fieldInfo in modelClass.model_fields.items():
        annotations[fieldName] = fieldInfo.annotation
    return annotations
def _suggestFieldName(badName: str, modelFields: Dict[str, Any]) -> Optional[str]:
"""Return the closest valid field name, or None if nothing reasonable."""
if not badName or not modelFields:
return None
matches = difflib.get_close_matches(badName, list(modelFields.keys()), n=1, cutoff=0.6)
return matches[0] if matches else None
def _isPlainNone(value: Any) -> bool:
"""LLMs sometimes pass the literal string 'None' -- treat both as None."""
return value is None or (isinstance(value, str) and value.strip().lower() == "none")
def _unwrapAnnotation(annotation: Any) -> Tuple[Any, ...]:
"""Flatten Optional/Union annotations into their constituent types."""
origin = typing.get_origin(annotation)
if origin is None:
return (annotation,)
return tuple(a for a in typing.get_args(annotation) if a is not type(None))
def _isStringAnnotation(annotation: Any) -> bool:
    """Return True when ``str`` is among the unwrapped members of *annotation*."""
    for candidate in _unwrapAnnotation(annotation):
        if candidate is str:
            return True
    return False
def _isNumericAnnotation(annotation: Any) -> bool:
    """Return True when the annotation admits a numeric value.

    Numeric means ``int``, ``float`` or ``decimal.Decimal`` (after
    Optional/Union unwrapping). ``Decimal`` is included so that amount
    columns declared as Decimal are not rejected for SUM/AVG. ``bool`` is
    not treated as numeric.
    """
    from decimal import Decimal  # local import keeps the block self-contained

    numericTypes = (int, float, Decimal)
    return any(a in numericTypes for a in _unwrapAnnotation(annotation))
def _isComparableAnnotation(annotation: Any) -> bool:
    """Return True when ordering operators (``>``, ``<``, ...) fit the annotation.

    Comparable means ``int``, ``float``, ``Decimal``, ``date`` or
    ``datetime`` (after Optional/Union unwrapping). Date types are included
    because the error hint built from this check tells the LLM that the
    operator "requires a numeric or date field"; rejecting date columns
    would contradict that hint.

    Booleans count as int in Python's type hierarchy but the comparison
    operators ``>``/``<`` on bool columns are almost never meaningful, so we
    treat bool as non-comparable for validator purposes.

    NOTE(review): dates stored as ISO strings in ``str`` columns are still
    reported as non-comparable -- confirm whether feature tables use them.
    """
    from datetime import date, datetime  # local imports: block stays self-contained
    from decimal import Decimal

    comparableTypes = (int, float, Decimal, date, datetime)
    for a in _unwrapAnnotation(annotation):
        if a is bool:
            continue
        if a in comparableTypes:
            return True
    return False

View file

@ -98,14 +98,17 @@ class _VirtualFS:
def _makeReadFile(services):
"""Create a readFile(fileId) closure bound to the current services context."""
def readFile(fileId: str) -> str:
def readFile(fileId: str, encoding: str = "utf-8") -> str:
mgmt = getattr(services, 'interfaceDbComponent', None) if services else None
if not mgmt:
raise RuntimeError("readFile: no file store available in this session")
data = mgmt.getFileData(str(fileId))
if data is None:
raise FileNotFoundError(f"File '{fileId}' not found in workspace")
return data.decode("utf-8")
try:
return data.decode(encoding)
except (UnicodeDecodeError, LookupError):
return data.decode("utf-8", errors="replace")
return readFile

View file

@ -23,7 +23,7 @@ class ToolRegistry:
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
description: str = "", parameters: Dict[str, Any] = None,
readOnly: bool = False, featureType: str = None,
toolSet: str = None):
toolSet: str = None, displayLabel: str = None):
"""Register a tool with its handler function."""
if name in self._tools:
logger.warning(f"Tool '{name}' already registered, overwriting")
@ -31,6 +31,7 @@ class ToolRegistry:
self._tools[name] = ToolDefinition(
name=name,
description=description,
displayLabel=displayLabel,
parameters=parameters or {},
readOnly=readOnly,
featureType=featureType,

View file

@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff
Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das")
- E-Mail an Teilnehmer/Kontakte versenden
- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen
- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.)
- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.)
- Recherche im Internet oder aktuelle Informationen noetig
- Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast
- E-Mail versenden
- Dokument generieren oder in einer Datenquelle ablegen
- Mehrere Schritte oder Tool-Aufrufe noetig
- Daten aus externen Quellen abrufen
Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true.
Wenn needsAgent=true:
- Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).

View file

@ -60,6 +60,7 @@ from modules.shared.jsonContinuation import getContexts
from modules.shared.jsonUtils import buildContinuationContext, tryParseJson
from modules.shared.jsonUtils import closeJsonStructures
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson
logger = logging.getLogger(__name__)
@ -447,7 +448,6 @@ class AiCallLooper:
extracted = extractJsonString(contexts.completePart)
parsed, parseErr, _ = tryParseJson(extracted)
if parseErr is not None:
from modules.shared.jsonUtils import repairBrokenJson
repaired = repairBrokenJson(extracted)
if repaired:
parsed = repaired
@ -470,9 +470,10 @@ class AiCallLooper:
return useCase.finalResultHandler(
result, normalized, extracted, debugPrefix, self.services
)
except Exception as e:
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.warning(
f"Iteration {iteration}: completePart not serializable after getContexts success: {e}"
f"Iteration {iteration}: completePart not serializable after getContexts success: "
f"{type(e).__name__}: {e}"
)
mergeFailCount += 1
if mergeFailCount >= MAX_MERGE_FAILS:
@ -491,6 +492,15 @@ class AiCallLooper:
)
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
except Exception as e:
logger.error(
f"Iteration {iteration}: unexpected error during completePart processing "
f"(re-raising, NOT a pipeline-mismatch retry): {type(e).__name__}: {e}",
exc_info=True,
)
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, False)
raise
elif contexts.jsonParsingSuccess and contexts.overlapContext != "":
# JSON parseable but has cut point - CONTINUE to next iteration

View file

@ -7,6 +7,9 @@ from .mainBackgroundJobService import (
startJob,
getJobStatus,
listJobs,
cancelJob,
cancelJobsByConnection,
isTerminalStatus,
JobProgressCallback,
)
@ -15,5 +18,8 @@ __all__ = [
"startJob",
"getJobStatus",
"listJobs",
"cancelJob",
"cancelJobsByConnection",
"isTerminalStatus",
"JobProgressCallback",
]

View file

@ -30,10 +30,11 @@ clear message. No silent zombies.
import asyncio
import logging
import time
from datetime import datetime, timezone
from typing import Any, Awaitable, Callable, Dict, List, Optional
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.connectors.connectorDbPostgre import DatabaseConnector, getCachedConnector
from modules.shared.configuration import APP_CONFIG
from modules.shared.dbRegistry import registerDatabase
from modules.datamodels.datamodelBackgroundJob import (
@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app")
registerDatabase(JOBS_DATABASE)
JobProgressCallback = Callable[[int, Optional[str]], None]
# Minimum number of seconds between two DB reads made by
# JobProgressCallback.isCancelled() for the same callback instance.
_CANCEL_CHECK_INTERVAL_S = 3.0


class JobProgressCallback:
    """Callable progress reporter with cooperative cancel-check for long-running walkers.

    Calling the instance writes ``progress`` (and optionally ``progressMessage``)
    to the job record. ``isCancelled()`` lets a walker poll whether the job was
    flipped to CANCELLED, without hitting the DB on every call.
    """

    def __init__(self, jobId: str):
        self._jobId = jobId
        # None = never checked, False = last check said "not cancelled",
        # True = cancelled (sticky: never re-checked once seen).
        self._cancelledCache: Optional[bool] = None
        self._lastCheckedAt: float = 0.0

    def __call__(self, progress: int, message: Optional[str] = None) -> None:
        """Persist a progress update; failures are logged and never raised.

        Args:
            progress: Percentage; coerced with ``int()`` and clamped to 0..100.
            message: Optional status text, truncated to 500 characters.
        """
        try:
            clamped = max(0, min(100, int(progress)))
            fields: Dict[str, Any] = {"progress": clamped}
            if message is not None:
                # str() so a non-string message cannot make the slice raise and
                # drop the whole progress update.
                fields["progressMessage"] = str(message)[:500]
            _updateJob(self._jobId, fields)
        except Exception as ex:
            logger.warning("Progress update failed for job %s: %s", self._jobId, ex)

    def isCancelled(self) -> bool:
        """Return True once the job record has status CANCELLED.

        The job is re-read from the DB at most once per
        ``_CANCEL_CHECK_INTERVAL_S`` seconds; in between, the cached answer is
        returned. A failed DB read is treated as "not cancelled" (best effort)
        but is logged so a broken cancel check does not go unnoticed.
        """
        if self._cancelledCache is True:
            return True
        now = time.time()
        if now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S:
            return self._cancelledCache or False
        # Stamp before the read so a failing DB is also throttled.
        self._lastCheckedAt = now
        try:
            job = _loadJob(self._jobId)
            if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value:
                self._cancelledCache = True
                return True
        except Exception as ex:
            logger.debug("Cancel check failed for job %s: %s", self._jobId, ex)
        self._cancelledCache = False
        return False
JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
@ -64,7 +104,13 @@ def registerJobHandler(jobType: str, handler: JobHandler) -> None:
def _getDb() -> DatabaseConnector:
return DatabaseConnector(
"""Return the shared cached connector for the jobs DB.
Reuses the same connector across all job CRUD calls instead of opening a
fresh psycopg2 connection (and re-running `_create_database_if_not_exists`
+ `_create_tables` + `_initializeSystemTable`) on every operation.
"""
return getCachedConnector(
dbDatabase=JOBS_DATABASE,
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
dbPort=int(APP_CONFIG.get("DB_PORT", "5432")),
@ -155,16 +201,7 @@ def _markError(jobId: str, errorMessage: str) -> None:
def _makeProgressCallback(jobId: str) -> JobProgressCallback:
def _cb(progress: int, message: Optional[str] = None) -> None:
try:
clamped = max(0, min(100, int(progress)))
fields: Dict[str, Any] = {"progress": clamped}
if message is not None:
fields["progressMessage"] = message[:500]
_updateJob(jobId, fields)
except Exception as ex:
logger.warning("Progress update failed for job %s: %s", jobId, ex)
return _cb
return JobProgressCallback(jobId)
async def _runJob(jobId: str) -> None:
@ -220,12 +257,51 @@ def isTerminalStatus(status: str) -> bool:
return status in {s.value for s in TERMINAL_JOB_STATUSES}
def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool:
    """Flip a job to CANCELLED so walkers can stop cooperatively.

    Walkers notice the change through ``JobProgressCallback.isCancelled()``.

    Args:
        jobId: Id of the job to cancel.
        reason: Short tag stored in ``errorMessage`` as ``"cancelled: <reason>"``
            (truncated to 1000 characters).

    Returns:
        True when the job was marked CANCELLED; False when the job does not
        exist or is already in a terminal state.
    """
    job = _loadJob(jobId)
    if not job or isTerminalStatus(job.get("status", "")):
        return False

    cancelFields = {
        "status": BackgroundJobStatusEnum.CANCELLED.value,
        "errorMessage": f"cancelled: {reason}"[:1000],
        "finishedAt": datetime.now(timezone.utc).timestamp(),
    }
    _updateJob(jobId, cancelFields)
    logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason)
    return True
def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int:
    """Cancel every PENDING/RUNNING job of ``jobType`` that belongs to a connection.

    A job belongs to the connection when ``payload["connectionId"]`` equals
    ``connectionId``. Each match is cancelled through ``cancelJob`` with the
    reason ``connection_stop:<first 8 chars of connectionId>``.

    Returns:
        Number of jobs that ``cancelJob`` reported as cancelled.
    """
    candidates = _getDb().getRecordset(BackgroundJob, recordFilter={"jobType": jobType})
    activeStatuses = (
        BackgroundJobStatusEnum.PENDING.value,
        BackgroundJobStatusEnum.RUNNING.value,
    )
    cancelledCount = 0
    for job in candidates:
        if job.get("status", "") not in activeStatuses:
            continue
        jobPayload = job.get("payload") or {}
        if jobPayload.get("connectionId") != connectionId:
            continue
        if cancelJob(job["id"], reason=f"connection_stop:{connectionId[:8]}"):
            cancelledCount += 1
    return cancelledCount
def recoverInterruptedJobs() -> int:
"""Flip any RUNNING jobs to ERROR (called at worker boot).
A RUNNING job in the DB after process restart means the previous worker
died mid-execution; the asyncio task is gone and the job will never
finish on its own.
finish on its own. The daily scheduler or manual "Neu indexieren"
button handles retry — no automatic re-queue, to avoid infinite loops.
"""
db = _getDb()
try:
@ -243,3 +319,61 @@ def recoverInterruptedJobs() -> int:
if count:
logger.warning("Recovered %d interrupted background job(s) after restart", count)
return count
# Default age (seconds) after which a RUNNING job is treated as hung: 30 minutes.
_ZOMBIE_MAX_AGE_SECONDS = 30 * 60


def killZombieJobs(maxAgeSeconds: int = _ZOMBIE_MAX_AGE_SECONDS) -> int:
    """Mark RUNNING jobs older than ``maxAgeSeconds`` as ERROR.

    A job's age is measured from ``startedAt`` (falling back to ``createdAt``).
    Rows without either timestamp are skipped.

    NOTE(review): age is taken from the start timestamp, not from the last
    progress update, so a job that is still progressing after
    ``maxAgeSeconds`` is marked as well. If "no update since" is the intended
    criterion, compare against a last-modified field instead — confirm which
    field the BackgroundJob model offers.

    Args:
        maxAgeSeconds: Age threshold in seconds.

    Returns:
        Number of jobs marked as ERROR. 0 when the scan itself fails.
    """
    db = _getDb()
    try:
        rows = db.getRecordset(BackgroundJob, recordFilter={"status": BackgroundJobStatusEnum.RUNNING.value})
    except Exception as ex:
        logger.warning("killZombieJobs: failed to scan RUNNING jobs: %s", ex)
        return 0

    now = time.time()
    threshold = now - maxAgeSeconds
    count = 0
    for row in rows or []:
        started = row.get("startedAt") or row.get("createdAt")
        if not started or started > threshold:
            continue
        ageMin = (now - started) / 60
        try:
            _markError(row["id"], f"Zombie killed (stuck >{maxAgeSeconds // 60}min, no progress)")
        except Exception as ex:
            logger.warning("killZombieJobs: could not kill %s: %s", row.get("id"), ex)
            continue
        count += 1
        # Build the log fields defensively: the job is already marked at this
        # point, so a None connectionId or non-dict payload must not surface
        # as a misleading "could not kill" warning.
        payload = row.get("payload")
        connId = payload.get("connectionId") if isinstance(payload, dict) else None
        logger.warning(
            "killZombieJobs: killed %s (type=%s connId=%s ageMin=%.1f)",
            row["id"], row.get("jobType"), str(connId or "")[:12], ageMin,
        )
    return count
def registerZombieKillerScheduler(*, intervalMinutes: int = 5) -> None:
    """Register a recurring cron job that runs ``killZombieJobs``.

    The job is registered with the fixed id ``background_jobs.zombie_killer``
    and a cron ``minute`` field of ``*/<intervalMinutes>``. Any failure during
    import or registration is logged as a warning and not raised.

    NOTE(review): idempotence relies on ``eventManager.registerCron`` replacing
    an existing job with the same id — confirm against its implementation.
    """
    try:
        from modules.shared.eventManagement import eventManager

        async def _runKiller():
            killZombieJobs()

        cronSpec = {"minute": f"*/{intervalMinutes}"}
        eventManager.registerCron(
            jobId="background_jobs.zombie_killer",
            func=_runKiller,
            cronKwargs=cronSpec,
        )
    except Exception as ex:
        logger.warning("Zombie-killer scheduler registration failed (non-critical): %s", ex)
    else:
        logger.info("Zombie-killer scheduler registered (every %d min)", intervalMinutes)

View file

@ -532,8 +532,16 @@ class ChatService:
self, connectionId: str, sourceType: str, path: str, label: str,
featureInstanceId: str = None, displayPath: str = None,
) -> Dict[str, Any]:
"""Create a new external data source reference."""
"""Create a new external data source reference.
Returns existing record if connectionId + path already exists (upsert semantics).
"""
from modules.datamodels.datamodelDataSource import DataSource
existing = self.interfaceDbApp.db.getRecordset(
DataSource, recordFilter={"connectionId": connectionId, "path": path}
)
if existing:
return existing[0] if isinstance(existing[0], dict) else existing[0].model_dump()
ds = DataSource(
connectionId=connectionId,
sourceType=sourceType,

View file

@ -77,6 +77,7 @@ class ContainerExtractor(Extractor):
"""Extract by recursively unpacking the container."""
fileName = context.get("fileName", "archive")
mimeType = context.get("mimeType", "application/octet-stream")
cascadeDepth = context.get("_cascadeDepth", 0)
rootId = makeId()
parts: List[ContentPart] = [
@ -97,7 +98,7 @@ class ContainerExtractor(Extractor):
parts.extend(lazy)
return parts
state = {"totalSize": 0, "fileCount": 0}
state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth}
try:
childParts = _resolveContainerRecursive(
fileBytes, mimeType, fileName, rootId, "", 0, state
@ -209,7 +210,12 @@ def _addFilePart(
if extractor and not isinstance(extractor, ContainerExtractor):
try:
childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime})
cascadeDepth = state.get("cascadeDepth", 0)
childParts = extractor.extract(data, {
"fileName": fileName,
"mimeType": detectedMime,
"_cascadeDepth": cascadeDepth + 1,
})
for part in childParts:
part.parentId = parentId
if not part.metadata:

View file

@ -53,12 +53,13 @@ class EmailExtractor(Extractor):
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
fileName = context.get("fileName", "email")
lower = (fileName or "").lower()
depth = context.get("_cascadeDepth", 0)
if lower.endswith(".msg"):
return self._extractMsg(fileBytes, fileName)
return self._extractEml(fileBytes, fileName)
return self._extractMsg(fileBytes, fileName, depth)
return self._extractEml(fileBytes, fileName, depth)
def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
"""Parse standard EML (RFC 822) using stdlib email."""
rootId = makeId()
parts: List[ContentPart] = []
@ -91,7 +92,7 @@ class EmailExtractor(Extractor):
attachName = part.get_filename() or "attachment"
attachData = part.get_payload(decode=True)
if attachData:
parts.extend(_delegateAttachment(attachData, attachName, rootId))
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
continue
if contentType == "text/plain":
@ -113,7 +114,7 @@ class EmailExtractor(Extractor):
return parts
def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
"""Parse Outlook MSG files using extract-msg (optional)."""
rootId = makeId()
parts: List[ContentPart] = []
@ -179,7 +180,7 @@ class EmailExtractor(Extractor):
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
attachData = getattr(attachment, "data", None)
if attachData:
parts.extend(_delegateAttachment(attachData, attachName, rootId))
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
try:
msgFile.close()
@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str:
return "\n".join(lines)
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]:
"""Delegate an attachment to the appropriate type-specific extractor."""
_MAX_CASCADE_DEPTH = 10
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]:
"""Delegate an attachment to the appropriate type-specific extractor.
Passes ``_cascadeDepth`` through the context so nested Email → Container → Email
chains share a global depth counter and don't recurse infinitely.
"""
if depth >= _MAX_CASCADE_DEPTH:
logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction")
import base64
encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
return [ContentPart(
id=makeId(), parentId=parentId, label=attachName,
typeGroup="binary", mimeType="application/octet-stream",
data=encodedData,
metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True},
)]
guessedMime, _ = mimetypes.guess_type(attachName)
detectedMime = guessedMime or "application/octet-stream"
from ..subRegistry import ExtractorRegistry
registry = ExtractorRegistry()
from ..subRegistry import getExtractorRegistry
registry = getExtractorRegistry()
extractor = registry.resolve(detectedMime, attachName)
if extractor and not isinstance(extractor, EmailExtractor):
if extractor:
try:
childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime})
childParts = extractor.extract(attachData, {
"fileName": attachName,
"mimeType": detectedMime,
"_cascadeDepth": depth + 1,
})
for part in childParts:
part.parentId = parentId
if not part.metadata:

View file

@ -33,6 +33,7 @@ class ExtractionService:
self._interfaceDbComponent = getComponentInterface(
context.user,
mandateId=context.mandate_id,
featureInstanceId=context.feature_instance_id,
)
self._extractorRegistry = getExtractorRegistry()
if ExtractionService._sharedChunkerRegistry is None:

View file

@ -122,21 +122,54 @@ def _onConnectionRevoked(
)
# Lookup table: authority -> walker key -> DataSource ``sourceType`` values
# that the walker is responsible for. ``_filterDs`` in ``_bootstrapJobHandler``
# uses it to select, per walker, the rag-enabled DataSources to pass along.
# An authority or walker key missing from the table yields no DataSources.
_SOURCE_TYPE_MAP = {
"msft": {
"sharepoint": ("sharepointFolder", "onedriveFolder"),
"outlook": ("outlookFolder", "calendarFolder", "contactFolder"),
},
"google": {
"drive": ("googleDriveFolder",),
"gmail": ("gmailFolder",),
},
"clickup": {
"clickup": ("clickupList", "clickup"),
},
"infomaniak": {
"kdrive": ("kdriveFolder", "infomaniak"),
},
}
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
    """Return the DataSource rows of a connection that have ``ragIndexEnabled`` set.

    Args:
        connectionId: Connection whose DataSource rows are loaded.
        dataSourceIds: Optional id list (mini-bootstrap). When non-empty, only
            rows whose ``id`` is in this list are returned.

    Returns:
        List of matching DataSource rows, in the order the DB returned them.
    """
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.datamodels.datamodelDataSource import DataSource

    records = getRootInterface().db.getRecordset(
        DataSource, recordFilter={"connectionId": connectionId}
    )
    if not dataSourceIds:
        return [record for record in records if record.get("ragIndexEnabled")]
    return [
        record
        for record in records
        if record.get("id") in dataSourceIds and record.get("ragIndexEnabled")
    ]
async def _bootstrapJobHandler(
job: Dict[str, Any],
progressCb,
) -> Dict[str, Any]:
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
"""Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources."""
payload = job.get("payload") or {}
connectionId = payload.get("connectionId")
authority = (payload.get("authority") or "").lower()
dataSourceIds = payload.get("dataSourceIds")
if not connectionId:
raise ValueError("connection.bootstrap requires payload.connectionId")
progressCb(5, f"resolving {authority} connection")
# Defensive consent check: if the connection has since disabled knowledge ingestion
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
# Defensive consent check
try:
from modules.interfaces.interfaceDbApp import getRootInterface
_root = getRootInterface()
@ -156,6 +189,21 @@ async def _bootstrapJobHandler(
except Exception as _guardErr:
logger.debug("Could not load connection for consent guard: %s", _guardErr)
# Load only ragIndexEnabled DataSources for this connection
dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds)
if not dataSources:
logger.info(
"ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connectionId,
"authority": authority,
"reason": "no_data_sources",
},
)
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"}
def _normalize(res: Any, label: str) -> Dict[str, Any]:
if isinstance(res, Exception):
logger.error(
@ -165,6 +213,10 @@ async def _bootstrapJobHandler(
return {"error": str(res)}
return res or {}
def _filterDs(walkerKey: str) -> list:
sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ())
return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes]
if authority == "msft":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint,
@ -173,10 +225,15 @@ async def _bootstrapJobHandler(
bootstrapOutlook,
)
progressCb(10, "sharepoint + outlook")
progressCb(0, "Synchronisierung läuft...")
spDs = _filterDs("sharepoint")
olDs = _filterDs("outlook")
async def _noopResult():
return {"skipped": True, "reason": "no_datasources"}
spResult, olResult = await asyncio.gather(
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else _noopResult(),
return_exceptions=True,
)
return {
@ -194,10 +251,15 @@ async def _bootstrapJobHandler(
bootstrapGmail,
)
progressCb(10, "drive + gmail")
progressCb(0, "Synchronisierung läuft...")
gdDs = _filterDs("drive")
gmDs = _filterDs("gmail")
async def _noopResult():
return {"skipped": True, "reason": "no_datasources"}
gdResult, gmResult = await asyncio.gather(
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(),
return_exceptions=True,
)
return {
@ -212,14 +274,29 @@ async def _bootstrapJobHandler(
bootstrapClickup,
)
progressCb(10, "clickup tasks")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
progressCb(0, "Synchronisierung läuft...")
cuDs = _filterDs("clickup")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
return {
"connectionId": connectionId,
"authority": authority,
"clickup": _normalize(cuResult, "clickup"),
}
if authority == "infomaniak":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncKdrive import (
bootstrapKdrive,
)
progressCb(0, "Synchronisierung läuft...")
kdDs = _filterDs("kdrive")
kdResult = await bootstrapKdrive(connectionId=connectionId, progressCb=progressCb, dataSources=kdDs) if kdDs else {"skipped": True, "reason": "no_datasources"}
return {
"connectionId": connectionId,
"authority": authority,
"kdrive": _normalize(kdResult, "kdrive"),
}
logger.info(
"ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s",
authority, connectionId,

View file

@ -9,7 +9,7 @@ is None).
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description"
@dataclass
class ConnectionIngestionPrefs:
"""Parsed per-connection preferences for knowledge ingestion walkers."""
"""Parsed per-connection preferences for knowledge ingestion walkers.
# PII
neutralizeBeforeEmbed: bool = False
Neutralization is now controlled per DataSource.neutralize (not here).
Surface toggles are obsolete — the walker iterates only over ragIndexEnabled DataSources.
"""
# Mail (Outlook + Gmail)
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
@ -32,18 +33,11 @@ class ConnectionIngestionPrefs:
# Files (Drive / SharePoint / OneDrive)
filesIndexBinaries: bool = True
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
# ClickUp
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
clickupIndexAttachments: bool = False
# Per-authority surface toggles (default everything on)
gmailEnabled: bool = True
driveEnabled: bool = True
sharepointEnabled: bool = True
outlookEnabled: bool = True
# Time window
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
v = raw.get(key)
return int(v) if isinstance(v, int) else default
surface = raw.get("surfaceToggles") or {}
google_surf = surface.get("google") or {}
msft_surf = surface.get("msft") or {}
return ConnectionIngestionPrefs(
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
mailIndexAttachments=_bool("mailIndexAttachments", False),
filesIndexBinaries=_bool("filesIndexBinaries", True),
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
gmailEnabled=bool(google_surf.get("gmail", True)),
driveEnabled=bool(google_surf.get("drive", True)),
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
outlookEnabled=bool(msft_surf.get("outlook", True)),
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
)
except Exception as exc:

View file

@ -23,7 +23,13 @@ import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
WalkerTimeout,
ingestWithTimeout,
logItemStart,
)
logger = logging.getLogger(__name__)
@ -150,8 +156,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
"data": description,
"contextRef": {"part": "description"},
})
# text_content is ClickUp's rendered-markdown version; include if it adds
# something beyond the plain description (common for bullet lists, checklists).
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
if textContent and textContent != description:
parts.append({
@ -166,33 +170,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
async def bootstrapClickup(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its subtree.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = ClickupBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
clickupScope=prefs.clickupScope,
)
limits = ClickupBootstrapLimits()
startMs = time.time()
result = ClickupBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=clickup connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "clickup",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -215,30 +221,56 @@ async def bootstrapClickup(
return _finalizeResult(connectionId, result, startMs)
teams = (teamsResp or {}).get("teams") or []
for team in teams[: limits.maxWorkspaces]:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = ClickupBootstrapLimits(
maxTasks=limits.maxTasks,
maxWorkspaces=limits.maxWorkspaces,
maxListsPerWorkspace=limits.maxListsPerWorkspace,
maxDescriptionChars=limits.maxDescriptionChars,
maxAgeDays=limits.maxAgeDays,
includeClosed=limits.includeClosed,
neutralize=dsNeutralize,
clickupScope=limits.clickupScope,
)
for team in teams[:dsLimits.maxWorkspaces]:
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -280,8 +312,12 @@ async def _walkTeam(
team: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
teamId = str(team.get("id", "") or "")
spacesResp = await svc.getSpaces(teamId)
spaces = (spacesResp or {}).get("spaces") or []
@ -294,14 +330,12 @@ async def _walkTeam(
if not spaceId:
continue
# Folderless lists directly under the space
folderless = await svc.getFolderlessLists(spaceId)
for lst in (folderless or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
listsCollected.append({**lst, "_space": space})
# Lists inside folders
foldersResp = await svc.getFolders(spaceId)
for folder in (foldersResp or {}).get("folders") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
@ -318,6 +352,8 @@ async def _walkTeam(
for lst in listsCollected:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
result.lists += 1
await _walkList(
svc=svc,
@ -330,6 +366,7 @@ async def _walkTeam(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
@ -344,13 +381,16 @@ async def _walkList(
lst: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
listId = str(lst.get("id", "") or "")
if not listId:
return
page = 0
while result.indexed + result.skippedDuplicate < limits.maxTasks:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
resp = await svc.getTasksInList(
listId,
page=page,
@ -371,7 +411,6 @@ async def _walkList(
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
result.skippedPolicy += 1
continue
# Inject the list/folder/space metadata we already loaded.
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
task["folder"] = task.get("folder") or lst.get("_folder") or {}
task["space"] = task.get("space") or lst.get("_space") or {}
@ -385,9 +424,10 @@ async def _walkList(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page
if len(tasks) < 100:
return
page += 1
@ -402,7 +442,8 @@ async def _ingestTask(
task: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -414,35 +455,44 @@ async def _ingestTask(
name = task.get("name") or f"Task {taskId}"
syntheticId = _syntheticTaskId(connectionId, taskId)
fileName = f"{name[:80].strip() or taskId}.task.json"
logItemStart("clickup", f"{teamId}/{taskId}")
contentObjects = _buildContentObjects(task, limits)
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="clickup_task",
sourceId=syntheticId,
fileName=fileName,
mimeType="application/vnd.clickup.task+json",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision or None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "clickup",
"service": "clickup",
"externalItemId": taskId,
"teamId": teamId,
"listId": ((task.get("list") or {}).get("id")),
"spaceId": ((task.get("space") or {}).get("id")),
"url": task.get("url"),
"status": ((task.get("status") or {}).get("status")),
"tier": limits.clickupScope,
},
)
handle = await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="clickup_task",
sourceId=syntheticId,
fileName=fileName,
mimeType="application/vnd.clickup.task+json",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision or None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "clickup",
"service": "clickup",
"externalItemId": taskId,
"teamId": teamId,
"listId": ((task.get("list") or {}).get("id")),
"spaceId": ((task.get("space") or {}).get("id")),
"url": task.get("url"),
"status": ((task.get("status") or {}).get("status")),
"tier": limits.clickupScope,
},
)
),
label=taskId,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
result.failed += 1
@ -456,17 +506,17 @@ async def _ingestTask(
else:
result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate
if progressCb is not None and processed % 5 == 0:
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
f"clickup processed={processed}",
)
progressCb(0, f"{processed} Tasks verarbeitet, {result.indexed} indexiert")
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
if processed % 50 == 0:
logger.info(
"ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",

View file

@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
@ -20,6 +21,13 @@ from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
WalkerTimeout,
downloadWithTimeout,
extractWithTimeout,
ingestWithTimeout,
logItemStart,
)
logger = logging.getLogger(__name__)
@ -30,7 +38,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder"
@ -41,12 +48,8 @@ class GdriveBootstrapLimits:
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT
# Only ingest files modified within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
# Whether to skip binary/non-text files
filesIndexBinaries: bool = True
@dataclass
@ -95,10 +98,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays:
return True
if not modifiedIso:
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
return True
try:
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
except Exception:
return True
@ -111,34 +112,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
async def bootstrapGdrive(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GdriveBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Walk My Drive starting from the virtual root folder."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Walk My Drive starting from the virtual root folder.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the root path + neutralize policy for its subtree.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = GdriveBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
filesIndexBinaries=prefs.filesIndexBinaries,
)
limits = GdriveBootstrapLimits()
startMs = time.time()
result = GdriveBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gdrive",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -158,25 +161,51 @@ async def bootstrapGdrive(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"walk: {exc}")
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
break
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsPath = ds.get("path", "/")
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
dsLimits = GdriveBootstrapLimits(
maxItems=limits.maxItems,
maxBytes=limits.maxBytes,
maxFileSize=limits.maxFileSize,
skipMimePrefixes=limits.skipMimePrefixes,
maxDepth=limits.maxDepth,
maxAgeDays=dsMaxAgeDays,
neutralize=dsNeutralize,
)
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=dsPath,
depth=0,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -220,10 +249,13 @@ async def _walkFolder(
depth: int,
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if depth > limits.maxDepth:
return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
@ -236,6 +268,8 @@ async def _walkFolder(
return
if result.bytesProcessed >= limits.maxBytes:
return
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
entryPath = getattr(entry, "path", "") or ""
metadata = getattr(entry, "metadata", {}) or {}
@ -254,6 +288,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
continue
@ -288,6 +323,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
@ -306,29 +342,35 @@ async def _ingestOne(
revision: Optional[str],
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
fileName = getattr(entry, "name", "") or externalItemId
declaredSize = int(getattr(entry, "size", 0) or 0) or None
logItemStart("gdrive", entryPath, sizeBytes=declaredSize, mime=mimeType)
try:
downloaded = await adapter.download(entryPath)
downloaded = await downloadWithTimeout(adapter.download(entryPath), label=entryPath)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.warning("gdrive download %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"download({entryPath}): {exc}")
return
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
fileBytes: bytes
if isinstance(downloaded, (bytes, bytearray)):
fileBytes = bytes(downloaded)
else:
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
if getattr(downloaded, "mimeType", None):
mimeType = downloaded.mimeType # export may have changed the type
mimeType = downloaded.mimeType
if not fileBytes:
result.failed += 1
return
@ -339,10 +381,16 @@ async def _ingestOne(
result.bytesProcessed += len(fileBytes)
try:
extracted = runExtractionFn(
extracted = await extractWithTimeout(
runExtractionFn,
fileBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
label=entryPath,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.warning("gdrive extraction %s failed: %s", entryPath, exc)
result.failed += 1
@ -354,28 +402,37 @@ async def _ingestOne(
result.skippedPolicy += 1
return
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
}
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gdrive_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
},
)
handle = await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gdrive_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance=provenance,
)
),
label=entryPath,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
result.failed += 1
@ -388,14 +445,13 @@ async def _ingestOne(
result.indexed += 1
else:
result.failed += 1
if handle.error:
result.errors.append(f"ingest({entryPath}): {handle.error}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate
if progressCb is not None and processed % 5 == 0:
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
f"gdrive processed={processed}",
)
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
except Exception:
pass
logger.info(
@ -411,6 +467,8 @@ async def _ingestOne(
},
)
await asyncio.sleep(0)
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)

View file

@ -24,6 +24,11 @@ from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
WalkerTimeout,
ingestWithTimeout,
logItemStart,
)
logger = logging.getLogger(__name__)
@ -175,35 +180,36 @@ def _buildContentObjects(
async def bootstrapGmail(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GmailBootstrapLimits] = None,
googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its scope.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = GmailBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
limits = GmailBootstrapLimits()
startMs = time.time()
result = GmailBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gmail connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gmail",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -221,26 +227,51 @@ async def bootstrapGmail(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
for labelId in limits.labels:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = GmailBootstrapLimits(
maxMessages=limits.maxMessages,
labels=limits.labels,
maxBodyChars=limits.maxBodyChars,
includeAttachments=limits.includeAttachments,
maxAttachmentBytes=limits.maxAttachmentBytes,
maxAgeDays=limits.maxAgeDays,
mailContentDepth=limits.mailContentDepth,
neutralize=dsNeutralize,
)
for labelId in dsLimits.labels:
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -282,7 +313,8 @@ async def _ingestLabel(
labelId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
@ -316,6 +348,8 @@ async def _ingestLabel(
for stub in messageStubs:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
msgId = stub.get("id")
if not msgId:
continue
@ -337,6 +371,7 @@ async def _ingestLabel(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
nextPageToken = page.get("nextPageToken")
@ -355,7 +390,8 @@ async def _ingestMessage(
message: Dict[str, Any],
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -368,33 +404,42 @@ async def _ingestMessage(
subject = headers.get("subject") or "(no subject)"
syntheticId = _syntheticMessageId(connectionId, messageId)
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
logItemStart("gmail", f"{labelId}/{messageId}", mime="message/rfc822")
contentObjects = _buildContentObjects(
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
)
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=str(revision) if revision else None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "gmail",
"externalItemId": messageId,
"label": labelId,
"threadId": message.get("threadId"),
"tier": limits.mailContentDepth,
},
)
handle = await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=str(revision) if revision else None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "gmail",
"externalItemId": messageId,
"label": labelId,
"threadId": message.get("threadId"),
"tier": limits.mailContentDepth,
},
)
),
label=messageId,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True)
result.failed += 1
@ -420,23 +465,22 @@ async def _ingestMessage(
parentSyntheticId=syntheticId,
limits=limits,
result=result,
dataSourceId=dataSourceId,
)
except Exception as exc:
logger.warning("gmail attachments %s failed: %s", messageId, exc)
result.errors.append(f"attachments({messageId}): {exc}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate
if progressCb is not None and processed % 5 == 0:
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
f"gmail processed={processed}",
)
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
if processed % 50 == 0:
logger.info(
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "gmail",
@ -461,6 +505,7 @@ async def _ingestAttachments(
parentSyntheticId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
dataSourceId: str = "",
) -> None:
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -512,13 +557,26 @@ async def _ingestAttachments(
fileName = stub["filename"]
mimeType = stub["mimeType"]
syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"])
attLabel = f"{messageId}/att:{stub['attachmentId']}/{fileName}"
logItemStart("gmail-attachment", attLabel, sizeBytes=stub.get("size") or None, mime=mimeType)
try:
extracted = runExtraction(
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
extractWithTimeout as _extractWithTimeout,
)
def _runAttExtraction():
return runExtraction(
extractorRegistry, chunkerRegistry,
rawBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
try:
extracted = await _extractWithTimeout(_runAttExtraction, label=attLabel)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
continue
except Exception as exc:
logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc)
result.failed += 1
@ -550,26 +608,33 @@ async def _ingestAttachments(
continue
try:
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "gmail",
"parentId": parentSyntheticId,
"externalItemId": stub["attachmentId"],
"parentMessageId": messageId,
},
)
await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "gmail",
"parentId": parentSyntheticId,
"externalItemId": stub["attachmentId"],
"parentMessageId": messageId,
},
)
),
label=attLabel,
)
result.attachmentsIndexed += 1
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
except Exception as exc:
logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc)
result.failed += 1

View file

@ -0,0 +1,439 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""kDrive bootstrap for the unified knowledge ingestion lane.
Walks every ragIndexEnabled kDrive DataSource, downloads file items and
hands them to KnowledgeService.requestIngestion. Idempotency is provided
by the ingestion facade (content-hash dedup).
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
WalkerTimeout,
downloadWithTimeout,
extractWithTimeout,
ingestWithTimeout,
logItemStart,
)
logger = logging.getLogger(__name__)
MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
@dataclass
class KdriveBootstrapLimits:
    """Budget and policy knobs for a single kDrive bootstrap run.

    The walker reads these values to decide when to stop and which entries
    to skip; the defaults come from the module-level ``*_DEFAULT`` constants.
    """

    # Walk stops once indexed + skippedDuplicate reaches this many items.
    maxItems: int = MAX_ITEMS_DEFAULT
    # Walk stops once this many downloaded bytes have been processed.
    maxBytes: int = MAX_BYTES_DEFAULT
    # Files whose declared size exceeds this are skipped (counted as policy skips).
    maxFileSize: int = MAX_FILE_SIZE_DEFAULT
    # Files whose mime type starts with any of these prefixes are skipped.
    skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
    # Deepest folder nesting level (root = 0) that is still browsed.
    maxDepth: int = MAX_DEPTH_DEFAULT
    # Per-DataSource flag copied from the data source's "neutralize" setting.
    # NOTE(review): presumably forwarded to the ingestion job - confirm in _ingestOne.
    neutralize: bool = False
@dataclass
class KdriveBootstrapResult:
    """Mutable counters accumulated while one kDrive connection is bootstrapped."""

    # Connection this result belongs to.
    connectionId: str
    # Items counted as newly indexed; together with skippedDuplicate this is
    # compared against the maxItems budget.
    indexed: int = 0
    # Items counted as duplicates; also counts toward the maxItems budget.
    skippedDuplicate: int = 0
    # Items skipped by policy (mime prefix, file size, or no extractable content).
    skippedPolicy: int = 0
    # Items whose download, extraction or ingestion failed.
    failed: int = 0
    # Sum of the byte lengths of all downloaded files.
    bytesProcessed: int = 0
    # Human-readable error descriptions collected along the way.
    errors: List[str] = field(default_factory=list)
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
    """Derive a deterministic, compact id for a kDrive item.

    The id has the shape ``kd:<first 8 chars of connectionId>:<16 hex chars>``,
    where the hex part is the leading slice of the SHA-256 digest of
    ``"<connectionId>:<externalItemId>"``.
    """
    seed = f"{connectionId}:{externalItemId}".encode("utf-8")
    digestPrefix = hashlib.sha256(seed).hexdigest()[:16]
    connectionPrefix = connectionId[:8]
    return f"kd:{connectionPrefix}:{digestPrefix}"
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
    """Convert the parts of an extraction result into content-object dicts.

    Parts without data, or whose data is only whitespace once stringified,
    are dropped. The part's ``typeGroup`` is mapped to a ``contentType``:
    ``image`` stays ``image``, ``binary``/``container`` become ``other`` and
    everything else (including a missing group) is ``text``.

    Args:
        extracted: Object exposing an optional ``parts`` iterable.
        fileName: Name recorded as ``containerPath`` in every ``contextRef``.

    Returns:
        One dict per kept part with the keys ``contentObjectId``,
        ``contentType``, ``data`` and ``contextRef``.
    """
    contentObjects: List[Dict[str, Any]] = []
    for piece in getattr(extracted, "parts", None) or []:
        payload = getattr(piece, "data", None) or ""
        if payload and str(payload).strip():
            group = getattr(piece, "typeGroup", "text") or "text"
            kind = (
                "image" if group == "image"
                else "other" if group in ("binary", "container")
                else "text"
            )
            objectId = getattr(piece, "id", "")
            baseRef = {
                "containerPath": fileName,
                "location": getattr(piece, "label", None) or "file",
            }
            # Part metadata is merged last, so it may override the base keys.
            extraRef = getattr(piece, "metadata", None) or {}
            contentObjects.append({
                "contentObjectId": objectId,
                "contentType": kind,
                "data": payload,
                "contextRef": {**baseRef, **extraRef},
            })
    return contentObjects
async def bootstrapKdrive(
    connectionId: str,
    *,
    dataSources: Optional[List[Dict[str, Any]]] = None,
    progressCb: Optional[Any] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[KdriveBootstrapLimits] = None,
    runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Enumerate kDrive folders and ingest files via the facade.

    Walks the folder tree below every entry of ``dataSources``. Each data
    source contributes its root ``path``, its ``id`` (passed on as
    ``dataSourceId``) and its ``neutralize`` flag; all remaining limits are
    shared across data sources, as is the result accumulator.

    Args:
        connectionId: Id of the user connection to bootstrap.
        dataSources: Data-source dicts to walk. When empty or ``None`` nothing
            is done and a "skipped" result is returned.
        progressCb: Optional progress callback. If it exposes
            ``isCancelled()``, that method is polled for cancellation.
        adapter: kDrive service adapter offering ``browse``/``download``.
        connection: Connection record providing ``mandateId``/``userId``.
        knowledgeService: Ingestion facade.
            If any of ``adapter``, ``connection`` or ``knowledgeService`` is
            ``None``, all three are resolved via ``_resolveDependencies``.
        limits: Budgets for the run; defaults to ``KdriveBootstrapLimits()``.
        runExtractionFn: ``(bytes, name, mime, options)`` extraction callable;
            defaults to the standard extraction pipeline.

    Returns:
        ``{"connectionId", "skipped": True, "reason": "no_datasources"}`` when
        there is nothing to walk, otherwise the dict built by
        ``_finalizeResult``, extended with ``"cancelled": True`` if a
        cancellation was requested before or during the walk.
    """
    if not dataSources:
        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}

    if not limits:
        limits = KdriveBootstrapLimits()

    def _cancelRequested() -> bool:
        # The callback is duck-typed: only objects exposing isCancelled() can cancel.
        return bool(
            progressCb
            and hasattr(progressCb, "isCancelled")
            and progressCb.isCancelled()
        )

    startMs = time.time()
    result = KdriveBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=kdrive connectionId=%s dataSources=%d",
        connectionId, len(dataSources),
        extra={"event": "ingestion.connection.bootstrap.started", "part": "kdrive",
               "connectionId": connectionId, "dataSourceCount": len(dataSources)},
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    if runExtractionFn is None:
        # Imported lazily so callers that inject their own extraction function
        # never load the extraction pipeline.
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.serviceCenter.services.serviceExtraction.subRegistry import (
            ExtractorRegistry, ChunkerRegistry,
        )
        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()

        def runExtractionFn(bytesData, name, mime, options):
            return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    cancelled = False
    for ds in dataSources:
        if result.indexed + result.skippedDuplicate >= limits.maxItems:
            break
        if _cancelRequested():
            cancelled = True
            break

        dsPath = ds.get("path", "")
        dsId = ds.get("id", "")
        dsNeutralize = ds.get("neutralize", False)

        # Same budgets for every data source; only the neutralize policy differs.
        dsLimits = KdriveBootstrapLimits(
            maxItems=limits.maxItems,
            maxBytes=limits.maxBytes,
            maxFileSize=limits.maxFileSize,
            skipMimePrefixes=limits.skipMimePrefixes,
            maxDepth=limits.maxDepth,
            neutralize=dsNeutralize,
        )

        try:
            await _walkFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                runExtractionFn=runExtractionFn,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderPath=dsPath,
                depth=0,
                limits=dsLimits,
                result=result,
                progressCb=progressCb,
                dataSourceId=dsId,
            )
        except Exception as exc:
            # One broken data source must not abort the remaining ones.
            logger.error("kdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
            result.errors.append(f"walk({dsPath}): {exc}")

    # A cancellation that arrives while the last (or only) data source is being
    # walked makes _walkFolder return early, but the loop-top check above never
    # runs again. Re-check here so an aborted run is not reported as complete.
    if not cancelled and _cancelRequested():
        cancelled = True

    finalResult = _finalizeResult(connectionId, result, startMs)
    if cancelled:
        finalResult["cancelled"] = True
    return finalResult
async def _resolveDependencies(connectionId: str):
    """Look up everything a kDrive bootstrap needs for ``connectionId``.

    Loads the user connection through the root DB interface, fetches a fresh
    access token, builds the Infomaniak connector's ``kdrive`` service adapter
    and obtains the knowledge service under a root-user context scoped to the
    connection's mandate.

    Returns:
        Tuple ``(adapter, connection, knowledgeService)``.

    Raises:
        ValueError: If the connection does not exist or no usable access
            token is available for it.
    """
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerInfomaniak.connectorInfomaniak import InfomaniakConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    userConnection = getRootInterface().getUserConnectionById(connectionId)
    if userConnection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    freshToken = TokenManager().getFreshToken(connectionId)
    if not (freshToken and freshToken.tokenAccess):
        raise ValueError(f"No valid token for connection {connectionId}")

    kdriveAdapter = InfomaniakConnector(
        userConnection, freshToken.tokenAccess
    ).getServiceAdapter("kdrive")

    # The knowledge service runs as the root user, scoped to the connection's mandate.
    serviceContext = ServiceCenterContext(
        user=getRootUser(),
        mandate_id=str(getattr(userConnection, "mandateId", "") or ""),
    )
    knowledge = getService("knowledge", serviceContext)
    return kdriveAdapter, userConnection, knowledge
async def _walkFolder(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    folderPath: str,
    depth: int,
    limits: KdriveBootstrapLimits,
    result: KdriveBootstrapResult,
    progressCb: Optional[Any],
    dataSourceId: str = "",
) -> None:
    """Recursively walk one kDrive folder and pass eligible files to ``_ingestOne``.

    The walk of this folder ends early when the depth exceeds
    ``limits.maxDepth``, when a cancellation is reported by
    ``progressCb.isCancelled()``, when browsing fails (recorded in
    ``result.errors``), or when the item or byte budget is used up. Inside the
    entry loop cancellation is only polled while the processed count
    (indexed + skippedDuplicate) is a multiple of 50.

    Files are skipped, and counted in ``result.skippedPolicy``, when their
    mime type starts with one of ``limits.skipMimePrefixes`` or their declared
    size exceeds ``limits.maxFileSize``.
    """
    if depth > limits.maxDepth:
        return
    if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
        return

    try:
        children = await adapter.browse(folderPath)
    except Exception as exc:
        logger.warning("kdrive browse %s failed: %s", folderPath, exc)
        result.errors.append(f"browse({folderPath}): {exc}")
        return

    # Arguments handed unchanged to both the recursive call and _ingestOne.
    shared = {
        "adapter": adapter,
        "knowledgeService": knowledgeService,
        "runExtractionFn": runExtractionFn,
        "connectionId": connectionId,
        "mandateId": mandateId,
        "userId": userId,
        "limits": limits,
        "result": result,
        "progressCb": progressCb,
        "dataSourceId": dataSourceId,
    }

    for child in children:
        handled = result.indexed + result.skippedDuplicate
        if handled >= limits.maxItems or result.bytesProcessed >= limits.maxBytes:
            return
        if (
            progressCb
            and hasattr(progressCb, "isCancelled")
            and handled % 50 == 0
            and progressCb.isCancelled()
        ):
            return

        childPath = getattr(child, "path", "") or ""

        if getattr(child, "isFolder", False):
            await _walkFolder(folderPath=childPath, depth=depth + 1, **shared)
            continue

        mime = getattr(child, "mimeType", None) or "application/octet-stream"
        if mime.startswith(tuple(limits.skipMimePrefixes)):
            result.skippedPolicy += 1
            continue

        declaredSize = int(getattr(child, "size", 0) or 0)
        if declaredSize and declaredSize > limits.maxFileSize:
            result.skippedPolicy += 1
            continue

        meta = getattr(child, "metadata", {}) or {}
        # Fall back to the path when the adapter supplies no stable item id.
        itemId = meta.get("id") or childPath
        itemRevision = meta.get("revision") or meta.get("lastModified")

        await _ingestOne(
            entry=child,
            entryPath=childPath,
            mimeType=mime,
            externalItemId=itemId,
            revision=itemRevision,
            **shared,
        )
async def _ingestOne(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    entry,
    entryPath: str,
    mimeType: str,
    externalItemId: str,
    revision: Optional[str],
    limits: KdriveBootstrapLimits,
    result: KdriveBootstrapResult,
    progressCb: Optional[Any],
    dataSourceId: str = "",
) -> None:
    """Download, extract and ingest a single kDrive file.

    The item passes through three phases (download, extraction, ingestion),
    each wrapped by the corresponding timeout helper. Nothing is returned and
    no exception from these phases propagates: every outcome is recorded on
    ``result`` (``indexed``, ``skippedDuplicate``, ``skippedPolicy``,
    ``failed``, ``bytesProcessed``, ``errors``), which is mutated in place.

    Args:
        adapter: Object whose ``download(path)`` yields the file payload.
        knowledgeService: Object whose ``requestIngestion(job)`` is awaited.
        runExtractionFn: Synchronous extraction callable run via
            ``extractWithTimeout``.
        connectionId / mandateId / userId: Identifiers copied into the job
            and its provenance.
        entry: Browse entry; only ``name`` and ``size`` are read here.
        entryPath: Path used for the download and as the log/error label.
        mimeType: Declared MIME type; replaced by the download's own value
            when that is present.
        externalItemId: Provider-side id, used for the synthetic file id.
        revision: Passed through as ``contentVersion`` and in provenance.
        limits: Supplies the ``neutralize`` flag for the job.
        result: Accumulator updated in place.
        progressCb: Optional callable invoked as ``progressCb(0, message)``.
        dataSourceId: Recorded in the provenance of the ingested item.
    """
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    syntheticFileId = _syntheticFileId(connectionId, externalItemId)
    fileName = getattr(entry, "name", "") or externalItemId
    # A missing or zero size is logged as "unknown" (None), not as 0.
    declaredSize = int(getattr(entry, "size", 0) or 0) or None
    logItemStart("kdrive", entryPath, sizeBytes=declaredSize, mime=mimeType)

    # --- Phase 1: download -------------------------------------------------
    try:
        downloaded = await downloadWithTimeout(adapter.download(entryPath), label=entryPath)
        fileBytes = getattr(downloaded, "data", None)
        # Name and MIME type reported by the download win over the listing's.
        fileName = getattr(downloaded, "fileName", None) or fileName
        mimeType = getattr(downloaded, "mimeType", None) or mimeType
    except WalkerTimeout as timeoutErr:
        result.failed += 1
        result.errors.append(str(timeoutErr))
        return
    except Exception as downloadErr:
        logger.warning("kdrive download %s failed: %s", entryPath, downloadErr)
        result.failed += 1
        result.errors.append(f"download({entryPath}): {downloadErr}")
        return

    if not fileBytes:
        # Empty or missing payload counts as a failure (no error text recorded).
        result.failed += 1
        return
    result.bytesProcessed += len(fileBytes)

    # --- Phase 2: extraction -----------------------------------------------
    try:
        extracted = await extractWithTimeout(
            runExtractionFn,
            fileBytes,
            fileName,
            mimeType,
            ExtractionOptions(mergeStrategy=None),
            label=entryPath,
        )
    except WalkerTimeout as timeoutErr:
        result.failed += 1
        result.errors.append(str(timeoutErr))
        return
    except Exception as extractErr:
        logger.warning("kdrive extraction %s failed: %s", entryPath, extractErr)
        result.failed += 1
        result.errors.append(f"extract({entryPath}): {extractErr}")
        return

    contentObjects = _toContentObjects(extracted, fileName)
    if not contentObjects:
        # Nothing extractable: counted as a policy skip, not as a failure.
        result.skippedPolicy += 1
        return

    provenance: Dict[str, Any] = {
        "connectionId": connectionId,
        "dataSourceId": dataSourceId,
        "authority": "infomaniak",
        "service": "kdrive",
        "externalItemId": externalItemId,
        "externalPath": entryPath,
        "revision": revision,
    }

    # --- Phase 3: ingestion ------------------------------------------------
    try:
        # The job is built inside the try so construction errors are also
        # recorded as an ingestion failure for this item.
        handle = await ingestWithTimeout(
            knowledgeService.requestIngestion(
                IngestionJob(
                    sourceKind="kdrive_item",
                    sourceId=syntheticFileId,
                    fileName=fileName,
                    mimeType=mimeType,
                    userId=userId,
                    mandateId=mandateId,
                    contentObjects=contentObjects,
                    contentVersion=revision,
                    neutralize=limits.neutralize,
                    provenance=provenance,
                )
            ),
            label=entryPath,
        )
    except WalkerTimeout as timeoutErr:
        result.failed += 1
        result.errors.append(str(timeoutErr))
        return
    except Exception as ingestErr:
        logger.error("kdrive ingestion %s failed: %s", entryPath, ingestErr, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({entryPath}): {ingestErr}")
        return

    outcome = handle.status
    if outcome == "indexed":
        result.indexed += 1
    elif outcome == "duplicate":
        result.skippedDuplicate += 1
    else:
        result.failed += 1
        if handle.error:
            result.errors.append(f"ingest({entryPath}): {handle.error}")

    processed = result.indexed + result.skippedDuplicate
    if progressCb is not None and processed % 5 == 0:
        try:
            progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
        except Exception:
            # Progress reporting is best-effort; a failing callback must not
            # abort the walk.
            pass

    # Hand control back to the event loop once per item.
    await asyncio.sleep(0)
def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
    """Log the end of a kDrive bootstrap run and return its summary dict.

    Args:
        connectionId: Connection id written to the log line.
        result: Accumulated counters and errors of the run.
        startMs: Start timestamp as returned by ``time.time()``; despite the
            name it is subtracted from ``time.time()`` and the difference is
            multiplied by 1000, so it is handled as seconds.

    Returns:
        A dict with the counters, ``bytesProcessed``, ``durationMs`` and at
        most the first 20 error strings.
    """
    durationMs = int((time.time() - startMs) * 1000)

    # Structured fields mirror the values interpolated into the message.
    logFields = {
        "event": "ingestion.connection.bootstrap.done",
        "part": "kdrive",
        "connectionId": connectionId,
        "indexed": result.indexed,
        "skippedDup": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "durationMs": durationMs,
    }
    logger.info(
        "ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
        connectionId,
        result.indexed,
        result.skippedDuplicate,
        result.skippedPolicy,
        result.failed,
        durationMs,
        extra=logFields,
    )

    summary: Dict[str, Any] = {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "bytesProcessed": result.bytesProcessed,
        "durationMs": durationMs,
        # Only the first 20 errors are included in the summary.
        "errors": result.errors[:20],
    }
    return summary

View file

@ -18,9 +18,15 @@ import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
WalkerTimeout,
extractWithTimeout,
ingestWithTimeout,
logItemStart,
)
logger = logging.getLogger(__name__)
@ -139,34 +145,35 @@ def _buildContentObjects(
async def bootstrapOutlook(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[OutlookBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its messages.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = OutlookBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
limits = OutlookBootstrapLimits()
startMs = time.time()
result = OutlookBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "outlook",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -176,27 +183,52 @@ async def bootstrapOutlook(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
folderIds = await _selectFolderIds(adapter, limits)
for folderId in folderIds:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = OutlookBootstrapLimits(
maxMessages=limits.maxMessages,
maxFolders=limits.maxFolders,
maxBodyChars=limits.maxBodyChars,
includeAttachments=limits.includeAttachments,
maxAttachmentBytes=limits.maxAttachmentBytes,
maxAgeDays=limits.maxAgeDays,
mailContentDepth=limits.mailContentDepth,
neutralize=dsNeutralize,
)
folderIds = await _selectFolderIds(adapter, dsLimits)
for folderId in folderIds:
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -266,8 +298,12 @@ async def _ingestFolder(
folderId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
return
@ -307,6 +343,8 @@ async def _ingestFolder(
for message in page.get("value", []) or []:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
await _ingestMessage(
adapter=adapter,
knowledgeService=knowledgeService,
@ -317,6 +355,7 @@ async def _ingestFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
nextLink = page.get("@odata.nextLink")
@ -338,7 +377,8 @@ async def _ingestMessage(
message: Dict[str, Any],
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -350,33 +390,42 @@ async def _ingestMessage(
subject = message.get("subject") or "(no subject)"
syntheticId = _syntheticMessageId(connectionId, messageId)
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
logItemStart("outlook", messageId, mime="message/rfc822")
contentObjects = _buildContentObjects(
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
)
# Always at least the header is emitted, so `contentObjects` is non-empty.
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "msft",
"service": "outlook",
"externalItemId": messageId,
"internetMessageId": message.get("internetMessageId"),
"tier": limits.mailContentDepth,
},
)
handle = await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft",
"service": "outlook",
"externalItemId": messageId,
"internetMessageId": message.get("internetMessageId"),
"tier": limits.mailContentDepth,
},
)
),
label=messageId,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True)
result.failed += 1
@ -402,23 +451,22 @@ async def _ingestMessage(
parentSyntheticId=syntheticId,
limits=limits,
result=result,
dataSourceId=dataSourceId,
)
except Exception as exc:
logger.warning("outlook attachments %s failed: %s", messageId, exc)
result.errors.append(f"attachments({messageId}): {exc}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate
if progressCb is not None and processed % 5 == 0:
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
f"outlook processed={processed}",
)
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
if processed % 50 == 0:
logger.info(
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "outlook",
@ -443,6 +491,7 @@ async def _ingestAttachments(
parentSyntheticId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
dataSourceId: str = "",
) -> None:
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -481,13 +530,22 @@ async def _ingestAttachments(
mimeType = attachment.get("contentType") or "application/octet-stream"
attachmentId = attachment.get("id") or fileName
syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId)
attLabel = f"{messageId}/att:{attachmentId}/{fileName}"
logItemStart("outlook-attachment", attLabel, sizeBytes=size or None, mime=mimeType)
try:
extracted = runExtraction(
def _runAttExtraction():
return runExtraction(
extractorRegistry, chunkerRegistry,
rawBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
try:
extracted = await extractWithTimeout(_runAttExtraction, label=attLabel)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
continue
except Exception as exc:
logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc)
result.failed += 1
@ -519,27 +577,34 @@ async def _ingestAttachments(
continue
try:
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "msft",
"service": "outlook",
"parentId": parentSyntheticId,
"externalItemId": attachmentId,
"parentMessageId": messageId,
},
)
await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft",
"service": "outlook",
"parentId": parentSyntheticId,
"externalItemId": attachmentId,
"parentMessageId": messageId,
},
)
),
label=attLabel,
)
result.attachmentsIndexed += 1
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
except Exception as exc:
logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc)
result.failed += 1

View file

@ -20,6 +20,13 @@ from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
WalkerTimeout,
downloadWithTimeout,
extractWithTimeout,
ingestWithTimeout,
logItemStart,
)
logger = logging.getLogger(__name__)
@ -94,35 +101,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
async def bootstrapSharepoint(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[SharepointBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
"""Enumerate SharePoint drives and ingest files via the facade.
Parameters allow injection for tests; production callers pass only
`connectionId` (and optionally a progressCb) and everything else is
resolved against the registered services.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the root path + neutralize policy for its subtree.
"""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
limits = SharepointBootstrapLimits()
startMs = time.time()
result = SharepointBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "sharepoint",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -142,17 +150,27 @@ async def bootstrapSharepoint(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
sites = await adapter.browse("/", limit=limits.maxSites)
except Exception as exc:
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"site_discovery: {exc}")
return _finalizeResult(connectionId, result, startMs)
for site in sites[: limits.maxSites]:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
break
sitePath = getattr(site, "path", "") or ""
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
dsPath = ds.get("path", "")
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = SharepointBootstrapLimits(
maxItems=limits.maxItems,
maxBytes=limits.maxBytes,
maxFileSize=limits.maxFileSize,
skipMimePrefixes=limits.skipMimePrefixes,
maxDepth=limits.maxDepth,
maxSites=limits.maxSites,
neutralize=dsNeutralize,
)
try:
await _walkFolder(
adapter=adapter,
@ -161,17 +179,21 @@ async def bootstrapSharepoint(
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=sitePath,
folderPath=dsPath,
depth=0,
limits=limits,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
result.errors.append(f"walk({sitePath}): {exc}")
logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}")
return _finalizeResult(connectionId, result, startMs)
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -221,10 +243,13 @@ async def _walkFolder(
depth: int,
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if depth > limits.maxDepth:
return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
@ -237,6 +262,8 @@ async def _walkFolder(
return
if result.bytesProcessed >= limits.maxBytes:
return
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
entryPath = getattr(entry, "path", "") or ""
if getattr(entry, "isFolder", False):
@ -252,6 +279,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
continue
@ -283,6 +311,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
@ -301,15 +330,22 @@ async def _ingestOne(
revision: Optional[str],
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
fileName = getattr(entry, "name", "") or externalItemId
declaredSize = int(getattr(entry, "size", 0) or 0) or None
logItemStart("sharepoint", entryPath, sizeBytes=declaredSize, mime=mimeType)
try:
fileBytes = await adapter.download(entryPath)
fileBytes = await downloadWithTimeout(adapter.download(entryPath), label=entryPath)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.warning("sharepoint download %s failed: %s", entryPath, exc)
result.failed += 1
@ -322,10 +358,16 @@ async def _ingestOne(
result.bytesProcessed += len(fileBytes)
try:
extracted = runExtractionFn(
extracted = await extractWithTimeout(
runExtractionFn,
fileBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
label=entryPath,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.warning("sharepoint extraction %s failed: %s", entryPath, exc)
result.failed += 1
@ -339,6 +381,7 @@ async def _ingestOne(
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft",
"service": "sharepoint",
"externalItemId": externalItemId,
@ -346,20 +389,27 @@ async def _ingestOne(
"revision": revision,
}
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="sharepoint_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance=provenance,
)
handle = await ingestWithTimeout(
knowledgeService.requestIngestion(
IngestionJob(
sourceKind="sharepoint_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance=provenance,
)
),
label=entryPath,
)
except WalkerTimeout as exc:
result.failed += 1
result.errors.append(str(exc))
return
except Exception as exc:
logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True)
result.failed += 1
@ -375,27 +425,17 @@ async def _ingestOne(
if handle.error:
result.errors.append(f"ingest({entryPath}): {handle.error}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate
if progressCb is not None and processed % 5 == 0:
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
f"sharepoint processed={processed}",
)
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "sharepoint",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
if processed % 50 == 0:
logger.info(
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d indexed=%d failed=%d",
processed, result.indexed, result.failed,
)
# Yield so the event loop can interleave other tasks (download/extract are
# CPU-ish and extraction uses sync libs; cooperative scheduling prevents

View file

@ -0,0 +1,78 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
If no ancestor has a value, the default (False) is used.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def resolveEffectiveNeutralize(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Return the effective ``neutralize`` policy for ``ds``.

    An explicit value on the DataSource itself wins: ``False`` yields False,
    any other non-None value yields True. Only when the key is missing or
    None is the value inherited from the nearest ancestor DataSource, e.g.
    /sites/HR/Documents inherits from /sites/HR.
    """
    explicit = ds.get("neutralize")
    if explicit is None:
        # No own setting: fall back to the path hierarchy.
        return _findAncestorPolicy(ds, allDataSources, "neutralize")
    return explicit is not False
def resolveEffectiveRagIndexEnabled(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Return the effective ``ragIndexEnabled`` policy for ``ds``.

    A literal ``True`` or ``False`` on the DataSource is returned as is; any
    other value (including a missing key) defers to the nearest ancestor
    DataSource in the path hierarchy.
    """
    explicit = ds.get("ragIndexEnabled")
    if explicit is True or explicit is False:
        return explicit
    return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
def _findAncestorPolicy(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
    field: str,
) -> bool:
    """Return the policy value inherited from the nearest ancestor DataSource.

    A candidate is an ancestor of ``ds`` when it belongs to the same
    connection, is not ``ds`` itself, and its path is a strict prefix of
    ``ds``'s path that ends on a path-segment boundary. The boundary check
    matters: a plain string-prefix test would treat ``/sites/HR`` as an
    ancestor of ``/sites/HR2/Docs`` and leak its policy into an unrelated
    subtree.

    Args:
        ds: The DataSource whose inherited value is wanted.
        allDataSources: All DataSources to search for ancestors.
        field: Name of the boolean policy key (e.g. ``"neutralize"``).

    Returns:
        The value of the nearest (longest-path) ancestor that has ``field``
        set to a literal ``True`` or ``False``; ``False`` when ``ds`` has no
        path or no ancestor carries an explicit value.
    """
    dsPath = ds.get("path", "")
    if not dsPath:
        return False
    dsId = ds.get("id")
    connectionId = ds.get("connectionId", "")

    ancestors = []
    for candidate in allDataSources:
        if candidate.get("id") == dsId:
            continue
        if candidate.get("connectionId") != connectionId:
            continue
        candidatePath = candidate.get("path", "")
        if not candidatePath or len(candidatePath) >= len(dsPath):
            continue
        if not dsPath.startswith(candidatePath):
            continue
        # Segment boundary: either the ancestor path already ends with "/"
        # (e.g. the root "/") or the next character of the child path is "/".
        # NOTE(review): assumes "/"-separated paths for all connectors - confirm.
        if not (candidatePath.endswith("/") or dsPath[len(candidatePath)] == "/"):
            continue
        ancestors.append(candidate)

    # Longest path first, so the closest ancestor is consulted first.
    ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
    for ancestor in ancestors:
        val = ancestor.get(field)
        if val is True or val is False:
            return val
    return False

View file

@ -0,0 +1,116 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Shared helpers for ingestion walkers (timeouts, per-item logging).
Walkers (sharepoint, gdrive, gmail, outlook, clickup, kdrive) all face the
same risks:
- A single `adapter.download()` call can hang on the network for hours.
- A single `runExtraction()` call can hang on a corrupt PDF/Office doc inside
a sync extractor library, blocking the asyncio loop.
- A single `requestIngestion()` call can stall on the embedding API.
Without timeouts, one bad item freezes the whole bootstrap job and we end
up with "Job stuck at 10% for 10h" zombies.
These helpers wrap each phase in `asyncio.wait_for`. Sync extraction runs
on a worker thread so the loop stays responsive. Every wrapped call also
emits a short start/done log line, so when something hangs we know the
exact item that caused it (path, size, mime).
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any, Awaitable, Callable, Optional
logger = logging.getLogger(__name__)
DOWNLOAD_TIMEOUT_S = 60
EXTRACTION_TIMEOUT_S = 90
INGEST_TIMEOUT_S = 60
class WalkerTimeout(Exception):
    """Raised when a walker phase exceeds its timeout budget.

    The timeout helpers in this module (download, extract, ingest) raise it
    in place of ``asyncio.TimeoutError``; the message carries the phase, the
    timeout in seconds and the item label.
    """
async def downloadWithTimeout(
    awaitable: Awaitable[Any],
    *,
    label: str,
    timeoutSeconds: int = DOWNLOAD_TIMEOUT_S,
) -> Any:
    """Await a download with an upper bound on its duration.

    Args:
        awaitable: The pending download (typically ``adapter.download(path)``).
        label: Short human-readable identifier of the item (usually its
            external path); it appears in every log line and in the timeout
            message so the offending item can be pinpointed.
        timeoutSeconds: Budget passed to ``asyncio.wait_for``.

    Returns:
        Whatever the awaitable resolves to.

    Raises:
        WalkerTimeout: If the awaitable does not finish within the budget.
    """
    logger.info("walker.download.start %s timeout=%ds", label, timeoutSeconds)
    try:
        payload = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
    except asyncio.TimeoutError as timeoutErr:
        logger.warning("walker.download.timeout %s after %ds", label, timeoutSeconds)
        raise WalkerTimeout(f"download timeout after {timeoutSeconds}s: {label}") from timeoutErr
    else:
        logger.debug("walker.download.done %s", label)
        return payload
async def extractWithTimeout(
    syncFn: Callable[..., Any],
    *args: Any,
    label: str,
    timeoutSeconds: int = EXTRACTION_TIMEOUT_S,
) -> Any:
    """Run a synchronous extraction callable on a worker thread with a timeout.

    ``syncFn(*args)`` is dispatched through ``asyncio.to_thread`` and awaited
    under ``asyncio.wait_for``. Sync extractors (PDF, OCR, MS Office) cannot
    be cancelled cleanly from asyncio: the timeout only releases the awaiter,
    while the underlying thread may keep running until the process exits.
    The walker nevertheless moves on to the next item instead of freezing.

    Args:
        syncFn: Blocking extraction function.
        *args: Positional arguments forwarded to ``syncFn``.
        label: Item identifier used in log lines and the timeout message.
        timeoutSeconds: Budget passed to ``asyncio.wait_for``.

    Returns:
        The value returned by ``syncFn``.

    Raises:
        WalkerTimeout: If the extraction does not finish within the budget.
    """
    logger.info("walker.extract.start %s timeout=%ds", label, timeoutSeconds)
    try:
        extractionTask = asyncio.to_thread(syncFn, *args)
        extracted = await asyncio.wait_for(extractionTask, timeout=timeoutSeconds)
    except asyncio.TimeoutError as timeoutErr:
        logger.warning("walker.extract.timeout %s after %ds", label, timeoutSeconds)
        raise WalkerTimeout(f"extract timeout after {timeoutSeconds}s: {label}") from timeoutErr
    else:
        logger.debug("walker.extract.done %s", label)
        return extracted
async def ingestWithTimeout(
    awaitable: Awaitable[Any],
    *,
    label: str,
    timeoutSeconds: int = INGEST_TIMEOUT_S,
) -> Any:
    """Await an ingestion request with an upper bound on its duration.

    Args:
        awaitable: The pending ingestion request.
        label: Item identifier used in log lines and the timeout message.
        timeoutSeconds: Budget passed to ``asyncio.wait_for``.

    Returns:
        Whatever the awaitable resolves to.

    Raises:
        WalkerTimeout: If the request does not finish within the budget.
    """
    logger.debug("walker.ingest.start %s timeout=%ds", label, timeoutSeconds)
    try:
        handle = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
    except asyncio.TimeoutError as timeoutErr:
        logger.warning("walker.ingest.timeout %s after %ds", label, timeoutSeconds)
        raise WalkerTimeout(f"ingest timeout after {timeoutSeconds}s: {label}") from timeoutErr
    else:
        logger.debug("walker.ingest.done %s", label)
        return handle
def logItemStart(service: str, label: str, *, sizeBytes: Optional[int] = None, mime: Optional[str] = None) -> None:
    """Log that processing of one item is about to begin.

    When a worker hangs, the last ``walker.item.start`` line in the log
    identifies the item that was being processed, which makes this the key
    diagnostic for stuck-job triage.

    Args:
        service: Walker name written as ``service=...``.
        label: Item identifier written as ``path=...``.
        sizeBytes: Declared size; the ``size=`` part is omitted when None.
        mime: MIME type; the ``mime=`` part is omitted when empty or None.
    """
    message = f"walker.item.start service={service} path={label}"
    if sizeBytes is not None:
        message += f" size={sizeBytes}"
    if mime:
        message += f" mime={mime}"
    logger.info(message)

View file

@ -98,7 +98,8 @@ class WebService:
searchUrls = []
searchResultsWithContent = []
if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
if operationId:
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
try:
searchUrls, searchResultsWithContent = await self._performWebSearch(
@ -113,16 +114,14 @@ class WebService:
searchUrls = []
searchResultsWithContent = []
# Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant)
if searchUrls:
# Prepend Tavily URLs to the list (they're more relevant)
allUrls = searchUrls + allUrls
logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
else:
# If Tavily search failed, use AI-extracted URLs
logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
if operationId:
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
# If we have search results (even without content), use them directly instead of crawling
# Tavily search results are more relevant than generic AI-extracted URLs

View file

@ -85,6 +85,11 @@ class AiAuditLogger:
try:
from modules.datamodels.datamodelAiAudit import AiAuditLogEntry
if contentInput:
contentInput = contentInput.replace("\x00", "")
if contentOutput:
contentOutput = contentOutput.replace("\x00", "")
inputPreview = (contentInput or "")[:_PREVIEW_LENGTH] or None
outputPreview = (contentOutput or "")[:_PREVIEW_LENGTH] or None
inputHash = hashlib.sha256(contentInput.encode("utf-8")).hexdigest() if contentInput else None

View file

@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [
"path": "/automations",
"order": 30,
},
{
"id": "rag-inventory",
"objectKey": "ui.system.ragInventory",
"label": t("RAG-Inventar"),
"icon": "FaDatabase",
"path": "/rag-inventory",
"order": 35,
},
{
"id": "store",
"objectKey": "ui.system.store",
@ -322,6 +330,16 @@ NAVIGATION_SECTIONS = [
"adminOnly": True,
"sysAdminOnly": True,
},
{
"id": "admin-stt-benchmark",
"objectKey": "ui.admin.sttBenchmark",
"label": t("STT Benchmark"),
"icon": "FaMicrophone",
"path": "/admin/stt-benchmark",
"order": 92,
"adminOnly": True,
"sysAdminOnly": True,
},
{
"id": "admin-languages",
"objectKey": "ui.admin.languages",

View file

@ -47,7 +47,9 @@ backports-tarfile==1.2.0
bcrypt==4.0.1
# via -r requirements.txt
beautifulsoup4==4.12.2
# via -r requirements.txt
# via
# -r requirements.txt
# extract-msg
bleach==6.3.0
# via -r requirements.txt
bokeh==3.3.4
@ -81,6 +83,10 @@ click-plugins==1.1.1.2
# via fiona
cligj==0.7.2
# via fiona
colorclass==2.2.2
# via oletools
compressed-rtf==1.0.7
# via extract-msg
contourpy==1.3.3
# via
# bokeh
@ -89,6 +95,7 @@ cryptography==43.0.3
# via
# -r requirements.txt
# msal
# msoffcrypto-tool
# pyjwt
# python-jose
# secretstorage
@ -102,6 +109,10 @@ dnspython==2.8.0
# via email-validator
docutils==0.22.4
# via -r requirements.txt
easygui==0.98.3
# via oletools
ebcdic==1.1.1
# via extract-msg
ecdsa==0.19.1
# via python-jose
email-validator==2.0.0
@ -110,6 +121,8 @@ et-xmlfile==2.0.0
# via openpyxl
executing==2.2.1
# via stack-data
extract-msg==0.55.0
# via -r requirements.txt
fastapi==0.115.0
# via -r requirements.txt
fiona==1.10.1
@ -251,6 +264,8 @@ langgraph-sdk==0.3.3
# via langgraph
langsmith==0.6.8
# via langchain-core
lark==1.3.1
# via rtfde
limits==5.6.0
# via slowapi
linkify-it-py==2.0.3
@ -285,6 +300,8 @@ msal==1.24.1
# via
# -r requirements.txt
# office365-rest-python-client
msoffcrypto-tool==6.0.0
# via oletools
multidict==6.7.1
# via
# aiohttp
@ -310,6 +327,15 @@ oauthlib==3.3.1
# via requests-oauthlib
office365-rest-python-client==2.6.2
# via -r requirements.txt
olefile==0.47
# via
# extract-msg
# msoffcrypto-tool
# oletools
oletools==0.60.2
# via
# pcodedmp
# rtfde
openpyxl==3.1.5
# via -r requirements.txt
orjson==3.11.7
@ -345,6 +371,8 @@ parso==0.8.5
# via jedi
passlib==1.7.4
# via -r requirements.txt
pcodedmp==1.2.6
# via oletools
pillow==12.1.0
# via
# -r requirements.txt
@ -413,6 +441,7 @@ pyparsing==3.3.2
# via
# httplib2
# matplotlib
# oletools
pypdf2==3.0.1
# via -r requirements.txt
pyproj==3.7.2
@ -453,6 +482,8 @@ pyyaml==6.0.3
# via
# bokeh
# langchain-core
red-black-tree-mod==1.22
# via extract-msg
referencing==0.37.0
# via
# jsonschema
@ -489,6 +520,8 @@ rsa==4.9.1
# via
# google-auth
# python-jose
rtfde==0.1.2.2
# via extract-msg
seaborn==0.13.0
# via -r requirements.txt
secretstorage==3.5.0
@ -573,7 +606,9 @@ typing-inspection==0.4.2
tzdata==2025.3
# via pandas
tzlocal==5.3.1
# via apscheduler
# via
# apscheduler
# extract-msg
uc-micro-py==1.0.3
# via linkify-it-py
uritemplate==4.2.0

View file

@ -110,6 +110,9 @@ asyncpg==0.30.0
## Stripe payments
stripe>=11.0.0
## Outlook MSG file extraction
extract-msg>=0.55.0
## Geospatial libraries for STAC connector
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
shapely>=2.0.0 # For geometric operations (intersections, area calculations)

View file

@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.
This is a one-off migration for the RAG consent & control unification.
Safe to run multiple times (checks column existence before acting).
Usage:
python script_db_migrate_datasource_rag.py [--dry-run]
"""
import os
import sys
import argparse
import logging
from pathlib import Path
scriptPath = Path(__file__).resolve()
gatewayPath = scriptPath.parent.parent
sys.path.insert(0, str(gatewayPath))
os.chdir(str(gatewayPath))
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
logger = logging.getLogger(__name__)
import psycopg2
from modules.shared.configuration import APP_CONFIG
def _getConnection():
    """Open a psycopg2 connection using the settings found in ``APP_CONFIG``.

    Host, port and database name fall back to ``localhost``, ``5432`` and
    ``poweron_app``; user and password are passed through as configured.
    """
    connectArgs = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    return psycopg2.connect(**connectArgs)
def _columnExists(cur, table: str, column: str) -> bool:
cur.execute(
"""SELECT 1 FROM information_schema.columns
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
(table, column),
)
return cur.fetchone() is not None
def migrate(dryRun: bool = False):
    """Rename the legacy ``DataSource`` columns to their RAG names.

    Each rename is applied only when the old column exists and the new one
    does not, so the function can be run repeatedly. All renames run in one
    transaction that is committed at the end.

    Args:
        dryRun: When True the ALTER statements are logged but never executed,
            and the transaction is rolled back.

    Raises:
        Exception: Any database error is re-raised after the transaction has
            been rolled back; cursor and connection are closed either way.
    """
    renames = [
        ("DataSource", "autoSync", "ragIndexEnabled"),
        ("DataSource", "lastSynced", "lastIndexed"),
    ]
    executed = []

    conn = _getConnection()
    try:
        conn.autocommit = False
        cur = conn.cursor()
        try:
            for table, oldCol, newCol in renames:
                # Query each column once instead of re-checking in every branch.
                hasOld = _columnExists(cur, table, oldCol)
                hasNew = _columnExists(cur, table, newCol)

                if hasOld and not hasNew:
                    sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";'
                    logger.info("EXEC: %s", sql)
                    if not dryRun:
                        cur.execute(sql)
                    executed.append(sql)
                elif hasNew:
                    logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol)
                else:
                    logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol)

            if executed and not dryRun:
                conn.commit()
                logger.info("Migration committed (%d statements)", len(executed))
            elif executed:
                conn.rollback()
                logger.info("DRY RUN — would execute %d statements", len(executed))
            else:
                logger.info("Nothing to do — schema already up to date")
        except Exception:
            # Never leave a half-applied rename behind.
            conn.rollback()
            raise
        finally:
            cur.close()
    finally:
        conn.close()
if __name__ == "__main__":
    # CLI entry point; the only switch is --dry-run.
    cliParser = argparse.ArgumentParser(description=__doc__)
    cliParser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print SQL without executing",
    )
    cliArgs = cliParser.parse_args()
    migrate(dryRun=cliArgs.dry_run)

3
tests/eval/__init__.py Normal file
View file

@ -0,0 +1,3 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Eval harness for the Feature Data Sub-Agent (Phase 1.5)."""

View file

@ -0,0 +1,246 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""In-memory drop-in for FeatureDataProvider used by the eval harness.
Implements the same three public methods (browseTable / queryTable /
aggregateTable) plus the small surface the Sub-Agent reads (getActualColumns),
but runs all filters/aggregations in Python over the BenchmarkFixture rows.
This keeps the eval hermetic: no DB connection, no fixtures to insert/clean,
no flakiness from shared test schemas. Only the LLM call is real.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
_ALLOWED_AGGREGATES = {"SUM", "COUNT", "AVG", "MIN", "MAX"}


class FakeFeatureDataProvider:
    """In-memory provider compatible with :class:`FeatureDataProvider`.

    Rows are held per table name; every call to one of the three query
    methods is recorded in ``callLog`` so tests can inspect what was asked.
    """

    def __init__(
        self,
        rowsByTable: Dict[str, List[Dict[str, Any]]],
        availableTables: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
        # Copy the outer containers so later appends by the caller do not
        # leak in; the row dicts themselves are shared.
        self._rowsByTable = {name: list(rows) for name, rows in rowsByTable.items()}
        self._availableTables = list(availableTables or [])
        self.callLog: List[Dict[str, Any]] = []

    def getAvailableTables(self, featureCode: str) -> List[Dict[str, Any]]:  # noqa: ARG002
        """Return a copy of the configured table descriptors."""
        return list(self._availableTables)

    def getTableSchema(self, featureCode: str, tableName: str) -> Optional[Dict[str, Any]]:  # noqa: ARG002
        """Return the descriptor whose ``meta.table`` equals ``tableName``, else None."""
        for obj in self._availableTables:
            # ``meta`` may be missing or explicitly None.
            if (obj.get("meta") or {}).get("table") == tableName:
                return obj
        return None

    def getActualColumns(self, tableName: str) -> List[str]:
        """Return all keys seen in the table's rows, in first-seen order."""
        ordered: Dict[str, None] = {}
        for row in self._rowsByTable.get(tableName, []):
            for key in row:
                ordered.setdefault(key, None)
        return list(ordered)

    def browseTable(
        self,
        tableName: str,
        featureInstanceId: str,
        mandateId: str,
        fields: Optional[List[str]] = None,
        limit: int = 50,
        offset: int = 0,
        extraFilters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Return one page of scoped rows, optionally filtered and projected.

        ``total`` is the row count after filtering but before paging.
        """
        self.callLog.append({"method": "browseTable", "table": tableName, "fields": fields, "limit": limit})
        rows = self._scopeRows(tableName, featureInstanceId, mandateId)
        rows = _applyFilters(rows, extraFilters)
        total = len(rows)
        rows = self._project(rows[offset : offset + limit], fields)
        return {"rows": rows, "total": total, "limit": limit, "offset": offset}

    def queryTable(
        self,
        tableName: str,
        featureInstanceId: str,
        mandateId: str,
        filters: Optional[List[Dict[str, Any]]] = None,
        fields: Optional[List[str]] = None,
        orderBy: Optional[str] = None,
        limit: int = 50,
        offset: int = 0,
        extraFilters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Filter, optionally sort, page and project the scoped rows.

        ``filters`` and ``extraFilters`` are both applied (logical AND).
        """
        self.callLog.append({
            "method": "queryTable", "table": tableName, "filters": filters,
            "fields": fields, "orderBy": orderBy, "limit": limit,
        })
        rows = self._scopeRows(tableName, featureInstanceId, mandateId)
        combined = list(filters or []) + list(extraFilters or [])
        rows = _applyFilters(rows, combined)
        if orderBy:
            try:
                # Rows without a value sort last.
                rows = sorted(rows, key=lambda r: (r.get(orderBy) is None, r.get(orderBy)))
            except TypeError:
                # Mixed, non-comparable types: fall back to string order.
                rows = sorted(rows, key=lambda r: str(r.get(orderBy)))
        total = len(rows)
        rows = self._project(rows[offset : offset + limit], fields)
        return {"rows": rows, "total": total, "limit": limit, "offset": offset}

    def aggregateTable(
        self,
        tableName: str,
        featureInstanceId: str,
        mandateId: str,
        aggregate: str,
        field: str,
        groupBy: Optional[str] = None,
        extraFilters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Aggregate ``field`` over the scoped rows, optionally per ``groupBy`` value.

        Returns ``{"rows": [], "error": ...}`` for an unsupported (or
        non-string) aggregate name instead of raising.
        """
        self.callLog.append({
            "method": "aggregateTable", "table": tableName,
            "aggregate": aggregate, "field": field, "groupBy": groupBy,
        })
        if isinstance(aggregate, str):
            aggregate = aggregate.upper()
        if not isinstance(aggregate, str) or aggregate not in _ALLOWED_AGGREGATES:
            return {"rows": [], "error": f"Unsupported aggregate: {aggregate}"}

        rows = self._scopeRows(tableName, featureInstanceId, mandateId)
        rows = _applyFilters(rows, extraFilters)

        if groupBy:
            groups: Dict[Any, List[Dict[str, Any]]] = {}
            for row in rows:
                groups.setdefault(row.get(groupBy), []).append(row)
            outRows = [
                {"groupValue": key, "result": _aggregate(aggregate, [r.get(field) for r in grp])}
                for key, grp in groups.items()
            ]
            # Largest result first; groups without a result go last.
            outRows.sort(key=lambda r: (r["result"] is None, -(r["result"] or 0)))
        else:
            outRows = [{"result": _aggregate(aggregate, [r.get(field) for r in rows])}]

        return {
            "rows": outRows,
            "aggregate": aggregate,
            "field": field,
            "groupBy": groupBy,
        }

    @staticmethod
    def _project(rows: List[Dict[str, Any]], fields: Optional[List[str]]) -> List[Dict[str, Any]]:
        """Keep only the keys listed in ``fields``; a falsy ``fields`` keeps rows unchanged."""
        if not fields:
            return rows
        return [{k: v for k, v in row.items() if k in fields} for row in rows]

    def _scopeRows(self, tableName: str, featureInstanceId: str, mandateId: str) -> List[Dict[str, Any]]:
        """Return rows visible to the given instance/mandate.

        A row with no ``featureInstanceId`` / ``mandateId`` (or None) is
        treated as unscoped and is always visible.
        """
        rows = self._rowsByTable.get(tableName, [])
        return [
            row for row in rows
            if (row.get("featureInstanceId") in (None, featureInstanceId))
            and (row.get("mandateId") in (None, mandateId))
        ]
def _applyFilters(rows: List[Dict[str, Any]], filters: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
if not filters:
return rows
out = rows
for f in filters:
field = f.get("field")
op = (f.get("op") or "=").upper()
value = f.get("value")
out = [r for r in out if _matchesFilter(r.get(field), op, value)]
return out
def _matchesFilter(rowValue: Any, op: str, filterValue: Any) -> bool:
if op in ("IS NULL",):
return rowValue is None
if op in ("IS NOT NULL",):
return rowValue is not None
if rowValue is None:
return False
if op == "=":
return _coerceEqual(rowValue, filterValue)
if op == "!=":
return not _coerceEqual(rowValue, filterValue)
if op == ">":
return _coerceFloat(rowValue) > _coerceFloat(filterValue)
if op == "<":
return _coerceFloat(rowValue) < _coerceFloat(filterValue)
if op == ">=":
return _coerceFloat(rowValue) >= _coerceFloat(filterValue)
if op == "<=":
return _coerceFloat(rowValue) <= _coerceFloat(filterValue)
if op in ("LIKE", "ILIKE"):
pattern = str(filterValue or "")
target = str(rowValue)
if op == "ILIKE":
pattern = pattern.lower()
target = target.lower()
return _sqlLike(target, pattern)
if op == "IN":
if isinstance(filterValue, (list, tuple, set)):
return any(_coerceEqual(rowValue, v) for v in filterValue)
return _coerceEqual(rowValue, filterValue)
return False
def _coerceEqual(a: Any, b: Any) -> bool:
if a == b:
return True
try:
return str(a) == str(b)
except Exception:
return False
def _coerceFloat(value: Any) -> float:
if value is None:
return 0.0
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _sqlLike(value: str, pattern: str) -> bool:
"""Approximate SQL LIKE -- only % and _ wildcards."""
import re
regex = ""
i = 0
while i < len(pattern):
ch = pattern[i]
if ch == "%":
regex += ".*"
elif ch == "_":
regex += "."
else:
regex += re.escape(ch)
i += 1
return re.fullmatch(regex, value or "") is not None
def _aggregate(op: str, values: List[Any]) -> Any:
if op == "COUNT":
return sum(1 for v in values if v is not None)
nums = [_coerceFloat(v) for v in values if v is not None]
if not nums:
return 0 if op == "SUM" else None
if op == "SUM":
return round(sum(nums), 4)
if op == "AVG":
return round(sum(nums) / len(nums), 4)
if op == "MIN":
return min(nums)
if op == "MAX":
return max(nums)
return None

View file

@ -0,0 +1,735 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Trustee Sub-Agent Eval Harness (Phase 1.5).
Standalone runner that fires real AI calls against the Feature Data
Sub-Agent in three configurations:
* ``baseline`` -- production code without the pre-execute validator
(Repair-Loop disabled, Trustee domain hints active).
* ``phase1`` -- pre-execute validator on (Repair-Loop active),
domain hints active, no ontology yet.
* ``phase2`` -- validator on, ontology-driven schema context +
constraints (replaces hand-written domain hints).
For each mode we run all 19 gold-standard questions against an
in-memory :class:`FakeFeatureDataProvider`, capture the agent's tool
calls and final answer, score them against the gold standard, and
write a Markdown report to ``local/notes/`` for analysis.
Usage::
cd gateway
python -m tests.eval.runTrusteeBenchmark # all 3 modes
python -m tests.eval.runTrusteeBenchmark phase1 # one mode only
python -m tests.eval.runTrusteeBenchmark baseline phase1
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import re
import sys
import time
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# ---------------------------------------------------------------------------
# Path setup so `python -m tests.eval.runTrusteeBenchmark` works from gateway/
# ---------------------------------------------------------------------------
_GATEWAY_DIR = Path(__file__).resolve().parents[2]
if str(_GATEWAY_DIR) not in sys.path:
sys.path.insert(0, str(_GATEWAY_DIR))
import yaml # noqa: E402
from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( # noqa: E402
AgentConfig,
AgentEventTypeEnum,
)
from modules.datamodels.datamodelAi import ( # noqa: E402
AiCallRequest,
AiCallResponse,
OperationTypeEnum,
)
from modules.serviceCenter.services.serviceAgent.agentLoop import runAgentLoop # noqa: E402
from modules.serviceCenter.services.serviceAgent.featureDataAgent import ( # noqa: E402
_buildSubAgentTools,
_buildSchemaContext,
)
from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( # noqa: E402
QueryValidationError,
)
from modules.serviceCenter.services.serviceAgent.queryValidator import ( # noqa: E402
QueryValidator,
)
from tests.eval.fakeFeatureDataProvider import ( # noqa: E402
FakeFeatureDataProvider,
)
from tests.fixtures.trusteeBenchmark.loadTrusteeBenchmarkFixture import ( # noqa: E402
buildTrusteeBenchmarkFixture,
BenchmarkFixture,
)
logger = logging.getLogger("trusteeBenchmark")
# ---------------------------------------------------------------------------
# NoOpValidator -- baseline mode (Repair-Loop OFF)
# ---------------------------------------------------------------------------
class _NoOpValidator(QueryValidator):
    """Pass-through validator for the baseline measurement: nothing is rejected."""

    def validateBrowseQuery(self, tableName, args):  # noqa: ARG002
        """Accept every browse query (returns None)."""

    def validateQueryTable(self, tableName, args):  # noqa: ARG002
        """Accept every table query (returns None)."""

    def validateAggregateQuery(self, tableName, args):  # noqa: ARG002
        """Accept every aggregate query (returns None)."""
# ---------------------------------------------------------------------------
# Mode-specific tool/prompt building
# ---------------------------------------------------------------------------
@dataclass
class _ModeConfig:
name: str
label: str
useValidator: bool
useOntology: bool
_MODES: Dict[str, _ModeConfig] = {
"baseline": _ModeConfig(name="baseline", label="Baseline (no validator)", useValidator=False, useOntology=False),
"phase1": _ModeConfig(name="phase1", label="Phase 1 (validator on)", useValidator=True, useOntology=False),
"phase2": _ModeConfig(name="phase2", label="Phase 2 (validator + ontology)", useValidator=True, useOntology=True),
}
def _buildValidator(mode: _ModeConfig) -> QueryValidator:
    """Return the validator that belongs to ``mode``.

    * validator off: a ``_NoOpValidator`` (nothing is rejected).
    * validator on, ontology off: a default ``QueryValidator()``.
    * validator on, ontology on: ``QueryValidator`` built with the trustee
      ontology; if loading it fails, a warning is logged and the default
      ``QueryValidator()`` is used instead.
    """
    if not mode.useValidator:
        return _NoOpValidator()
    if not mode.useOntology:
        return QueryValidator()

    try:
        # Imported lazily so the other modes do not need the trustee feature.
        from modules.features.trustee.trusteeOntology import getTrusteeOntology
        return QueryValidator(ontology=getTrusteeOntology())
    except Exception as loadError:
        logger.warning("Could not load trustee ontology, falling back: %s", loadError)
    return QueryValidator()
def _applyEnvForMode(mode: _ModeConfig) -> None:
"""Set the ontology toggle for the production prompt builder.
The Phase 2 path uses ``featureDataAgent._buildSchemaContext`` to pull
the prompt block from ``getAgentOntology()`` automatically. For
baseline/phase1 we set ``POWERON_DISABLE_FEATURE_ONTOLOGY=1`` so the
builder falls back to the legacy ``getAgentDomainHints()`` block --
measuring exactly the production prompt that ships today.
"""
if mode.useOntology:
os.environ.pop("POWERON_DISABLE_FEATURE_ONTOLOGY", None)
else:
os.environ["POWERON_DISABLE_FEATURE_ONTOLOGY"] = "1"
def _buildSystemPrompt(featureCode: str, instanceLabel: str, selectedTables: List[Dict[str, Any]]) -> str:
    """Return the sub-agent system prompt produced by ``_buildSchemaContext``.

    The call is the same for every mode and always requests German
    (``requestLang="de"``).
    """
    promptText = _buildSchemaContext(
        featureCode,
        instanceLabel,
        selectedTables,
        requestLang="de",
    )
    return promptText
# ---------------------------------------------------------------------------
# Question loading + per-question evaluation
# ---------------------------------------------------------------------------
@dataclass
class _Question:
id: str
question: str
intent: str
expectedTools: List[str]
expectedTable: Optional[str]
expectedAggregate: Optional[str]
expectedAggregateField: Optional[str]
requiredFilters: Dict[str, Any]
forbiddenTools: List[str]
expectedNumbers: List[float]
expectedAnswerContains: List[str]
numericTolerance: float
def _loadQuestions(yamlPath: Path) -> List[_Question]:
    """Read the gold-standard questions from a YAML file.

    The file must contain a list of mappings. ``id`` and ``question`` are
    required; every other key is optional and gets an empty/None default.
    ``numericTolerance`` defaults to 0.005 only when absent, so an explicit
    ``0`` is honoured.

    Args:
        yamlPath: Path of the YAML file.

    Returns:
        The parsed questions in file order (empty for an empty file).

    Raises:
        ValueError: If the top-level YAML node is not a list.
        KeyError: If an entry lacks ``id`` or ``question``.
    """
    with open(yamlPath, "r", encoding="utf-8") as f:
        # safe_load yields None for an empty document.
        rawList = yaml.safe_load(f) or []
    if not isinstance(rawList, list):
        raise ValueError(f"Expected a list of questions in {yamlPath}, got {type(rawList).__name__}")

    questions: List[_Question] = []
    for raw in rawList:
        tolerance = raw.get("numericTolerance")
        questions.append(_Question(
            id=raw["id"],
            question=raw["question"],
            intent=raw.get("intent", ""),
            expectedTools=list(raw.get("expectedTools") or []),
            expectedTable=raw.get("expectedTable"),
            expectedAggregate=raw.get("expectedAggregate"),
            expectedAggregateField=raw.get("expectedAggregateField"),
            requiredFilters=dict(raw.get("requiredFilters") or {}),
            forbiddenTools=list(raw.get("forbiddenTools") or []),
            expectedNumbers=[float(x) for x in (raw.get("expectedNumbers") or [])],
            expectedAnswerContains=[str(x) for x in (raw.get("expectedAnswerContains") or [])],
            numericTolerance=0.005 if tolerance is None else float(tolerance),
        ))
    return questions
@dataclass
class _RunResult:
questionId: str
finalText: str
toolCalls: List[Dict[str, Any]] = field(default_factory=list)
toolResults: List[Dict[str, Any]] = field(default_factory=list)
summary: Dict[str, Any] = field(default_factory=dict)
durationS: float = 0.0
error: Optional[str] = None
@property
def costCHF(self) -> float:
return float(self.summary.get("costCHF") or 0.0)
@property
def rounds(self) -> int:
return int(self.summary.get("rounds") or 0)
@property
def validationFailures(self) -> int:
return int(self.summary.get("validationFailures") or 0)
@property
def repairAttempts(self) -> int:
return int(self.summary.get("repairAttempts") or 0)
@property
def successAfterRepair(self) -> int:
return int(self.summary.get("successAfterRepair") or 0)
@dataclass
class _Score:
patternOk: bool = False
forbidOk: bool = False
numericOk: bool = False
accuracyOk: bool = False
notes: List[str] = field(default_factory=list)
def _scoreRun(question: _Question, run: _RunResult) -> _Score:
    """Score one run against its question.

    A run that reported an error fails every check and carries the error as
    a note. Otherwise ``accuracyOk`` is True only when the pattern, forbid
    and numeric checks all pass.
    """
    if run.error:
        return _Score(notes=[f"Sub-agent error: {run.error}"])

    patternOk = _checkPattern(question, run)
    forbidOk = _checkForbid(question, run)
    numericOk = _checkNumeric(question, run)
    return _Score(
        patternOk=patternOk,
        forbidOk=forbidOk,
        numericOk=numericOk,
        accuracyOk=patternOk and forbidOk and numericOk,
    )
def _checkPattern(question: _Question, run: _RunResult) -> bool:
"""Did the agent call one of the expected tools on the expected table with required filters?"""
if not question.expectedTools:
return True
matchingCalls = [
c for c in run.toolCalls
if c.get("toolName") in question.expectedTools
and (not question.expectedTable or c.get("args", {}).get("tableName") == question.expectedTable)
]
if not matchingCalls:
return False
if question.expectedAggregate:
wantAgg = question.expectedAggregate.upper()
wantField = question.expectedAggregateField
for c in matchingCalls:
args = c.get("args", {})
if c.get("toolName") != "aggregateTable":
continue
if (args.get("aggregate") or "").upper() != wantAgg:
continue
if wantField and args.get("field") != wantField:
continue
if not _filtersSatisfied(question.requiredFilters, args.get("extraFilters") or args.get("filters") or []):
continue
return True
return False
if question.requiredFilters:
for c in matchingCalls:
args = c.get("args", {})
filters = args.get("filters") or args.get("extraFilters") or []
if _filtersSatisfied(question.requiredFilters, filters):
return True
return False
return True
def _filtersSatisfied(required: Dict[str, Any], actualFilters: List[Dict[str, Any]]) -> bool:
if not required:
return True
for reqField, reqValue in required.items():
if reqField.endswith("Like"):
field = reqField[:-4]
wanted = str(reqValue)
ok = any(
(f.get("field") == field) and (f.get("op", "").upper() in ("LIKE", "ILIKE"))
and str(f.get("value")) == wanted
for f in actualFilters
)
if not ok:
return False
else:
ok = any(
f.get("field") == reqField and _filterValueEqual(f.get("value"), reqValue)
for f in actualFilters
)
if not ok:
return False
return True
def _filterValueEqual(a: Any, b: Any) -> bool:
if a == b:
return True
try:
return str(a).strip() == str(b).strip()
except Exception:
return False
def _checkForbid(question: _Question, run: _RunResult) -> bool:
"""Did the agent AVOID forbidden tool/op combinations?
Forbidden hits only count if the call actually went through to the
provider (success=True). Validator-rejected calls don't count -- the
Repair-Loop is doing its job and steering the agent away.
"""
if not question.forbiddenTools:
return True
forbiddenSet = set(question.forbiddenTools)
for r in run.toolResults:
if not r.get("success"):
continue
if r.get("toolName") in forbiddenSet:
return False
return True
def _checkNumeric(question: _Question, run: _RunResult) -> bool:
text = (run.finalText or "")
if question.expectedNumbers:
textNumbers = _extractNumbers(text)
for expected in question.expectedNumbers:
tol = max(abs(expected) * question.numericTolerance, 0.5)
if not any(abs(n - expected) <= tol for n in textNumbers):
return False
if question.expectedAnswerContains:
lowered = text.lower()
for needle in question.expectedAnswerContains:
if needle.lower() not in lowered:
return False
return True
def _extractNumbers(text: str) -> List[float]:
"""Pick out all numbers from a free-text answer.
Handles Swiss thousand separators (apostrophe and U+2019), German
decimals (comma), plain integers/floats, and JSON numbers. Trailing
punctuation (``,``, ``;``, ``.`` from end-of-sentence) is stripped
before parsing so ``"180500.0,"`` parses cleanly to 180500.0.
"""
cleaned = text.replace("\u2019", "'")
tokens = re.findall(r"-?\d[\d'.,]*", cleaned)
out: List[float] = []
for tok in tokens:
tok = tok.rstrip(",;")
if tok.endswith(".") and tok.count(".") == 1:
tok = tok[:-1]
norm = tok.replace("'", "")
if norm.count(",") == 1 and norm.count(".") == 0:
norm = norm.replace(",", ".")
elif norm.count(",") >= 1 and norm.count(".") >= 1:
if norm.rfind(",") > norm.rfind("."):
norm = norm.replace(".", "").replace(",", ".")
else:
norm = norm.replace(",", "")
else:
norm = norm.replace(",", "")
try:
out.append(float(norm))
except ValueError:
continue
return out
# ---------------------------------------------------------------------------
# AI call wiring
# ---------------------------------------------------------------------------
def _bootstrapServices() -> Tuple[Any, str, str]:
    """Create the service hub for the root user and the initial mandate.

    Returns:
        ``(services, userId, mandateId)``.

    Raises:
        RuntimeError: If the root interface reports no initial mandate.
    """
    # Imported lazily so merely importing this module does not pull in the app stack.
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.datamodels.datamodelUam import Mandate
    from modules.serviceHub import getInterface as getServices

    root = getRootInterface()
    rootUser = root.currentUser
    mandateId = root.getInitialId(Mandate)
    if not mandateId:
        raise RuntimeError("No initial mandate available -- run bootstrap loader first.")

    hub = getServices(rootUser, workflow=None, mandateId=mandateId, featureInstanceId=None)
    return hub, rootUser.id, mandateId
async def _runOneQuestion(
    *,
    services: Any,
    userId: str,
    mandateId: str,
    fixture: BenchmarkFixture,
    question: _Question,
    mode: _ModeConfig,
) -> _RunResult:
    """Execute a single sub-agent run for one question under one mode.

    A fresh in-memory provider, validator and tool registry are built for
    every run. Events from the agent loop are folded into a ``_RunResult``:
    FINAL replaces the answer text, MESSAGE appends to it, tool calls and
    results are collected, and ERROR events are concatenated into ``error``.

    Exceptions from the agent loop are not propagated; they are logged and
    stored in ``error`` so the benchmark can continue. ``durationS`` is
    measured with a monotonic clock.
    """
    provider = FakeFeatureDataProvider(
        rowsByTable=fixture.rowsByTable,
        availableTables=fixture.selectedTables,
    )
    validator = _buildValidator(mode)
    registry = _buildSubAgentTools(
        provider=provider,
        featureInstanceId=fixture.featureInstanceId,
        mandateId=fixture.mandateId,
        tableFilters={},
        validator=validator,
    )

    systemPrompt = _buildSystemPrompt(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=fixture.selectedTables,
    )

    # Running total of the AI cost for this run, fed back to the agent loop.
    cost = 0.0

    async def _aiCallFn(req: AiCallRequest) -> AiCallResponse:
        nonlocal cost
        resp = await services.ai.callAi(req)
        cost += float(getattr(resp, "priceCHF", 0.0) or 0.0)
        return resp

    async def _getCost() -> float:
        return cost

    config = AgentConfig(
        maxRounds=6,
        maxCostCHF=0.50,
        operationType=OperationTypeEnum.DATA_QUERY,
    )

    run = _RunResult(questionId=question.id, finalText="")
    # perf_counter is monotonic, so a system clock adjustment during the
    # run cannot distort (or negate) the measured duration.
    t0 = time.perf_counter()
    try:
        async for event in runAgentLoop(
            prompt=question.question,
            toolRegistry=registry,
            config=config,
            aiCallFn=_aiCallFn,
            getWorkflowCostFn=_getCost,
            workflowId=f"eval-{mode.name}-{question.id}-{uuid.uuid4().hex[:6]}",
            userId=userId,
            featureInstanceId=fixture.featureInstanceId,
            mandateId=mandateId,
            systemPromptOverride=systemPrompt,
        ):
            if event.type == AgentEventTypeEnum.FINAL:
                # An empty FINAL keeps whatever MESSAGE chunks accumulated.
                run.finalText = event.content or run.finalText
            elif event.type == AgentEventTypeEnum.MESSAGE and event.content:
                run.finalText += event.content
            elif event.type == AgentEventTypeEnum.TOOL_CALL:
                run.toolCalls.append(dict(event.data or {}))
            elif event.type == AgentEventTypeEnum.TOOL_RESULT:
                run.toolResults.append(dict(event.data or {}))
            elif event.type == AgentEventTypeEnum.AGENT_SUMMARY:
                run.summary = dict(event.data or {})
            elif event.type == AgentEventTypeEnum.ERROR:
                run.error = (run.error or "") + (event.content or "")
    except Exception as e:
        run.error = f"{type(e).__name__}: {e}"
        logger.exception("Sub-agent run failed for %s/%s", mode.name, question.id)
    run.durationS = time.perf_counter() - t0
    return run
# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------
@dataclass
class _ModeReport:
mode: _ModeConfig
perQuestion: List[Tuple[_Question, _RunResult, _Score]] = field(default_factory=list)
@property
def total(self) -> int:
return len(self.perQuestion)
def _count(self, attr: str) -> int:
return sum(1 for _, _, s in self.perQuestion if getattr(s, attr))
@property
def accuracy(self) -> float:
return self._count("accuracyOk") / max(self.total, 1)
@property
def patternCompliance(self) -> float:
return self._count("patternOk") / max(self.total, 1)
@property
def repairConversionRate(self) -> float:
attempts = sum(r.repairAttempts for _, r, _ in self.perQuestion)
succeeded = sum(r.successAfterRepair for _, r, _ in self.perQuestion)
if attempts == 0:
return 0.0
return succeeded / attempts
@property
def totalCostCHF(self) -> float:
return sum(r.costCHF for _, r, _ in self.perQuestion)
@property
def totalRounds(self) -> int:
return sum(r.rounds for _, r, _ in self.perQuestion)
@property
def totalValidationFailures(self) -> int:
return sum(r.validationFailures for _, r, _ in self.perQuestion)
def _writeReport(reports: List[_ModeReport], outputPath: Path) -> None:
lines: List[str] = []
lines.append("# Trustee Sub-Agent Benchmark Report")
lines.append("")
lines.append(f"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}")
lines.append("")
lines.append("## Summary")
lines.append("")
lines.append("| Mode | Questions | Accuracy | Pattern compliance | Repair conversion | Validator rejects | Rounds | Cost (CHF) |")
lines.append("|---|---|---|---|---|---|---|---|")
for rep in reports:
lines.append(
f"| {rep.mode.label} | {rep.total} | {rep.accuracy:.1%} | {rep.patternCompliance:.1%} | "
f"{rep.repairConversionRate:.1%} | {rep.totalValidationFailures} | {rep.totalRounds} | "
f"{rep.totalCostCHF:.4f} |"
)
lines.append("")
lines.append("## Per-question detail")
for rep in reports:
lines.append("")
lines.append(f"### {rep.mode.label}")
lines.append("")
lines.append("| id | acc | pattern | forbid | numeric | rounds | val-fail | repairs | cost CHF | duration | tools |")
lines.append("|---|---|---|---|---|---|---|---|---|---|---|")
for q, r, s in rep.perQuestion:
toolList = ",".join(
f"{c.get('toolName')}({c.get('args',{}).get('tableName','?')})"
for c in r.toolCalls
)
lines.append(
f"| {q.id} | {_yn(s.accuracyOk)} | {_yn(s.patternOk)} | {_yn(s.forbidOk)} | {_yn(s.numericOk)} | "
f"{r.rounds} | {r.validationFailures} | {r.repairAttempts}/{r.successAfterRepair} | "
f"{r.costCHF:.4f} | {r.durationS:.1f}s | {toolList} |"
)
lines.append("")
lines.append("#### Notes & failures")
for q, r, s in rep.perQuestion:
if s.accuracyOk:
continue
lines.append(f"- **{q.id}** ({q.intent}): pattern={s.patternOk} forbid={s.forbidOk} numeric={s.numericOk}")
if r.error:
lines.append(f" - error: `{r.error}`")
lines.append(f" - answer: `{(r.finalText or '').strip().replace('|', '/').splitlines()[0][:240]}`")
for note in s.notes:
lines.append(f" - note: {note}")
outputPath.parent.mkdir(parents=True, exist_ok=True)
outputPath.write_text("\n".join(lines), encoding="utf-8")
def _yn(b: bool) -> str:
return "OK" if b else "FAIL"
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
async def _runMain(modesToRun: List[str], onlyQuestionId: Optional[str] = None) -> None:
    """Run every benchmark question under each requested mode and persist the results.

    Args:
        modesToRun: Keys into ``_MODES``; one report is produced per entry,
            in the given order.
        onlyQuestionId: When set, only the question with this id is run. If
            no loaded question has that id, a message is printed and nothing
            is executed.

    Side effects:
        Configures logging, prints progress to stdout and writes a Markdown
        report plus a raw JSON dump (same timestamped stem) below
        ``_GATEWAY_DIR.parent / "local" / "notes"``.
    """
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s -- %(message)s",
        level=logging.WARNING,
    )
    logger.setLevel(logging.INFO)

    fixture = buildTrusteeBenchmarkFixture()
    fixtureDir = _GATEWAY_DIR / "tests" / "fixtures" / "trusteeBenchmark"
    allQuestions = _loadQuestions(fixtureDir / "questions.yaml")
    if onlyQuestionId:
        allQuestions = [cand for cand in allQuestions if cand.id == onlyQuestionId]
        if not allQuestions:
            print(f"No question matches id={onlyQuestionId!r}")
            return

    questionCount = len(allQuestions)
    modeCount = len(modesToRun)
    print(f"Loaded {questionCount} questions, {modeCount} modes -> {questionCount * modeCount} sub-agent runs.")

    services, userId, mandateId = _bootstrapServices()
    print(f"Bootstrap OK: user={userId}, mandate={mandateId}")

    reports: List[_ModeReport] = []
    for modeName in modesToRun:
        mode = _MODES[modeName]
        _applyEnvForMode(mode)
        modeReport = _ModeReport(mode=mode)
        print(f"\n=== Mode: {mode.label} ===")
        for position, question in enumerate(allQuestions, start=1):
            print(f" [{position:>2}/{questionCount}] {question.id}: {question.question[:80]} ...", flush=True)
            outcome = await _runOneQuestion(
                services=services,
                userId=userId,
                mandateId=mandateId,
                fixture=fixture,
                question=question,
                mode=mode,
            )
            verdict = _scoreRun(question, outcome)
            modeReport.perQuestion.append((question, outcome, verdict))
            progress = (
                f" -> acc={_yn(verdict.accuracyOk)} "
                f"pattern={_yn(verdict.patternOk)} forbid={_yn(verdict.forbidOk)} "
                f"numeric={_yn(verdict.numericOk)} rounds={outcome.rounds} cost={outcome.costCHF:.4f} "
                f"val-fail={outcome.validationFailures} repairs={outcome.repairAttempts}/{outcome.successAfterRepair}"
            )
            print(progress, flush=True)
        reports.append(modeReport)

    def _itemPayload(q, r, s):
        # One JSON record per (question, run, score) triple.
        return {
            "questionId": q.id,
            "intent": q.intent,
            "accuracyOk": s.accuracyOk,
            "patternOk": s.patternOk,
            "forbidOk": s.forbidOk,
            "numericOk": s.numericOk,
            "rounds": r.rounds,
            "validationFailures": r.validationFailures,
            "repairAttempts": r.repairAttempts,
            "successAfterRepair": r.successAfterRepair,
            "costCHF": r.costCHF,
            "durationS": r.durationS,
            "finalText": (r.finalText or "")[:600],
            "toolCalls": r.toolCalls,
            "error": r.error,
        }

    def _modePayload(rep):
        # Aggregate figures of one mode followed by its per-question records.
        return {
            "mode": rep.mode.name,
            "accuracy": rep.accuracy,
            "patternCompliance": rep.patternCompliance,
            "repairConversionRate": rep.repairConversionRate,
            "totalCostCHF": rep.totalCostCHF,
            "totalRounds": rep.totalRounds,
            "totalValidationFailures": rep.totalValidationFailures,
            "items": [_itemPayload(q, r, s) for q, r, s in rep.perQuestion],
        }

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    outDir = _GATEWAY_DIR.parent / "local" / "notes"
    reportPath = outDir / f"trustee-benchmark-{timestamp}.md"
    # _writeReport creates the output directory, so it has to run before the
    # JSON dump below.
    _writeReport(reports, reportPath)

    rawJsonPath = outDir / f"trustee-benchmark-{timestamp}.json"
    rawJson = json.dumps(
        [_modePayload(rep) for rep in reports],
        indent=2,
        ensure_ascii=False,
    )
    rawJsonPath.write_text(rawJson, encoding="utf-8")

    print(f"\nReport written: {reportPath}")
    print(f"Raw JSON: {rawJsonPath}")
    for rep in reports:
        print(f" {rep.mode.label}: acc={rep.accuracy:.1%} pattern={rep.patternCompliance:.1%} cost={rep.totalCostCHF:.4f}")
def _parseArgs(argv: List[str]) -> Tuple[List[str], Optional[str]]:
modes: List[str] = []
only: Optional[str] = None
for arg in argv:
if arg.startswith("--only="):
only = arg.split("=", 1)[1]
elif arg in _MODES:
modes.append(arg)
else:
print(f"Unknown argument: {arg!r}. Allowed modes: {list(_MODES)}")
sys.exit(2)
if not modes:
modes = ["baseline", "phase1", "phase2"]
return modes, only
def main() -> None:
    """Command-line entry point: parse ``sys.argv`` and run the benchmark to completion."""
    selectedModes, questionFilter = _parseArgs(sys.argv[1:])
    benchmark = _runMain(selectedModes, onlyQuestionId=questionFilter)
    asyncio.run(benchmark)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,16 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Trustee benchmark fixture: synthetic but realistic Swiss KMU accounting data.
Used by the Feature Data Sub-Agent eval harness (Phase 1.5) to measure
hallucination rates against a fixed gold standard. Data is built in-memory
as plain dict rows shaped like the Pydantic feature data models -- no SQL,
no DB connection -- so the harness stays hermetic and reproducible.
"""
from tests.fixtures.trusteeBenchmark.loadTrusteeBenchmarkFixture import (
buildTrusteeBenchmarkFixture,
BenchmarkFixture,
)
__all__ = ["buildTrusteeBenchmarkFixture", "BenchmarkFixture"]

View file

@ -0,0 +1,275 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Synthetic Trustee benchmark fixture for the Feature Data Sub-Agent eval.
Builds an in-memory snapshot of one fictional Swiss KMU mandate
("Demo AG") with:
* 3 fiscal years (2023, 2024, 2025) of `TrusteeDataAccountBalance` rows
-- both annual totals (periodMonth=0) and monthly snapshots.
* 8 representative accounts spanning the major chart-of-accounts blocks
(cash, banks, receivables, payables, revenue, materials, rent).
* Ten 2025 journal entries, each expanded into one debit and one credit
`TrusteeDataJournalLine` row (no separate `TrusteeDataJournalEntry` table
is materialized), so debit/credit/COUNT aggregations have meaningful answers.
The data is deterministic (no RNG) so a question's gold-standard answer
is stable across runs.
This module deliberately stays decoupled from the production DB pipeline
-- the harness uses :class:`FakeFeatureDataProvider` (see
``gateway/tests/eval/fakeFeatureDataProvider.py``) to serve queries
against this in-memory snapshot, mirroring the public methods of
``FeatureDataProvider`` (browseTable / queryTable / aggregateTable).
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List
_MANDATE_ID = "m-demo-ag"
_FEATURE_INSTANCE_ID = "fi-demo-ag-trustee"
# ---------------------------------------------------------------------------
# Account master data
# ---------------------------------------------------------------------------
_ACCOUNT_MASTER: List[Dict[str, Any]] = [
{"accountNumber": "1000", "label": "Hauptkasse", "accountType": "asset", "currency": "CHF"},
{"accountNumber": "1020", "label": "ZKB Geschaeftskonto", "accountType": "asset", "currency": "CHF"},
{"accountNumber": "1021", "label": "PostFinance", "accountType": "asset", "currency": "CHF"},
{"accountNumber": "1100", "label": "Forderungen aus Lieferungen und Leistungen", "accountType": "asset", "currency": "CHF"},
{"accountNumber": "2000", "label": "Verbindlichkeiten aus Lieferungen", "accountType": "liability", "currency": "CHF"},
{"accountNumber": "3000", "label": "Ertrag aus Beratung", "accountType": "revenue", "currency": "CHF"},
{"accountNumber": "5400", "label": "Materialaufwand", "accountType": "expense", "currency": "CHF"},
{"accountNumber": "6000", "label": "Mietaufwand", "accountType": "expense", "currency": "CHF"},
]
# Annual closing balances per (year, accountNumber) -- the canonical reference.
# Asset/expense balances are positive, liability/revenue balances are stored
# as positive numbers (sign by accountType, like most accounting systems).
_ANNUAL_CLOSING: Dict[int, Dict[str, float]] = {
2023: {
"1000": 4_800.00,
"1020": 132_500.00,
"1021": 22_400.00,
"1100": 58_200.00,
"2000": 41_300.00,
"3000": 410_000.00,
"5400": 92_000.00,
"6000": 36_000.00,
},
2024: {
"1000": 5_200.00,
"1020": 148_900.00,
"1021": 26_750.00,
"1100": 61_400.00,
"2000": 44_100.00,
"3000": 462_500.00,
"5400": 104_300.00,
"6000": 39_000.00,
},
2025: {
"1000": 5_900.00,
"1020": 152_400.00,
"1021": 28_100.00,
"1100": 66_800.00,
"2000": 47_900.00,
"3000": 488_700.00,
"5400": 112_100.00,
"6000": 42_000.00,
},
}
def _openingFromPriorYear(year: int, accountNumber: str) -> float:
    """Return the opening balance of ``accountNumber`` for ``year``.

    The opening balance is the closing balance recorded in
    ``_ANNUAL_CLOSING`` for the preceding year; ``0.0`` when that year or
    account is not present.
    """
    priorClosings = _ANNUAL_CLOSING.get(year - 1, {})
    return float(priorClosings.get(accountNumber, 0.0))
def _monthlyProgression(opening: float, closing: float, month: int) -> float:
"""Linear interpolation between opening and closing for monthly snapshots.
Not realistic in detail but deterministic and monotonic per account, so
questions about "Stand per Ende März" produce stable answers.
"""
if month <= 0:
return float(closing)
frac = month / 12.0
return round(float(opening) + (float(closing) - float(opening)) * frac, 2)
# ---------------------------------------------------------------------------
# Journal entries / lines -- minimal but realistic
# ---------------------------------------------------------------------------
_JOURNAL_ENTRIES_2025: List[Dict[str, Any]] = [
{"month": 3, "day": 15, "reference": "RG-2025-0042", "description": "Beratung Kunde ACME AG", "amount": 18_500.00, "debit": "1100", "credit": "3000"},
{"month": 3, "day": 22, "reference": "EK-2025-0017", "description": "Materialeinkauf Buehler AG", "amount": 9_200.00, "debit": "5400", "credit": "2000"},
{"month": 3, "day": 28, "reference": "MIETE-2025-03", "description": "Mietzins Buero Maerz", "amount": 3_000.00, "debit": "6000", "credit": "1020"},
{"month": 4, "day": 5, "reference": "RG-2025-0051", "description": "Beratung Kunde Bell AG", "amount": 24_300.00, "debit": "1100", "credit": "3000"},
{"month": 4, "day": 18, "reference": "EK-2025-0024", "description": "Materialeinkauf Industriebedarf", "amount": 7_800.00, "debit": "5400", "credit": "2000"},
{"month": 6, "day": 12, "reference": "RG-2025-0079", "description": "Beratung Kunde Bell AG", "amount": 32_100.00, "debit": "1100", "credit": "3000"},
{"month": 6, "day": 30, "reference": "MIETE-2025-Q2", "description": "Mietzins Buero Q2-Abrechnung", "amount": 3_500.00, "debit": "6000", "credit": "1020"},
{"month": 9, "day": 4, "reference": "RG-2025-0114", "description": "Beratung Kunde Migros", "amount": 41_500.00, "debit": "1100", "credit": "3000"},
{"month": 9, "day": 25, "reference": "EK-2025-0061", "description": "Materialeinkauf Buehler AG", "amount": 12_400.00, "debit": "5400", "credit": "2000"},
{"month": 11, "day": 14, "reference": "RG-2025-0188", "description": "Beratung Kunde ACME AG", "amount": 28_700.00, "debit": "1100", "credit": "3000"},
]
# ---------------------------------------------------------------------------
# Snapshot containers
# ---------------------------------------------------------------------------
@dataclass
class BenchmarkFixture:
    """In-memory rows that mimic feature DB tables.

    Each ``rowsByTable[tableName]`` is a list of column dicts compatible
    with the Pydantic feature data models (TrusteeDataAccountBalance, etc.).
    """

    # Mandate the snapshot belongs to.
    mandateId: str
    # Feature instance the snapshot belongs to.
    featureInstanceId: str
    # Table name -> list of row dicts (column name -> value).
    rowsByTable: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    # Table descriptors for the populated tables (see _buildSelectedTables).
    selectedTables: List[Dict[str, Any]] = field(default_factory=list)
def _buildSelectedTables() -> List[Dict[str, Any]]:
"""Return the DATA_OBJECT-shaped descriptors the sub-agent expects.
Mirrors what the catalog would return for the trustee feature; the
real `getDataObjects("trustee")` call would yield the same shape but
we hard-code the three tables we actually populate.
"""
return [
{
"objectKey": "data.feature.trustee.TrusteeDataAccount",
"label": {"de": "Kontenplan", "en": "Chart of accounts"},
"meta": {
"table": "TrusteeDataAccount",
"fields": ["id", "accountNumber", "label", "accountType", "currency", "isActive"],
},
},
{
"objectKey": "data.feature.trustee.TrusteeDataAccountBalance",
"label": {"de": "Kontosalden", "en": "Account balances"},
"meta": {
"table": "TrusteeDataAccountBalance",
"fields": [
"id", "accountNumber", "periodYear", "periodMonth",
"openingBalance", "debitTotal", "creditTotal",
"closingBalance", "currency",
],
},
},
{
"objectKey": "data.feature.trustee.TrusteeDataJournalLine",
"label": {"de": "Buchungszeilen", "en": "Journal lines"},
"meta": {
"table": "TrusteeDataJournalLine",
"fields": [
"id", "journalEntryId", "accountNumber",
"debitAmount", "creditAmount", "currency", "description",
],
},
},
]
def buildTrusteeBenchmarkFixture() -> BenchmarkFixture:
    """Assemble the complete in-memory benchmark snapshot.

    Three tables are produced: the account master, the account balances
    (per year and account one annual row with ``periodMonth=0`` followed by
    twelve monthly rows) and the journal lines (one debit and one credit
    line per journal entry). Every row carries ``mandateId`` and
    ``featureInstanceId`` so the fake provider can scope rows the same way
    the real one does.
    """
    scope = {"mandateId": _MANDATE_ID, "featureInstanceId": _FEATURE_INSTANCE_ID}

    accountRows: List[Dict[str, Any]] = [
        {
            "id": f"acc-{position:03d}",
            "accountNumber": master["accountNumber"],
            "label": master["label"],
            "accountType": master["accountType"],
            "currency": master["currency"],
            "isActive": True,
            **scope,
        }
        for position, master in enumerate(_ACCOUNT_MASTER)
    ]

    balanceRows: List[Dict[str, Any]] = []

    def _appendBalance(accountNumber, year, month, opening, closingValue, debit, credit):
        # The running row id is the number of balance rows emitted so far.
        balanceRows.append({
            "id": f"bal-{len(balanceRows):04d}",
            "accountNumber": accountNumber,
            "periodYear": year,
            "periodMonth": month,
            "openingBalance": opening,
            "debitTotal": debit,
            "creditTotal": credit,
            "closingBalance": closingValue,
            "currency": "CHF",
            **scope,
        })

    for year, closings in _ANNUAL_CLOSING.items():
        for accountNumber, closing in closings.items():
            opening = _openingFromPriorYear(year, accountNumber)

            # Annual row: totals are derived from the yearly increase only;
            # a decrease yields zero debit and credit totals.
            annualGain = max(closing - opening, 0.0)
            _appendBalance(
                accountNumber, year, 0, opening, float(closing),
                round(annualGain * 1.2, 2), round(annualGain * 0.2, 2),
            )

            for month in range(1, 13):
                monthly = _monthlyProgression(opening, closing, month)
                if monthly > opening:
                    gain = monthly - opening
                    debit = round(gain * 1.2, 2)
                    credit = round(gain * 0.2, 2)
                else:
                    debit = 0.0
                    credit = 0.0
                _appendBalance(accountNumber, year, month, opening, monthly, debit, credit)

    lineRows: List[Dict[str, Any]] = []
    for position, entry in enumerate(_JOURNAL_ENTRIES_2025):
        entryId = f"je-2025-{position:03d}"
        amount = float(entry["amount"])
        # Debit line first (even id), then the matching credit line (odd id).
        sides = (("debit", amount, 0.0), ("credit", 0.0, amount))
        for offset, (side, debitAmount, creditAmount) in enumerate(sides):
            lineRows.append({
                "id": f"jl-{position * 2 + offset:04d}",
                "journalEntryId": entryId,
                "accountNumber": entry[side],
                "debitAmount": debitAmount,
                "creditAmount": creditAmount,
                "currency": "CHF",
                "description": entry["description"],
                **scope,
            })

    return BenchmarkFixture(
        mandateId=_MANDATE_ID,
        featureInstanceId=_FEATURE_INSTANCE_ID,
        rowsByTable={
            "TrusteeDataAccount": accountRows,
            "TrusteeDataAccountBalance": balanceRows,
            "TrusteeDataJournalLine": lineRows,
        },
        selectedTables=_buildSelectedTables(),
    )

View file

@ -0,0 +1,226 @@
# Trustee Sub-Agent Benchmark -- 19 questions, analogous to Hein 2025
#
# Each question covers ONE expected hallucination class so we can attribute
# accuracy gains to specific phases (validator / ontology).
#
# Scoring per question (all binary unless noted):
# patternOk -- did the agent call the right tool(s) with the right filters?
# forbidOk -- did it AVOID the forbidden tool/op (e.g. SUM closingBalance)?
# numericOk -- does the final answer contain the expected number(s)?
# accuracyOk -- patternOk AND forbidOk AND numericOk
#
# numericTolerance: relative tolerance for numeric comparison (default 0.005 = 0.5 %).
- id: q01
question: "Was ist der Banksaldo per 31.12.2025 fuer das ZKB-Konto 1020?"
intent: BANK_BALANCE_AT_DATE
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "1020"
periodYear: 2025
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [152400.0]
- id: q02
question: "Wie hoch ist die Hauptkasse (Konto 1000) per Ende 2024?"
intent: CASH_BALANCE_AT_DATE
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "1000"
periodYear: 2024
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [5200.0]
- id: q03
question: "Summiere alle Bankkonten (102x) per 31.12.2025."
intent: BANK_GROUP_TOTAL_AT_DATE
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
periodYear: 2025
periodMonth: 0
accountNumberLike: "102%"
forbiddenTools: [aggregateTable]
expectedNumbers: [180500.0]
numericTolerance: 0.01
- id: q04
question: "Wie hat sich der Schlusssaldo des ZKB-Kontos 1020 ueber die Jahre 2023 bis 2025 entwickelt?"
intent: BALANCE_HISTORY_PER_YEAR
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "1020"
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [132500.0, 148900.0, 152400.0]
- id: q05
question: "Welches Konto hatte 2025 den hoechsten Schlusssaldo bei den Aktiven (1xxx)?"
intent: TOP_ASSET_AT_DATE
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
periodYear: 2025
periodMonth: 0
accountNumberLike: "1%"
forbiddenTools: [aggregateTable]
expectedAnswerContains: ["1020"]
expectedNumbers: [152400.0]
- id: q06
question: "Welche Konten gehoeren zu den Bankkonten (102x)?"
intent: ACCOUNT_LIST_FILTER
expectedTools: [queryTable]
expectedTable: TrusteeDataAccount
requiredFilters:
accountNumberLike: "102%"
forbiddenTools: [aggregateTable]
expectedAnswerContains: ["1020", "1021"]
- id: q07
question: "Wie hoch war der Materialaufwand (Konto 5400) im Jahr 2025?"
intent: EXPENSE_AT_YEAR
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "5400"
periodYear: 2025
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [112100.0]
- id: q08
question: "Wie viele Buchungszeilen gibt es insgesamt im System?"
intent: COUNT_ROWS
expectedTools: [aggregateTable]
expectedTable: TrusteeDataJournalLine
expectedAggregate: COUNT
forbiddenTools: []
expectedNumbers: [20]
- id: q09
question: "Wie hoch ist der gesamte Beratungsertrag (Konto 3000) im Jahr 2025?"
intent: REVENUE_AT_YEAR
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "3000"
periodYear: 2025
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [488700.0]
- id: q10
question: "Wie viel wurde 2025 auf das Materialaufwand-Konto 5400 gebucht (Soll-Summe ueber Buchungszeilen)?"
intent: JOURNAL_SUM_AT_ACCOUNT
expectedTools: [aggregateTable]
expectedTable: TrusteeDataJournalLine
expectedAggregate: SUM
expectedAggregateField: debitAmount
requiredFilters:
accountNumber: "5400"
forbiddenTools: []
expectedNumbers: [29400.0]
numericTolerance: 0.01
- id: q11
question: "Welche Buchungen im 1. Quartal 2025 (Januar bis Maerz) wurden auf Konto 3000 gebucht?"
intent: JOURNAL_LINES_BY_ACCOUNT
expectedTools: [queryTable]
expectedTable: TrusteeDataJournalLine
requiredFilters:
accountNumber: "3000"
forbiddenTools: [aggregateTable]
expectedAnswerContains: ["18500", "ACME"]
- id: q12
question: "Wie hoch war die Hauptkasse (Konto 1000) jeweils per Ende Maerz 2025 und per Ende Juni 2025?"
intent: MULTI_MONTH_SNAPSHOT
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "1000"
periodYear: 2025
forbiddenTools: [aggregateTable]
expectedNumbers: [5375.0, 5550.0]
numericTolerance: 0.01
- id: q13
question: "Wie hoch ist die Summe aller Aufwandskonten (5xxx und 6xxx) per Ende 2025?"
intent: EXPENSE_GROUP_TOTAL
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
periodYear: 2025
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [154100.0]
numericTolerance: 0.01
- id: q14
question: "Welches Konto hat den hoechsten openingBalance fuer 2025?"
intent: TOP_OPENING_BALANCE
# Both routes are legitimate: queryTable+orderBy+limit=1, or
# aggregateTable(MAX) followed by queryTable lookup. We only insist that
# the final answer names the right account and (optionally) the value.
expectedTools: [queryTable, aggregateTable]
expectedTable: TrusteeDataAccountBalance
forbiddenTools: []
expectedAnswerContains: ["3000"]
expectedNumbers: [462500.0]
- id: q15
question: "Liste alle Konten vom Typ asset auf."
intent: ACCOUNTS_BY_TYPE
expectedTools: [queryTable]
expectedTable: TrusteeDataAccount
requiredFilters:
accountType: "asset"
forbiddenTools: [aggregateTable]
expectedAnswerContains: ["1000", "1020", "1021", "1100"]
- id: q16
question: "Wie hoch ist der Schlusssaldo der Forderungen aus Lieferungen und Leistungen (Konto 1100) per Ende 2025?"
intent: BALANCE_BY_NAME_LOOKUP
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "1100"
periodYear: 2025
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [66800.0]
- id: q17
question: "Wie hoch waren die Verbindlichkeiten (Konto 2000) jeweils per Ende 2023, 2024 und 2025?"
intent: LIABILITY_HISTORY
expectedTools: [queryTable]
expectedTable: TrusteeDataAccountBalance
requiredFilters:
accountNumber: "2000"
periodMonth: 0
forbiddenTools: [aggregateTable]
expectedNumbers: [41300.0, 44100.0, 47900.0]
- id: q18
question: "Wie viele Bankkonten gibt es im Kontenplan (102x)?"
intent: ACCOUNT_COUNT_BY_PREFIX
expectedTools: [queryTable, aggregateTable]
expectedTable: TrusteeDataAccount
requiredFilters:
accountNumberLike: "102%"
forbiddenTools: []
expectedNumbers: [2]
- id: q19
question: "Gib mir alle Buchungszeilen mit einem Sollbetrag groesser als 20'000 CHF."
intent: JOURNAL_LINES_BY_AMOUNT
expectedTools: [queryTable]
expectedTable: TrusteeDataJournalLine
forbiddenTools: [aggregateTable]
expectedAnswerContains: ["24300", "32100", "41500", "28700"]

View file

@ -0,0 +1,23 @@
# Copyright (c) 2025 Patrick Motsch
"""Unit tests for Google STT helper config (no API calls)."""
from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields
def test_buildPrimaryStt_lightweight_stripsHeavyFeatures():
    """Lightweight mode keeps punctuation but disables word offsets, word
    confidence, the enhanced model and extra alternatives."""
    fields = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
    assert fields["model"] == "latest_short"
    assert fields["max_alternatives"] == 1
    for disabled in ("enable_word_time_offsets", "enable_word_confidence", "use_enhanced"):
        assert fields[disabled] is False
    assert fields["enable_automatic_punctuation"] is True
def test_buildPrimaryStt_full_matchesLegacyDefaults():
    """Full mode enables word offsets, word confidence and the enhanced
    model, and asks for three alternatives."""
    fields = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
    assert fields["model"] == "latest_long"
    assert fields["max_alternatives"] == 3
    for enabled in ("enable_word_time_offsets", "enable_word_confidence", "use_enhanced"):
        assert fields[enabled] is True

View file

@ -0,0 +1,112 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests for the repair-loop telemetry aggregation in agentLoop.
These counters (`validationFailures`, `repairAttempts`, `successAfterRepair`)
land on `AgentTrace` and are surfaced via the `AGENT_SUMMARY` event. The
Eval-Harness (Phase 1.5) reads them to compute the repair conversion rate.
"""
from __future__ import annotations
from modules.serviceCenter.services.serviceAgent.agentLoop import _computeRepairCounters
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
AgentRoundLog, ToolCallLog,
)
def _round(*toolCalls: ToolCallLog) -> AgentRoundLog:
    """Wrap the given tool calls in one round log (the round number is always 1)."""
    calls = [*toolCalls]
    return AgentRoundLog(roundNumber=1, toolCalls=calls)
def _failed(toolName: str, code: str) -> ToolCallLog:
    """Build an unsuccessful tool call carrying validation failure code ``code``."""
    message = f"{code}: ..."
    return ToolCallLog(toolName=toolName, success=False, validationFailureCode=code, error=message)
def _ok(toolName: str) -> ToolCallLog:
    """Build a successful tool call for ``toolName``."""
    return ToolCallLog(success=True, toolName=toolName)
def test_computeRepairCounters_emptyTrace():
    """A trace without rounds yields zero for all three counters."""
    failures, retries, recovered = _computeRepairCounters([])
    assert failures == 0
    assert retries == 0
    assert recovered == 0
def test_computeRepairCounters_allCleanRunsHaveZeroCounters():
    """Rounds made only of successful calls leave every counter at zero."""
    firstRound = _round(_ok("queryTable"), _ok("browseTable"))
    secondRound = _round(_ok("aggregateTable"))
    failures, retries, recovered = _computeRepairCounters([firstRound, secondRound])
    assert (failures, retries, recovered) == (0, 0, 0)
def test_computeRepairCounters_singleFailureCountsButNoRepairYet():
    """A lone failure in round 1 with no follow-up call is counted as a
    failure only; a repair is counted once the LLM tries again."""
    onlyRound = _round(_failed("queryTable", "FIELD_NOT_FOUND"))
    failures, retries, recovered = _computeRepairCounters([onlyRound])
    assert failures == 1
    assert retries == 0
    assert recovered == 0
def test_computeRepairCounters_repairThatSucceeds():
    """A failure in round 1 followed by a successful retry of the same tool
    in round 2 counts one failure, one attempt and one success."""
    rejected = _round(_failed("queryTable", "FIELD_NOT_FOUND"))
    retried = _round(_ok("queryTable"))
    failures, retries, recovered = _computeRepairCounters([rejected, retried])
    assert (failures, retries, recovered) == (1, 1, 1)
def test_computeRepairCounters_repairThatFailsAgain():
    """A retry of the same tool that is rejected again counts a second
    failure and one attempt, but no success."""
    trace = [_round(_failed("queryTable", "FIELD_NOT_FOUND")) for _ in range(2)]
    failures, retries, recovered = _computeRepairCounters(trace)
    assert (failures, retries, recovered) == (2, 1, 0)
def test_computeRepairCounters_siblingCallsInSameRoundAreNotRepairs():
    """Two queryTable calls emitted in the same round are siblings: the
    second cannot have seen the first one's rejection (parallel dispatch
    within a round), so it is NOT a repair attempt."""
    firstCall = _failed("queryTable", "FIELD_NOT_FOUND")
    secondCall = _failed("queryTable", "FIELD_NOT_FOUND")
    failures, retries, recovered = _computeRepairCounters([_round(firstCall, secondCall)])
    assert (failures, retries, recovered) == (2, 0, 0)
def test_computeRepairCounters_differentToolNamesAreIndependent():
    """A later browseTable call is not treated as a repair of an earlier
    queryTable failure."""
    rejected = _round(_failed("queryTable", "FIELD_NOT_FOUND"))
    unrelated = _round(_ok("browseTable"))
    failures, retries, recovered = _computeRepairCounters([rejected, unrelated])
    assert (failures, retries, recovered) == (1, 0, 0)
def test_computeRepairCounters_multiToolMix():
    """Trustee-like sequence: SUM(closingBalance) is rejected, the LLM
    switches to queryTable with a typo (rejected), then fixes the typo
    (success)."""
    trace = [
        _round(_failed("aggregateTable", "INVALID_AGGREGATE_TARGET")),
        _round(_failed("queryTable", "FIELD_NOT_FOUND")),
        _round(_ok("queryTable")),
    ]
    failures, retries, recovered = _computeRepairCounters(trace)
    # Two validation failures in total. Only queryTable was retried after a
    # rejection (and succeeded); aggregateTable was never retried, so it
    # contributes no attempt.
    assert failures == 2
    assert retries == 1
    assert recovered == 1

View file

@ -100,6 +100,9 @@ def _adapter(svc):
return SimpleNamespace(_svc=svc)
_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}]
def test_bootstrap_walks_team_space_lists_and_tasks():
svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService()
@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
assert job.mimeType == "application/vnd.clickup.task+json"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds-1"
assert job.provenance["authority"] == "clickup"
assert job.provenance["teamId"] == "team-1"
assert job.contentVersion # numeric millisecond string
# At least the header content-object is present.
ids = [co["contentObjectId"] for co in job.contentObjects]
assert "header" in ids
@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion():
assert result["indexed"] == 3
def test_bootstrap_skips_when_no_datasources():
    """Calling bootstrapClickup without data sources reports a skip with
    reason ``no_datasources``."""

    async def _bootstrap():
        return await bootstrapClickup(connectionId="c1")

    outcome = asyncio.run(_bootstrap())
    assert outcome["reason"] == "no_datasources"
    assert outcome["skipped"] is True
def test_bootstrap_honours_datasource_neutralize():
    """Every job recorded for a data source with ``neutralize=True`` carries
    that flag and the data source id in its provenance."""
    clickup = _FakeClickupService(taskCount=1)
    sink = _FakeKnowledgeService()
    conn = SimpleNamespace(mandateId="m1", userId="u1")
    neutralizingSources = [{"id": "ds-n", "neutralize": True}]

    async def _bootstrap():
        return await bootstrapClickup(
            connectionId="c1",
            dataSources=neutralizingSources,
            adapter=_adapter(clickup),
            connection=conn,
            knowledgeService=sink,
            limits=ClickupBootstrapLimits(maxAgeDays=None),
        )

    asyncio.run(_bootstrap())
    for ingested in sink.calls:
        assert ingested.neutralize is True
        assert ingested.provenance["dataSourceId"] == "ds-n"
if __name__ == "__main__":
    # Allow running this file directly: execute every test in order.
    for _case in (
        test_bootstrap_walks_team_space_lists_and_tasks,
        test_bootstrap_reports_duplicates_on_second_run,
        test_bootstrap_skips_tasks_older_than_maxAgeDays,
        test_bootstrap_maxTasks_caps_ingestion,
        test_bootstrap_skips_when_no_datasources,
        test_bootstrap_honours_datasource_neutralize,
    ):
        _case()
    print("OK — bootstrapClickup tests passed")

View file

@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options):
)
_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}]
def test_bootstrap_walks_drive_and_subfolders():
adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService()
@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=180),
)
result = asyncio.run(_run())
@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance():
assert job.sourceKind == "gdrive_item"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds1"
assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "drive"
assert job.contentVersion # modifiedTime ISO string
def test_bootstrap_skips_when_no_datasources():
    """Calling bootstrapGdrive without data sources reports a skip with
    reason ``no_datasources``."""

    async def _bootstrap():
        return await bootstrapGdrive(connectionId="c1")

    outcome = asyncio.run(_bootstrap())
    assert outcome["reason"] == "no_datasources"
    assert outcome["skipped"] is True
if __name__ == "__main__":
    # Allow running this file directly: execute every test in order.
    for _case in (
        test_bootstrap_walks_drive_and_subfolders,
        test_bootstrap_reports_duplicates_on_second_run,
        test_bootstrap_skips_files_older_than_maxAgeDays,
        test_bootstrap_passes_connection_provenance,
        test_bootstrap_skips_when_no_datasources,
    ):
        _case()
    print("OK — bootstrapGdrive tests passed")

View file

@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
async def _run():
return await bootstrapOutlook(
connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
assert job.sourceKind == "outlook_message"
assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds1"
assert job.provenance["service"] == "outlook"
assert job.contentVersion == "ck1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination():
async def _run():
return await bootstrapOutlook(
connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates():
async def _run():
return await bootstrapOutlook(
connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,

View file

@ -19,11 +19,18 @@ asked for the closing balance per period).
from __future__ import annotations
import asyncio
from unittest.mock import MagicMock
import pytest
from modules.shared import fkRegistry
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
ToolCallRequest, ToolResult,
)
from modules.serviceCenter.services.serviceAgent.featureDataAgent import (
_buildSchemaContext,
_buildSubAgentTools,
_buildTableSchemaBlock,
_formatFieldLine,
_summarizePythonType,
@ -152,10 +159,29 @@ def test_buildSchemaContext_forbidsSummingAggregateFields():
assert "closingBalance" in prompt
def test_buildSchemaContext_appendsTrusteeDomainHints():
"""When the feature module exposes getAgentDomainHints(), the schema prompt
must include those hints so the sub-agent knows e.g. that 102x are bank
accounts and periodMonth=0 is the annual total."""
def test_buildSchemaContext_appendsTrusteeOntologyBlock(monkeypatch):
    """With the ontology kill-switch env var unset, the trustee schema prompt
    must carry the compiled ontology block (Phase 2 path)."""
    monkeypatch.delenv("POWERON_DISABLE_FEATURE_ONTOLOGY", raising=False)

    schemaPrompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=[_trusteeAccountBalanceObj()],
        requestLang="de",
    )

    assert "DOMAIN ONTOLOGY (trustee):" in schemaPrompt
    assert "BankAccount" in schemaPrompt
    # Accept either casing of the rule keyword by normalising the lowercase
    # spelling before the substring check.
    normalizedPrompt = schemaPrompt.replace("never aggregate", "NEVER_AGGREGATE")
    assert "NEVER_AGGREGATE on TrusteeDataAccountBalance.closingBalance" in normalizedPrompt
    assert "BANK_BALANCE_AT_DATE" in schemaPrompt
def test_buildSchemaContext_fallsBackToLegacyHints_whenOntologyDisabled(monkeypatch):
"""With POWERON_DISABLE_FEATURE_ONTOLOGY=1 the builder must fall back to
the legacy `getAgentDomainHints()` block. This is the path used by the
eval harness to measure `baseline` and `phase1` accuracy without the
ontology-driven prompt."""
monkeypatch.setenv("POWERON_DISABLE_FEATURE_ONTOLOGY", "1")
selected = [_trusteeAccountBalanceObj()]
prompt = _buildSchemaContext(
featureCode="trustee",
@ -164,16 +190,14 @@ def test_buildSchemaContext_appendsTrusteeDomainHints():
requestLang="de",
)
assert "TRUSTEE DOMAIN HINTS" in prompt
assert "DOMAIN ONTOLOGY" not in prompt
assert "102x Bank / Post" in prompt
assert "periodMonth = 0" in prompt
assert "ANTI-PATTERNS" in prompt
assert 'LIKE \'102%\'' in prompt or "LIKE '102%'" in prompt
def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook():
"""Features that don't export getAgentDomainHints() should produce a prompt
without the trailing hints block. Verified by using a feature code that
cannot resolve to a main module (registry returns None)."""
def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook(monkeypatch):
"""Features that don't export getAgentDomainHints()/getAgentOntology()
should produce a prompt without any trailing hints block."""
monkeypatch.delenv("POWERON_DISABLE_FEATURE_ONTOLOGY", raising=False)
selected = [_trusteeAccountBalanceObj()]
prompt = _buildSchemaContext(
featureCode="nosuchfeature",
@ -182,4 +206,90 @@ def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook():
requestLang="de",
)
assert "TRUSTEE DOMAIN HINTS" not in prompt
assert "DOMAIN ONTOLOGY" not in prompt
assert "Keep your answer SHORT" in prompt
# ------------------------------------------------------------------
# Validator integration (Phase 1: Repair-Loop)
#
# These tests guard that pre-execute validation fires BEFORE the provider
# is touched, and that the structured error payload reaches the LLM via
# `ToolResult.errorDetails` -- the contract the LLM relies on for repair.
# ------------------------------------------------------------------
def _buildRegistryWithMockProvider():
    """Create a sub-agent ToolRegistry backed by a MagicMock provider.

    The mock answers browseTable/queryTable/aggregateTable with empty canned
    results and records every call, so tests can assert whether the provider
    was reached.

    Returns:
        A ``(registry, provider)`` tuple.
    """
    mockProvider = MagicMock()
    mockProvider.configure_mock(**{
        "browseTable.return_value": {"rows": [], "total": 0, "limit": 50, "offset": 0},
        "queryTable.return_value": {"rows": [], "total": 0, "limit": 50, "offset": 0},
        "aggregateTable.return_value": {"rows": [], "aggregate": "SUM", "field": "x"},
    })

    toolRegistry = _buildSubAgentTools(
        provider=mockProvider,
        featureInstanceId="fi-test",
        mandateId="m-test",
        tableFilters=None,
        validator=None,
    )
    return toolRegistry, mockProvider
def _dispatchSync(registry, toolName, args):
    """Synchronously dispatch a tool call through the registry.

    Args:
        registry: Tool registry exposing an awaitable ``dispatch(call, context=...)``.
        toolName: Name of the tool to invoke.
        args: Argument dict forwarded to the tool.

    Returns:
        Whatever ``registry.dispatch`` resolves to.
    """
    call = ToolCallRequest(name=toolName, args=args)

    async def _invoke():
        # Wrapping in a coroutine keeps this working even if dispatch returns
        # a non-coroutine awaitable, which asyncio.run would reject.
        return await registry.dispatch(call, context={})

    # asyncio.run creates a fresh loop and, unlike a hand-managed loop, also
    # cancels leftover tasks and shuts down async generators before closing.
    return asyncio.run(_invoke())
def test_subAgentTools_invalidFieldShortCircuitsBeforeProvider():
    """A queryTable call filtering on an unknown field must be rejected with
    FIELD_NOT_FOUND and must never reach the provider."""
    toolRegistry, mockProvider = _buildRegistryWithMockProvider()
    misspelledFilter = {"field": "klosingBalance", "op": "=", "value": 1}
    callArgs = {
        "tableName": "TrusteeDataAccountBalance",
        "filters": [misspelledFilter],
    }

    outcome = _dispatchSync(toolRegistry, "queryTable", callArgs)

    assert isinstance(outcome, ToolResult)
    assert outcome.success is False
    details = outcome.errorDetails
    assert details is not None
    assert details["code"] == "FIELD_NOT_FOUND"
    assert details["suggestion"] == "closingBalance"
    shortError = outcome.error
    assert shortError and shortError.startswith("FIELD_NOT_FOUND:")
    mockProvider.queryTable.assert_not_called()
def test_subAgentTools_sumClosingBalanceShortCircuits():
    """The flagship hallucination -- SUM(closingBalance) -- must be blocked
    by the pre-execute validator before the DB is touched."""
    registry, provider = _buildRegistryWithMockProvider()

    result = _dispatchSync(registry, "aggregateTable", {
        "tableName": "TrusteeDataAccountBalance",
        "aggregate": "SUM",
        "field": "closingBalance",
    })

    assert isinstance(result, ToolResult)
    assert result.success is False
    # Guard before subscripting so a missing payload fails as a readable
    # assertion instead of a TypeError on ``None[...]``.
    assert result.errorDetails is not None
    assert result.errorDetails["code"] == "INVALID_AGGREGATE_TARGET"
    assert result.errorDetails["field"] == "closingBalance"
    provider.aggregateTable.assert_not_called()
def test_subAgentTools_validCallReachesProvider():
    """Sanity check: a well-formed queryTable call succeeds and is forwarded
    to the provider exactly once."""
    toolRegistry, mockProvider = _buildRegistryWithMockProvider()
    periodFilters = [
        {"field": "periodYear", "op": "=", "value": 2025},
        {"field": "periodMonth", "op": "=", "value": 0},
    ]
    callArgs = {
        "tableName": "TrusteeDataAccountBalance",
        "filters": periodFilters,
        "fields": ["accountNumber", "closingBalance"],
    }

    outcome = _dispatchSync(toolRegistry, "queryTable", callArgs)

    assert outcome.success is True
    assert outcome.errorDetails is None
    mockProvider.queryTable.assert_called_once()

View file

@ -0,0 +1,295 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests for the Feature Data Sub-Agent QueryValidator.
Each constraint is exercised with both a Happy and a Sad path so a future
refactor that silently drops a check is caught immediately.
Test fixture is the real ``TrusteeDataAccountBalance`` / ``TrusteeDataJournalLine``
Pydantic models -- both are perfectly suited because they cover all four
constraint classes in production-realistic shape (string fields, numeric
fields, fields named ``closingBalance`` / ``debitTotal``).
"""
from __future__ import annotations
import pytest
from modules.shared import fkRegistry
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
Constraint,
ConstraintRule,
OntologyDescriptor,
ValidationErrorCode,
)
from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator
@pytest.fixture(scope="module", autouse=True)
def _ensureModels():
    """Run the fkRegistry model loader once per module, before any test here.

    NOTE(review): presumably this populates the model registry the validator
    looks tables up in -- confirm against fkRegistry.
    """
    loadModels = fkRegistry._ensureModelsLoaded
    loadModels()
@pytest.fixture()
def validator() -> QueryValidator:
    """Provide a fresh QueryValidator built with default arguments (no
    ontology passed) for each test."""
    defaultValidator = QueryValidator()
    return defaultValidator
# ------------------------------------------------------------------
# FieldExists -- browseTable / queryTable / aggregateTable
# ------------------------------------------------------------------
def test_browseQuery_happyPath_returnsNone(validator):
    """Browsing accountNumber and closingBalance must validate cleanly."""
    requestArgs = {"fields": ["accountNumber", "closingBalance"]}
    outcome = validator.validateBrowseQuery("TrusteeDataAccountBalance", requestArgs)
    assert outcome is None
def test_browseQuery_invalidField_returnsFieldNotFound(validator):
    """A misspelled browse field yields FIELD_NOT_FOUND with a suggestion."""
    misspelled = "closingBlance"  # typo
    outcome = validator.validateBrowseQuery(
        "TrusteeDataAccountBalance",
        {"fields": [misspelled]},
    )
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.FIELD_NOT_FOUND
    assert outcome.field == "closingBlance"
    assert outcome.suggestion == "closingBalance"
def test_queryTable_filterOnInvalidField_returnsFieldNotFound(validator):
    """Filtering on a misspelled field yields FIELD_NOT_FOUND and suggests
    ``closingBalance``."""
    badFilter = {"field": "klosingBalance", "op": "=", "value": 100}
    outcome = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [badFilter]},
    )
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.FIELD_NOT_FOUND
    assert outcome.suggestion == "closingBalance"
def test_queryTable_unknownTable_isLenient(validator):
    """Tables missing from MODEL_REGISTRY are not validated; schema errors
    are left to the SQL layer. This avoids false positives for pure UDB
    tables that are not exposed via Pydantic."""
    arbitraryFilter = {"field": "anything", "op": "=", "value": 1}
    outcome = validator.validateQueryTable("NoSuchTable123", {"filters": [arbitraryFilter]})
    assert outcome is None
# ------------------------------------------------------------------
# OperatorCompatible
# ------------------------------------------------------------------
def test_queryTable_likeOnStringField_isOk(validator):
    """LIKE on accountNumber must validate cleanly."""
    likeFilter = {"field": "accountNumber", "op": "LIKE", "value": "102%"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", {"filters": [likeFilter]})
    assert outcome is None
def test_queryTable_likeOnNumericField_isOperatorIncompatible(validator):
    """LIKE on closingBalance is rejected as OPERATOR_INCOMPATIBLE."""
    likeFilter = {"field": "closingBalance", "op": "LIKE", "value": "100%"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", {"filters": [likeFilter]})
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.OPERATOR_INCOMPATIBLE
    assert outcome.field == "closingBalance"
def test_queryTable_gteOnNumericField_isOk(validator):
    """``>=`` on closingBalance must validate cleanly."""
    rangeFilter = {"field": "closingBalance", "op": ">=", "value": 100}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", {"filters": [rangeFilter]})
    assert outcome is None
def test_queryTable_gteOnStringField_isOperatorIncompatible(validator):
    """``>=`` on currency is rejected as OPERATOR_INCOMPATIBLE."""
    rangeFilter = {"field": "currency", "op": ">=", "value": "CHF"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", {"filters": [rangeFilter]})
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.OPERATOR_INCOMPATIBLE
def test_queryTable_equalsOnAnyField_isOk(validator):
    """Equality operators (`=` / `!=`) are accepted on any field type;
    exercised here with `=` on currency."""
    equalityFilter = {"field": "currency", "op": "=", "value": "CHF"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", {"filters": [equalityFilter]})
    assert outcome is None
def test_queryTable_isNullOnAnyField_isOk(validator):
    """``IS NULL`` on mandateId must validate cleanly."""
    nullFilter = {"field": "mandateId", "op": "IS NULL", "value": None}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", {"filters": [nullFilter]})
    assert outcome is None
# ------------------------------------------------------------------
# AggregateTarget -- the highest-impact rule
# ------------------------------------------------------------------
def test_aggregate_sumDebitAmount_isOk(validator):
    """SUM over TrusteeDataJournalLine.debitAmount must validate cleanly."""
    aggregateArgs = {"aggregate": "SUM", "field": "debitAmount"}
    outcome = validator.validateAggregateQuery("TrusteeDataJournalLine", aggregateArgs)
    assert outcome is None
def test_aggregate_sumClosingBalance_isInvalidAggregateTarget(validator):
    """Flagship bug guard: SUM(closingBalance) across periods must be
    rejected as INVALID_AGGREGATE_TARGET."""
    aggregateArgs = {"aggregate": "SUM", "field": "closingBalance"}
    outcome = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
    assert outcome.field == "closingBalance"
    assert "already aggregated" in outcome.hint
def test_aggregate_avgDebitTotal_isInvalidAggregateTarget(validator):
    """`*Total` columns hold per-period turnovers, so averaging them across
    periods is meaningless and must be rejected."""
    aggregateArgs = {"aggregate": "AVG", "field": "debitTotal"}
    outcome = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
def test_aggregate_countClosingBalance_isOk(validator):
    """COUNT over a balance column is allowed: it answers how many balance
    rows exist."""
    aggregateArgs = {"aggregate": "COUNT", "field": "closingBalance"}
    outcome = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is None
def test_aggregate_sumOnStringField_isTypeMismatch(validator):
    """SUM over currency is rejected as TYPE_MISMATCH."""
    aggregateArgs = {"aggregate": "SUM", "field": "currency"}
    outcome = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.TYPE_MISMATCH
def test_aggregate_invalidField_returnsFieldNotFound(validator):
    """Aggregating a field named ``nonExistent`` yields FIELD_NOT_FOUND."""
    aggregateArgs = {"aggregate": "SUM", "field": "nonExistent"}
    outcome = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.FIELD_NOT_FOUND
def test_aggregate_invalidGroupBy_returnsFieldNotFound(validator):
    """A groupBy of ``ghostColumn`` yields FIELD_NOT_FOUND even though the
    aggregate field is the same one the SUM happy-path test uses."""
    aggregateArgs = {
        "aggregate": "SUM",
        "field": "debitAmount",
        "groupBy": "ghostColumn",
    }
    outcome = validator.validateAggregateQuery("TrusteeDataJournalLine", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.FIELD_NOT_FOUND
# ------------------------------------------------------------------
# OrderByValid
# ------------------------------------------------------------------
def test_queryTable_orderByValid_isOk(validator):
    """Ordering by periodYear must validate cleanly."""
    requestArgs = {"orderBy": "periodYear"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", requestArgs)
    assert outcome is None
def test_queryTable_orderByInvalid_returnsOrderByInvalid(validator):
    """A misspelled orderBy yields ORDER_BY_INVALID and suggests
    ``periodYear``."""
    requestArgs = {"orderBy": "periodYr"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", requestArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.ORDER_BY_INVALID
    assert outcome.suggestion == "periodYear"
def test_queryTable_orderByLiteralStringNone_isOk(validator):
    """LLMs occasionally send the literal string 'None' as orderBy; that
    must be tolerated."""
    requestArgs = {"orderBy": "None"}
    outcome = validator.validateQueryTable("TrusteeDataAccountBalance", requestArgs)
    assert outcome is None
# ------------------------------------------------------------------
# Ontology-driven override (Phase 2 readiness check)
# ------------------------------------------------------------------
def test_ontologyOverride_blocksAggregateForOntologyField():
    """An ontology NEVER_AGGREGATE rule blocks SUM on a field whose name does
    not match the convention suffixes (debitAmount)."""
    syntheticRule = Constraint(
        appliesTo="TrusteeDataJournalLine.debitAmount",
        rule=ConstraintRule.NEVER_AGGREGATE,
        message="Synthetic test rule.",
    )
    descriptor = OntologyDescriptor(featureCode="trustee", constraints=[syntheticRule])
    ontologyValidator = QueryValidator(ontology=descriptor)

    outcome = ontologyValidator.validateAggregateQuery(
        "TrusteeDataJournalLine",
        {"aggregate": "SUM", "field": "debitAmount"},
    )

    assert outcome is not None
    assert outcome.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
# ------------------------------------------------------------------
# QueryValidationError serialization (consumed by featureDataAgent)
# ------------------------------------------------------------------
def test_validationError_toShortErrorIncludesCodeAndField(validator):
    """The short error string starts with the error code and mentions the
    offending field."""
    aggregateArgs = {"aggregate": "SUM", "field": "closingBalance"}
    outcome = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None

    shortText = outcome.toShortError()
    assert shortText.startswith("INVALID_AGGREGATE_TARGET:")
    assert "closingBalance" in shortText
def test_validationError_toErrorDetailsHasFourKeys(validator):
    """The structured payload exposes exactly code/field/suggestion/hint."""
    badFilter = {"field": "klosingBalance", "op": "=", "value": 0}
    outcome = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [badFilter]},
    )
    assert outcome is not None

    payload = outcome.toErrorDetails()
    expectedKeys = {"code", "field", "suggestion", "hint"}
    assert set(payload.keys()) == expectedKeys
    assert payload["code"] == "FIELD_NOT_FOUND"
    assert payload["suggestion"] == "closingBalance"

View file

@ -0,0 +1,199 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests for the trustee ontology and the ontology-to-prompt compiler.
Verifies:
* the descriptor passes Pydantic validation
* `constraintsForTable` correctly scopes by table/field prefix
* the compiler emits a stable header + every entity name + every
constraint message
* the QueryValidator picks up ontology constraints (NEVER_AGGREGATE on
closingBalance) over the convention-based defaults
* the `getAgentOntology()` hook on `mainTrustee` returns the descriptor
* `_buildValidatorForFeature("trustee")` wires the validator with the
ontology
"""
from __future__ import annotations
import pytest
from modules.features.trustee.mainTrustee import getAgentOntology
from modules.features.trustee.trusteeOntology import getTrusteeOntology
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
ConstraintRule,
OntologyDescriptor,
SemanticType,
ValidationErrorCode,
)
from modules.serviceCenter.services.serviceAgent.featureDataAgent import (
_buildValidatorForFeature,
_loadFeatureOntologyBlock,
)
from modules.serviceCenter.services.serviceAgent.ontologyToPromptCompiler import (
compileOntologyToPrompt,
)
from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator
from modules.shared import fkRegistry
@pytest.fixture(scope="module", autouse=True)
def _ensureModels():
    """Run the fkRegistry model loader once per module, before any test here.

    NOTE(review): presumably this populates the model registry used for
    table lookups -- confirm against fkRegistry.
    """
    loadModels = fkRegistry._ensureModelsLoaded
    loadModels()
# ---------------------------------------------------------------------------
# OntologyDescriptor structure
# ---------------------------------------------------------------------------
def test_trusteeOntology_returnsValidDescriptor():
    """The trustee ontology is an OntologyDescriptor for ``trustee`` with
    every top-level section populated."""
    descriptor = getTrusteeOntology()
    assert isinstance(descriptor, OntologyDescriptor)
    assert descriptor.featureCode == "trustee"
    # Checked in the same order as a chained ``and`` would evaluate them.
    for sectionName in ("entities", "relations", "constraints", "canonicalPatterns"):
        assert getattr(descriptor, sectionName)
def test_trusteeOntology_hasBankAccountSpecialization():
    """The ontology declares a BankAccount entity whose parent is Account
    and whose semantic type is ACCOUNT."""
    descriptor = getTrusteeOntology()
    bankEntity = None
    for entity in descriptor.entities:
        if entity.name == "BankAccount":
            bankEntity = entity
            break
    assert bankEntity is not None
    assert bankEntity.parentEntity == "Account"
    assert bankEntity.semanticType == SemanticType.ACCOUNT
def test_trusteeOntology_closingBalanceIsNeverAggregate():
    """The balance table carries a NEVER_AGGREGATE constraint that targets
    closingBalance."""
    descriptor = getTrusteeOntology()
    balanceConstraints = descriptor.constraintsForTable("TrusteeDataAccountBalance")
    hasRule = any(
        entry.rule == ConstraintRule.NEVER_AGGREGATE
        and entry.appliesTo == "TrusteeDataAccountBalance.closingBalance"
        for entry in balanceConstraints
    )
    assert hasRule, "Expected NEVER_AGGREGATE constraint on closingBalance"
def test_trusteeOntology_requiresPeriodFilterOnBalanceTable():
    """The balance table must carry a REQUIRES_FILTER_ON constraint whose
    ``requiredFields`` include both periodYear and periodMonth.

    Every REQUIRES_FILTER_ON constraint is inspected, so the test does not
    depend on the order in which the ontology lists its constraints.
    """
    ont = getTrusteeOntology()
    constraints = ont.constraintsForTable("TrusteeDataAccountBalance")
    filterConstraints = [
        c for c in constraints if c.rule == ConstraintRule.REQUIRES_FILTER_ON
    ]
    assert filterConstraints, "Expected at least one REQUIRES_FILTER_ON constraint"

    requiredFieldSets = [
        (c.params or {}).get("requiredFields") or [] for c in filterConstraints
    ]
    assert any(
        "periodYear" in required and "periodMonth" in required
        for required in requiredFieldSets
    ), f"No REQUIRES_FILTER_ON constraint requires both period fields: {requiredFieldSets}"
def test_constraintsForTable_filtersScopeCorrectly():
    """constraintsForTable only returns constraints whose ``appliesTo``
    starts with the requested table name."""
    descriptor = getTrusteeOntology()
    scoped = {
        "TrusteeDataAccountBalance": descriptor.constraintsForTable("TrusteeDataAccountBalance"),
        "TrusteeDataJournalLine": descriptor.constraintsForTable("TrusteeDataJournalLine"),
    }
    for tableName, tableConstraints in scoped.items():
        for entry in tableConstraints:
            assert entry.appliesTo.startswith(tableName)
# ---------------------------------------------------------------------------
# Prompt compiler
# ---------------------------------------------------------------------------
def test_compiler_emitsExpectedHeader():
    """The compiled prompt begins with the trustee ontology header; on
    failure the actual first line is shown."""
    compiled = compileOntologyToPrompt(getTrusteeOntology())
    headerPresent = compiled.startswith("DOMAIN ONTOLOGY (trustee):")
    assert headerPresent, compiled.splitlines()[0]
def test_compiler_includesAllEntityNames():
    """Every entity name of the ontology appears in the compiled prompt."""
    descriptor = getTrusteeOntology()
    compiled = compileOntologyToPrompt(descriptor)
    for entity in descriptor.entities:
        isPresent = entity.name in compiled
        assert isPresent, f"Entity {entity.name} missing from compiled prompt"
def test_compiler_includesAllConstraintMessages():
    """The text of each constraint message up to its first period appears in
    the compiled prompt."""
    descriptor = getTrusteeOntology()
    compiled = compileOntologyToPrompt(descriptor)
    for constraint in descriptor.constraints:
        leadingSentence = constraint.message.split(".")[0]
        assert leadingSentence in compiled, f"Constraint message missing: {constraint.message[:40]}"
def test_compiler_includesCanonicalPatternTools():
    """Each canonical pattern contributes both its intent and its tool name
    to the compiled prompt."""
    descriptor = getTrusteeOntology()
    compiled = compileOntologyToPrompt(descriptor)
    for canonical in descriptor.canonicalPatterns:
        assert canonical.intent in compiled
        toolName = canonical.pattern["tool"]
        assert toolName in compiled
def test_compiler_deterministic():
    """Compiling the trustee ontology twice yields identical text."""
    renderings = [compileOntologyToPrompt(getTrusteeOntology()) for _ in range(2)]
    assert renderings[0] == renderings[1]
# ---------------------------------------------------------------------------
# QueryValidator x ontology integration
# ---------------------------------------------------------------------------
def test_validator_picksUpOntologyNeverAggregate():
    """With the trustee ontology attached, SUM(closingBalance) is rejected
    as INVALID_AGGREGATE_TARGET."""
    ontologyValidator = QueryValidator(ontology=getTrusteeOntology())
    aggregateArgs = {"aggregate": "SUM", "field": "closingBalance"}
    outcome = ontologyValidator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
    assert outcome.field == "closingBalance"
def test_validator_ontologyConstraintFiresOnDebitTotal():
    """With the trustee ontology attached, SUM(debitTotal) is rejected as
    INVALID_AGGREGATE_TARGET."""
    ontologyValidator = QueryValidator(ontology=getTrusteeOntology())
    aggregateArgs = {"aggregate": "SUM", "field": "debitTotal"}
    outcome = ontologyValidator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert outcome is not None
    assert outcome.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
def test_validator_allowsLegitimateAggregateOnJournalLine():
    """With the trustee ontology attached, SUM(debitAmount) on journal lines
    still validates cleanly."""
    ontologyValidator = QueryValidator(ontology=getTrusteeOntology())
    aggregateArgs = {"aggregate": "SUM", "field": "debitAmount"}
    outcome = ontologyValidator.validateAggregateQuery("TrusteeDataJournalLine", aggregateArgs)
    assert outcome is None
# ---------------------------------------------------------------------------
# featureDataAgent integration hooks
# ---------------------------------------------------------------------------
def test_mainTrustee_getAgentOntology_returnsDescriptor():
    """The mainTrustee hook returns an OntologyDescriptor for ``trustee``."""
    descriptor = getAgentOntology()
    assert isinstance(descriptor, OntologyDescriptor)
    assert descriptor.featureCode == "trustee"
def test_loadFeatureOntologyBlock_returnsCompiledBlock():
    """Loading the trustee ontology block yields text that starts with the
    ontology header and mentions BankAccount."""
    compiled = _loadFeatureOntologyBlock("trustee")
    assert compiled.startswith("DOMAIN ONTOLOGY (trustee):")
    assert "BankAccount" in compiled
def test_loadFeatureOntologyBlock_unknownFeatureReturnsEmpty():
    """An unknown feature code yields an empty ontology block."""
    compiled = _loadFeatureOntologyBlock("doesNotExist")
    assert compiled == ""
def test_buildValidatorForFeature_trustee_hasOntology():
    """The validator built for ``trustee`` is wired with the trustee
    ontology."""
    featureValidator = _buildValidatorForFeature("trustee")
    attachedOntology = featureValidator._ontology
    assert attachedOntology is not None
    assert attachedOntology.featureCode == "trustee"
def test_buildValidatorForFeature_unknownFeature_noOntology():
    """The validator built for an unknown feature code has no ontology
    attached."""
    featureValidator = _buildValidatorForFeature("doesNotExist")
    attachedOntology = featureValidator._ontology
    assert attachedOntology is None