Merge pull request #165 from valueonag/int
Some checks failed
Deploy Gateway / deploy (push) Failing after 3s
Some checks failed
Deploy Gateway / deploy (push) Failing after 3s
Int
This commit is contained in:
commit
a31e0dadc3
92 changed files with 7449 additions and 877 deletions
2
.github/workflows/deploy-gcp.yml
vendored
2
.github/workflows/deploy-gcp.yml
vendored
|
|
@ -56,7 +56,7 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Determine environment
|
||||
id: env
|
||||
|
|
|
|||
8
.github/workflows/int_gateway-int.yml
vendored
8
.github/workflows/int_gateway-int.yml
vendored
|
|
@ -22,10 +22,10 @@ jobs:
|
|||
contents: read #This is required for actions/checkout
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python version
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ jobs:
|
|||
run: zip release.zip ./* -r
|
||||
|
||||
- name: Upload artifact for deployment jobs
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-app
|
||||
path: |
|
||||
|
|
@ -66,7 +66,7 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Download artifact from build job
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-app
|
||||
|
||||
|
|
|
|||
8
.github/workflows/main_gateway-prod.yml
vendored
8
.github/workflows/main_gateway-prod.yml
vendored
|
|
@ -22,10 +22,10 @@ jobs:
|
|||
contents: read #This is required for actions/checkout
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python version
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ jobs:
|
|||
run: zip release.zip ./* -r
|
||||
|
||||
- name: Upload artifact for deployment jobs
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-app
|
||||
path: |
|
||||
|
|
@ -66,7 +66,7 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Download artifact from build job
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-app
|
||||
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@ jobs:
|
|||
contents: write # push requirements.lock
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
|
|
|
|||
8
app.py
8
app.py
|
|
@ -404,8 +404,10 @@ async def lifespan(app: FastAPI):
|
|||
try:
|
||||
from modules.serviceCenter.services.serviceBackgroundJobs.mainBackgroundJobService import (
|
||||
recoverInterruptedJobs,
|
||||
registerZombieKillerScheduler,
|
||||
)
|
||||
recoverInterruptedJobs()
|
||||
registerZombieKillerScheduler(intervalMinutes=5)
|
||||
except Exception as e:
|
||||
logger.warning(f"BackgroundJob recovery failed (non-critical): {e}")
|
||||
|
||||
|
|
@ -604,6 +606,12 @@ app.include_router(promptRouter)
|
|||
from modules.routes.routeDataConnections import router as connectionsRouter
|
||||
app.include_router(connectionsRouter)
|
||||
|
||||
from modules.routes.routeRagInventory import router as ragInventoryRouter
|
||||
app.include_router(ragInventoryRouter)
|
||||
|
||||
from modules.routes.routeAdminSttBenchmark import router as sttBenchmarkRouter
|
||||
app.include_router(sttBenchmarkRouter)
|
||||
|
||||
from modules.routes.routeTableViews import router as tableViewsRouter
|
||||
app.include_router(tableViewsRouter)
|
||||
|
||||
|
|
|
|||
97
env-gateway-dev.20260515_122326.backup
Normal file
97
env-gateway-dev.20260515_122326.backup
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# Development Environment Configuration
|
||||
|
||||
# System Configuration
|
||||
APP_ENV_TYPE = dev
|
||||
APP_ENV_LABEL = Development Instance Patrick
|
||||
APP_API_URL = http://localhost:8000
|
||||
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt
|
||||
APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
|
||||
APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
|
||||
|
||||
# PostgreSQL DB Host
|
||||
DB_HOST=localhost
|
||||
DB_USER=poweron_dev
|
||||
DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
|
||||
DB_PORT=5432
|
||||
|
||||
# Security Configuration
|
||||
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
|
||||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
|
||||
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
|
||||
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
|
||||
APP_LOGGING_CONSOLE_ENABLED = True
|
||||
APP_LOGGING_FILE_ENABLED = True
|
||||
APP_LOGGING_ROTATION_SIZE = 10485760
|
||||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
|
||||
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
|
||||
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
||||
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
|
||||
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback
|
||||
|
||||
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
|
||||
|
||||
# Stripe Billing (both end with _SECRET for encryption script)
|
||||
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
|
||||
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09
|
||||
STRIPE_API_VERSION = 2026-01-28.clover
|
||||
STRIPE_AUTOMATIC_TAX_ENABLED = false
|
||||
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
|
||||
|
||||
# AI configuration
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiAnthropic_API_SECRET = D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
|
||||
Connector_AiPerplexity_API_SECRET = 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
|
||||
Connector_AiTavily_API_SECRET = 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
|
||||
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
|
||||
Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3
b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pO
MmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
|
||||
|
||||
# Feature SyncDelta JIRA configuration
|
||||
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
|
||||
|
||||
# Teamsbot Browser Bot Service
|
||||
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
|
||||
# The bot will connect back to localhost:8000 via WebSocket
|
||||
TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100
|
||||
|
||||
# Debug Configuration
|
||||
APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
|
||||
APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
|
||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
||||
# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves.
|
||||
# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av
|
||||
# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a
|
||||
|
||||
|
|
@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z
|
|||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
|
|
@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
|||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
|
||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
|
||||
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
|
||||
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||
|
|
@ -61,12 +61,12 @@ STRIPE_AUTOMATIC_TAX_ENABLED = false
|
|||
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
|
||||
|
||||
# AI configuration
|
||||
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9
|
||||
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
|
||||
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5ZmdDZ3hrSElrMnQzNFAtel9wX191VjVzN2g1LWZoa0V1YklubEdmMEJDdEZiR1RWeVZrM3V3enBHX3p6WUtTS0kwYkFyVEF0Nm8zX05CelVQcFJUc0lwVW5iNFczc1p1WWJ2WFBmd0lpLUxxWndEeUh0b2hGUHVpN19vb19nMTBnV1A1VmNpWERVX05lQ29VS20wTjZ3PT0=
|
||||
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
|
||||
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlRzI0aTZXSHhTbklJa1ROdTRJdW94b1p0ZUhRdW55Zzdza2RsUWduSDRhMlVCYnQ4MnA0VU9ZZi10M0tySGwxV2ZweGZQei1FaWU4QThnWVE2LUI2WGhSVWhLbnVkRU5uZDgyVV90TjZXd1czaDdUaXJBU0hNWndfOFZTNzZ4UnNjenlqRWw2T2lBZFphWjNYcmtXSVZhcTUwUGhiYmhMM2lNemM0UFZwUndZVUYtTFUxVUctSEtoWmRkSE1GamJuS0FLMkhmWWNVTmJHemJCRzdhQ0Z6UzVFb1l1b1JjQWliQkt6MlNHeFBHb0h5VlBZWW1NUDNlTWpPV0lqZldjeWs2MExiTHJRdGF4b214c191ZkswVHZCcGVPMzZHaElrbWFvSVlqOXRjTkU9
|
||||
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlbXEzdGc2NFExb3AzVUw0cEhkZzlNRjZxTVlJMV81LTZhVXhoNXBpYUlMN0FxUUJHQlJnS0N3OV85Uk9sa1J3M1lyZExSMWVsbzVSdzdWQUVsUVp2dzhfLThmY2lNb1NhVGlvbnhLR0NnSmhsOVp2RkxfODc2SFpDYlBkcWp4aFFtNldtZGQ2LUhBZVM5VXk4RTNHNzQyV3FnMVNJMW9yOGpRRkQxUC1hZ3NOOHhqV3Y4LWJSNjFYQ3dwQmhrRWJRRzhaX1N4aFlWLTVsaEJmOWxkTjZMZz09
|
||||
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlNV9felVPcHVyMU9kVGhGZEt0MG9iRzRrTVM4TFJvSHhGOVo0U1ROWkdEMzRSWjhtMnFrZUhHTHNXelpLZ014RzRkMlIxZDJwcjEwc1dRamY5ekJMR1VLb2w4eEZqZENBRnFaZlRhb1h5VE05Tml1ZlVBWHBaTkJaZUE5NWprVklva0ZFZnB4cFFudGdkalpmTlBhdV9nPT0=
|
||||
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlY1R2WGpuazk5M05SeDIyLWd3bHpKN3lUdlVFdjhvZEJXdlM4bGlBdTB1TjRia051YllDQ2lwM0V3R3dPd2lKVWxoSm9BNWl1ZFFlVkZ5cXh4TFRVU0Z4NVU5WVRjSUJPc01La3JyaVZSNkhYWU9PR00yMENEb0dRT3l5enEwSFlWZVVzTVR0UWQ4eUxvRmZvWHl0c0xRPT0=
|
||||
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
|
||||
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE=
|
||||
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlelh2T2hqNGcxV0hMV1FKbmFDZjVHUWF6T2FXbGlCSnQzSzNXLWJHeXBFWE1nUlh1b1NHY1JRSEVtTVEtc1MtUnZrX2ZCcURqQ2FYNmFWa2xudGJtS3g2eVo4MFZMd09nZTBNMmo1ZHU0bzBJdFRqLVhHSVZNb2Zrc0VkUXI0SVk=
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
|
|
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
|
|||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
|
||||
|
||||
# Manadate Pre-Processing Servers
|
||||
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
|
||||
|
||||
# Preprocessor API Configuration
|
||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
|
|
|||
92
env-gateway-int.20260515_122326.backup
Normal file
92
env-gateway-int.20260515_122326.backup
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Integration Environment Configuration
|
||||
|
||||
# System Configuration
|
||||
APP_ENV_TYPE = int
|
||||
APP_ENV_LABEL = Integration Instance
|
||||
APP_API_URL = https://gateway-int.poweron.swiss
|
||||
# Force SameSite=None+Secure for auth cookies (cross-site UI on poweron-center.net). Optional if APP_API_URL is https://
|
||||
APP_COOKIE_SECURE = true
|
||||
APP_KEY_SYSVAR = CONFIG_KEY
|
||||
APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
|
||||
APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9
|
||||
|
||||
# PostgreSQL DB Host
|
||||
DB_HOST=gateway-int-server.postgres.database.azure.com
|
||||
DB_USER=heeshkdlby
|
||||
DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
|
||||
DB_PORT=5432
|
||||
|
||||
# Security Configuration
|
||||
APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
|
||||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
|
||||
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
|
||||
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
|
||||
APP_LOGGING_CONSOLE_ENABLED = True
|
||||
APP_LOGGING_FILE_ENABLED = True
|
||||
APP_LOGGING_ROTATION_SIZE = 10485760
|
||||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
|
||||
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
|
||||
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
||||
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
|
||||
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/clickup/auth/connect/callback
|
||||
|
||||
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
|
||||
|
||||
# Stripe Billing (both end with _SECRET for encryption script)
|
||||
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
|
||||
STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI=
|
||||
STRIPE_API_VERSION = 2026-01-28.clover
|
||||
STRIPE_AUTOMATIC_TAX_ENABLED = false
|
||||
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
|
||||
|
||||
# AI configuration
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiAnthropic_API_SECRET = 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
|
||||
Connector_AiPerplexity_API_SECRET = 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
|
||||
Connector_AiTavily_API_SECRET = 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
|
||||
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
|
||||
Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBH
WUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhh
MVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
|
||||
|
||||
# Feature SyncDelta JIRA configuration
|
||||
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
|
||||
|
||||
# Teamsbot Browser Bot Service
|
||||
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
|
||||
|
||||
# Debug Configuration
|
||||
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
|
||||
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
|
@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ
|
|||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
|
|
@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
|||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
|
||||
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
|
||||
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (connections / "Verbindungen", automation). Create an app in ClickUp under Settings → Apps → API, and set its redirect URL to exactly the value of Service_CLICKUP_OAUTH_REDIRECT_URI.
|
||||
|
|
@ -63,12 +63,12 @@ STRIPE_AUTOMATIC_TAX_ENABLED = false
|
|||
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
|
||||
|
||||
# AI configuration
|
||||
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9
|
||||
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
|
||||
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnB5dkd6UkhtU3lhYmZMSlo0bklQZ2s3UTFBSkprZTNwWkg5Q2lVa0wtenhxWXpva21xVDVMRjdKSmhpTmxWS05IUTRoRHdCbktSRVVjcVFnY1RfV0N2S2dyV0dTMlhxQlRFVm41RkFTWVQzQThuVkZwdlNuVC05QlVRVXB6Qjk3akNpYmY1MFR6R1ByMzlIMllRZlRRYVVRN2ZBPT0=
|
||||
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
|
||||
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlYUZpRDFqLWhQajZxSElqMEMzdGZIRm5TeDBSSFlqenpZYVJEa1BtRXM1M21pd3hjTGZvSDJPcGJoY2gyQlNncWNwNkNIR0NFQnpjXzA5U2t6Zm1DWWNNVEZrTE5DVzRQVGdlZzRldGoyRWhaeTJfYjBHd0ludWpGcWdqd3hKTHJ5T0piVE15Tk1YZUZnSnE4OXdKOUhXd292dHpWMkxlR3dNclc1N2t0ckFoMmd5WTlBci11MXRGNV9UTlFCSmdOOE83bGJyODFUQ3E2NXJpRHJWZUM0cHFHekNJa0FlN3hjd2VFQ1Nqa1JFQ2NFdjlMWW1TbEV4TVZBeDFEZVVnUWlBVUV1Z0NUNHV0RE1fTEJaLTQxQksyVE1LSE1sSG0ycG9fTS1hNzh4dTQ9
|
||||
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlRHFpNThJb3g3UU05cUw4SVJpOXBTblU5QzU1WFItZ2JkNXVILVN4VHp0Umh2RjJyZXJMNVp5OWFxLWhjRjhub3cxajkxMVRQMnZQdVBGT21obWN0Q0NlOU80MVhMMXRWb1l3cWNpR2Ytc1d0WnVlRUN1TTZ4NjFQcDd0Wll4cFN6dzk1OU5SZGNJck54WmNoeElITzEzejJrczVSQnp6ZTBINGtENHFiT3NnWjdUME9xXzJ5Y0N3dHk5QnpBRkpyVTgxOE0xTVllR2JMUC0yTkwyWWxHQT09
|
||||
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFla1h1R1M3QlQ5XzJhS0x4eXFpTkZ3WHpLMWVZZldRMGpMX2psMFZ2RmpETTZMZ3ZXblo2MnhyemxYWXRsMHN1LXdZU3k5ampEMjMtdzcyb1J4Ri1rTmxPOWhJMF9MMEtzZ3d5dFZxSFY3TjNac3ZpTVJxUFFmUVpXeHEtbVBTUmtiR0lhQjhVcjM3U1NNX1ZHY1NxUFJ3PT0=
|
||||
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlbmRSZVRjTzVKRklFbFgwdVZJaE5jNVoyX3dVTVlRUFVUenc4X1JOX2laOHRoTU9mN1lTUVRzb2xNZjJXVjhEYnVIaXdkSWN4NEpJbTFJZFN2cmkwUkJ0ZXNKT2NidktjdDFJX1BkZ3QwU3dQRzg0aG9aNmtxc1FZZ1ZBRjQyM3lOSS1EYkpqWmxoV0xWWE1Fc01uN3RnPT0=
|
||||
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
|
||||
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI=
|
||||
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlU2tMLTFnQWhET2Nia2pTcVpBakRaSVFDdUpHRzZ1bkhGVVhMeEVlSnFZU3F3UFRBUkNMMU4tQU92OUdTeDlpM2VZbXJzLURQZ1lPLVB3azgxSDZabkhkSHJ5Y005aWhtcDJzajk3a2JDQUxCZlNKRGw5elJuSzJMUUpTZ2hiSlU=
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
|
|
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
|||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||
|
||||
# Mandate Pre-Processing Servers
|
||||
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
|
||||
|
||||
# Preprocessor API Configuration
|
||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
|
|
|||
|
|
@ -32,19 +32,19 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
|||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY=
|
||||
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk=
|
||||
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI =
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI =
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (connections / "Verbindungen", automation). Create an app in ClickUp under Settings → Apps → API, and set its redirect URL to exactly the value of Service_CLICKUP_OAUTH_REDIRECT_URI.
|
||||
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
||||
|
|
@ -62,12 +62,12 @@ STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
|
|||
|
||||
|
||||
# AI configuration
|
||||
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
|
||||
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
|
||||
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
|
||||
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiAnthropic_API_SECRET = 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
|
||||
Connector_AiPerplexity_API_SECRET = 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
|
||||
Connector_AiTavily_API_SECRET = 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
|
||||
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
|
||||
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
|
||||
Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
|
|
@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
|||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||
|
||||
# Mandate Pre-Processing Servers
|
||||
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
|
||||
|
||||
# Preprocessor API Configuration
|
||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
|
|
|||
92
env-gateway-prod.20260515_122326.backup
Normal file
92
env-gateway-prod.20260515_122326.backup
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Production Environment Configuration
|
||||
|
||||
# System Configuration
|
||||
APP_ENV_TYPE = prod
|
||||
APP_ENV_LABEL = Production Instance
|
||||
APP_KEY_SYSVAR = CONFIG_KEY
|
||||
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
|
||||
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
|
||||
APP_API_URL = https://gateway-prod.poweron.swiss
|
||||
APP_COOKIE_SECURE = true
|
||||
|
||||
# PostgreSQL DB Host
|
||||
DB_HOST=gateway-prod-server.postgres.database.azure.com
|
||||
DB_USER=gzxxmcrdhn
|
||||
DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9
|
||||
DB_PORT=5432
|
||||
|
||||
# Security Configuration
|
||||
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
|
||||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
|
||||
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
|
||||
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
|
||||
APP_LOGGING_CONSOLE_ENABLED = True
|
||||
APP_LOGGING_FILE_ENABLED = True
|
||||
APP_LOGGING_ROTATION_SIZE = 10485760
|
||||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
|
||||
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
|
||||
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (connections / "Verbindungen", automation). Create an app in ClickUp under Settings → Apps → API, and set its redirect URL to exactly the value of Service_CLICKUP_OAUTH_REDIRECT_URI.
|
||||
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
||||
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
|
||||
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/clickup/auth/connect/callback
|
||||
|
||||
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
|
||||
|
||||
# Stripe Billing (both end with _SECRET for encryption script)
|
||||
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
|
||||
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
|
||||
STRIPE_API_VERSION = 2026-01-28.clover
|
||||
STRIPE_AUTOMATIC_TAX_ENABLED = false
|
||||
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
|
||||
|
||||
|
||||
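# AI configuration (NOTE: several keys below are stored in plaintext, unlike the PROD_ENC-encrypted values elsewhere in this file; rotate them and store them encrypted)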
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiAnthropic_API_SECRET = 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
|
||||
Connector_AiPerplexity_API_SECRET = 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
|
||||
Connector_AiTavily_API_SECRET = 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
|
||||
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
|
||||
Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzN
ISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd
2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
|
||||
|
||||
# Feature SyncDelta JIRA configuration
|
||||
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=
|
||||
|
||||
# Teamsbot Browser Bot Service
|
||||
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
|
||||
|
||||
# Debug Configuration
|
||||
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
|
||||
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
|
@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl
|
|||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
|
|
@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
|||
APP_LOGGING_BACKUP_COUNT = 5
|
||||
|
||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
||||
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
|
||||
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
|
||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
||||
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
|
||||
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
|
||||
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
||||
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
|
||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
||||
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
|
||||
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
|
||||
|
||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||
|
|
@ -63,12 +63,12 @@ STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
|
|||
|
||||
|
||||
# AI configuration
|
||||
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
|
||||
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
|
||||
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
|
||||
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
|
||||
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmcDVyOGNwbVkwWFJCWmFkZS12RkhLaFhLSF9kWWpEZ0d0NDBqV2FnWlpnYmpSckdLSGpjbmh6aHJXVUZxMElwY1MzcVg1MzBOdURUZXhnZ3pqNEZyQ1JWMVA0YmxhNWJlenNpa1A3TjZkYVZSclFONjU4MF9jMTJaS2d0ZDNnXzJKSmhSRVhyckJpTUlDa0RRWHN5cWVkOUJMTUp5aFRHcDV5Z1A1aWhSUnFNOHBJTDFPdzAzcVJ3bmhueTBmVkJDZTdJakhMOEFRdHBvWFduUzdRV2dNQVdpaXdFSVlHMDJ4NnZRUTBZZ3pOakxPLUdjNlNNQnJQMXpfSWR3NmFodDdDbkEtVmRjdVBhMjRWT1NOV1BYbU15VHRSWFR0UVBBMWtKRTRkS25KMFk9
|
||||
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmMGhla2xoZWowNjJzc1EzMWJYRXRTcGdWWWctU3hhcXNUbVVaOTJiRFJuSGM5S3ZGZ0M4RFotTGxOQ3loa3l4aVZ2T3FsRVVMck83RTlURFNOdWxHb0JfNVEtRGJ4X193dV9Bd0EtNlVGV0h4SWk2bldfWThxNVVnOGctSkNFR3FXa2pmY2ROcV9EVE1oMndFY1d4MjdLeWtUd0VEeW5CTlFwX2FOcW9DaWVXYWVfMy1ZUnFFUEZnanFOUGZILUpUZU8yUHNSODE3OXBSWVJFNlpBdTJtUT09
|
||||
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmRm9saTZuR1VSZV9pQllKRGFURmN4cDNNanpsVFM3TVItdDNtNWdoWC1zVllrLUVPeGZDRXF1S3Rxd0tVUGV6bl9Ob0JMa3U5ZUNlRjRVQ1dRWXZDTXlsRU13b2o2R1paalU4RXB6SWxYVEJPa2NmaDRFdzExRXU1X2VnNDlhQzQ3cTE1RlJrSlB5elRMZ2w3NmxlV2l3PT0=
|
||||
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmZGdyWkJibS03akJtSjF0U2doYXZVVDM1em1kY2ZpRGJISmVCUURfVkw3c2Z3OEFQd1h1SzE0cTExSUtVejRPY3VmWF9XT1ZyS3RxRmVRYktJeDR6OWhYaEM0bkNLVEI1cl9VZ1VFOG9IRTFWc2FUemh0UmNHTGprQ0FweThlSGpSSDAyZmw2YmR0OFREQWxpNERHWm1nPT0=
|
||||
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
|
||||
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
|
||||
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmcEVpVmFuWkk4eTJTc3VtRFg4cE9QU3R5NVg0eVFIR29RSVhmXy1rR0pPTm4wbFhIVFFpckx5UmhvSGxqSWV4S0xoTzdESE55R2k5eHowZEprdGhrbEU3eG5JWGpaNWJIdDRqT05zZGNCQVpXd2xTek1teHRBS3NRU2FuUTlSQ2Q=
|
||||
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
|
|
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
|||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||
|
||||
# Mandate Pre-Processing Servers
|
||||
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
|
||||
|
||||
# Preprocessor API Configuration
|
||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
||||
|
||||
# Azure Communication Services Email Configuration
|
||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||
|
|
|
|||
|
|
@ -319,25 +319,24 @@ class AiOpenai(BaseConnectorAi):
|
|||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013
|
||||
),
|
||||
AiModel(
|
||||
name="dall-e-3",
|
||||
displayName="OpenAI DALL-E 3",
|
||||
name="gpt-image-1",
|
||||
displayName="OpenAI GPT Image",
|
||||
connectorType="openai",
|
||||
apiUrl="https://api.openai.com/v1/images/generations",
|
||||
temperature=0.0, # Image generation doesn't use temperature
|
||||
maxTokens=0, # Image generation doesn't use tokens
|
||||
temperature=0.0,
|
||||
maxTokens=0,
|
||||
contextLength=0,
|
||||
costPer1kTokensInput=0.04,
|
||||
costPer1kTokensOutput=0.0,
|
||||
speedRating=5, # Slow for image generation
|
||||
qualityRating=9, # High quality art generation
|
||||
# capabilities removed (not used in business logic)
|
||||
speedRating=5,
|
||||
qualityRating=9,
|
||||
functionCall=self.generateImage,
|
||||
priority=PriorityEnum.QUALITY,
|
||||
processingMode=ProcessingModeEnum.DETAILED,
|
||||
operationTypes=createOperationTypeRatings(
|
||||
(OperationTypeEnum.IMAGE_GENERATE, 10)
|
||||
),
|
||||
version="dall-e-3",
|
||||
version="gpt-image-1",
|
||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.04
|
||||
)
|
||||
]
|
||||
|
|
@ -653,105 +652,82 @@ class AiOpenai(BaseConnectorAi):
|
|||
)
|
||||
|
||||
async def generateImage(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||
"""
|
||||
Generate an image using DALL-E 3 using standardized pattern.
|
||||
|
||||
Args:
|
||||
modelCall: AiModelCall with messages and generation options
|
||||
|
||||
Returns:
|
||||
AiModelResponse with generated image data
|
||||
"""
|
||||
"""Generate an image using GPT Image model (gpt-image-1)."""
|
||||
try:
|
||||
# Extract parameters from modelCall
|
||||
messages = modelCall.messages
|
||||
model = modelCall.model
|
||||
options = modelCall.options
|
||||
|
||||
# Get prompt from messages
|
||||
promptContent = messages[0]["content"] if messages else ""
|
||||
|
||||
# Parse prompt using AiCallPromptImage model
|
||||
import json
|
||||
|
||||
|
||||
messages = modelCall.messages
|
||||
options = modelCall.options
|
||||
promptContent = messages[0]["content"] if messages else ""
|
||||
|
||||
try:
|
||||
# Try to parse as JSON
|
||||
promptData = json.loads(promptContent)
|
||||
promptModel = AiCallPromptImage(**promptData)
|
||||
except:
|
||||
# If not JSON, use plain text prompt
|
||||
except Exception:
|
||||
promptModel = AiCallPromptImage(
|
||||
prompt=promptContent,
|
||||
size=options.size if options and hasattr(options, 'size') else "1024x1024",
|
||||
quality=options.quality if options and hasattr(options, 'quality') else "standard",
|
||||
style=options.style if options and hasattr(options, 'style') else "vivid"
|
||||
size=options.size if options and hasattr(options, "size") else "1024x1024",
|
||||
quality=options.quality if options and hasattr(options, "quality") else "auto",
|
||||
)
|
||||
|
||||
# Extract parameters from Pydantic model
|
||||
|
||||
prompt = promptModel.prompt
|
||||
size = promptModel.size or "1024x1024"
|
||||
quality = promptModel.quality or "standard"
|
||||
style = promptModel.style or "vivid"
|
||||
|
||||
rawQuality = promptModel.quality or "auto"
|
||||
quality = {"standard": "auto", "hd": "high"}.get(rawQuality, rawQuality)
|
||||
|
||||
logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")
|
||||
|
||||
# DALL-E 3 API endpoint
|
||||
dalle_url = "https://api.openai.com/v1/images/generations"
|
||||
|
||||
|
||||
payload = {
|
||||
"model": "dall-e-3",
|
||||
"model": "gpt-image-1",
|
||||
"prompt": prompt,
|
||||
"size": size,
|
||||
"quality": quality,
|
||||
"style": style,
|
||||
"n": 1,
|
||||
"response_format": "b64_json" # Get base64 data directly instead of URLs
|
||||
}
|
||||
|
||||
# Use existing httpClient to benefit from connection pooling
|
||||
# This avoids TLS connection issues that can occur with fresh clients
|
||||
|
||||
response = await self.httpClient.post(
|
||||
dalle_url,
|
||||
json=payload
|
||||
"https://api.openai.com/v1/images/generations",
|
||||
json=payload,
|
||||
)
|
||||
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"DALL-E API error: {response.status_code} - {response.text}")
|
||||
logger.error(f"Image generation API error: {response.status_code} - {response.text}")
|
||||
return AiModelResponse(
|
||||
content="",
|
||||
success=False,
|
||||
error=f"DALL-E API error: {response.status_code} - {response.text}"
|
||||
error=f"Image generation API error: {response.status_code} - {response.text}",
|
||||
)
|
||||
|
||||
|
||||
responseJson = response.json()
|
||||
|
||||
|
||||
if "data" in responseJson and len(responseJson["data"]) > 0:
|
||||
image_data = responseJson["data"][0]["b64_json"]
|
||||
|
||||
logger.info(f"Successfully generated image: {len(image_data)} characters")
|
||||
imageData = responseJson["data"][0].get("b64_json", "")
|
||||
if not imageData:
|
||||
imageData = responseJson["data"][0].get("url", "")
|
||||
|
||||
logger.info(f"Successfully generated image: {len(imageData)} characters")
|
||||
return AiModelResponse(
|
||||
content=image_data,
|
||||
content=imageData,
|
||||
success=True,
|
||||
modelId="dall-e-3",
|
||||
modelId="gpt-image-1",
|
||||
metadata={
|
||||
"size": size,
|
||||
"quality": quality,
|
||||
"style": style,
|
||||
"response_id": responseJson.get("id", "")
|
||||
}
|
||||
"response_id": responseJson.get("id", ""),
|
||||
},
|
||||
)
|
||||
else:
|
||||
logger.error("No image data in DALL-E response")
|
||||
logger.error("No image data in generation response")
|
||||
return AiModelResponse(
|
||||
content="",
|
||||
success=False,
|
||||
error="No image data in DALL-E response"
|
||||
error="No image data in generation response",
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during image generation: {str(e)}", exc_info=True)
|
||||
return AiModelResponse(
|
||||
content="",
|
||||
success=False,
|
||||
error=f"Error during image generation: {str(e)}"
|
||||
error=f"Error during image generation: {str(e)}",
|
||||
)
|
||||
|
|
@ -311,7 +311,10 @@ class DatabaseConnector:
|
|||
# Establish connection to the database
|
||||
self._connect()
|
||||
|
||||
logger.info("PostgreSQL database system initialized successfully")
|
||||
logger.debug(
|
||||
"PostgreSQL database system initialized (db=%s, host=%s, port=%s)",
|
||||
self.dbDatabase, self.dbHost, self.dbPort,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"FATAL ERROR: Database system initialization failed: {e}")
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _buildPrimarySttRecognitionFields(
|
||||
*,
|
||||
model: str,
|
||||
lightweight: bool,
|
||||
) -> Dict[str, Any]:
|
||||
"""Shared fields for batch + streaming primary RecognitionConfig."""
|
||||
base: Dict[str, Any] = {
|
||||
"enable_automatic_punctuation": True,
|
||||
"model": model,
|
||||
}
|
||||
if lightweight:
|
||||
base["enable_word_time_offsets"] = False
|
||||
base["enable_word_confidence"] = False
|
||||
base["max_alternatives"] = 1
|
||||
base["use_enhanced"] = False
|
||||
else:
|
||||
base["enable_word_time_offsets"] = True
|
||||
base["enable_word_confidence"] = True
|
||||
base["max_alternatives"] = 3
|
||||
base["use_enhanced"] = True
|
||||
return base
|
||||
|
||||
|
||||
# Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
|
||||
# SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
|
||||
_GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
|
||||
|
|
@ -73,7 +97,10 @@ class ConnectorGoogleSpeech:
|
|||
sampleRate: int = None, channels: int = None,
|
||||
skipFallbacks: bool = False,
|
||||
phraseHints: Optional[list] = None,
|
||||
alternativeLanguages: Optional[list] = None) -> Dict:
|
||||
alternativeLanguages: Optional[list] = None,
|
||||
model: str = "latest_long",
|
||||
lightweight: bool = False,
|
||||
audioFormat: Optional[str] = None) -> Dict:
|
||||
"""
|
||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||
|
||||
|
|
@ -82,6 +109,9 @@ class ConnectorGoogleSpeech:
|
|||
language: Language code (e.g., 'de-DE', 'en-US')
|
||||
sample_rate: Audio sample rate (auto-detected if None)
|
||||
channels: Number of audio channels (auto-detected if None)
|
||||
model: Google recognition model (e.g. latest_long, latest_short)
|
||||
lightweight: If True, omit word timings/confidence, single alternative, no enhanced model
|
||||
audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection
|
||||
|
||||
Returns:
|
||||
Dict containing transcribed text, confidence, and metadata
|
||||
|
|
@ -92,8 +122,24 @@ class ConnectorGoogleSpeech:
|
|||
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
|
||||
sampleRate = None
|
||||
|
||||
# Auto-detect audio format if not provided
|
||||
if sampleRate is None or channels is None:
|
||||
explicitFormat = (audioFormat or "").strip().lower() or None
|
||||
if explicitFormat:
|
||||
if channels is None:
|
||||
channels = 1
|
||||
if sampleRate is None:
|
||||
if explicitFormat == "webm_opus":
|
||||
sampleRate = 48000
|
||||
elif explicitFormat == "linear16":
|
||||
sampleRate = 16000
|
||||
elif explicitFormat in ("mp3", "flac"):
|
||||
sampleRate = 44100
|
||||
elif explicitFormat == "wav":
|
||||
sampleRate = 16000
|
||||
else:
|
||||
sampleRate = 16000
|
||||
audioFormat = explicitFormat
|
||||
logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch")
|
||||
elif sampleRate is None or channels is None:
|
||||
validation = self.validateAudioFormat(audioContent)
|
||||
if not validation["valid"]:
|
||||
return {
|
||||
|
|
@ -156,12 +202,7 @@ class ConnectorGoogleSpeech:
|
|||
"encoding": encoding,
|
||||
"audio_channel_count": channels,
|
||||
"language_code": language,
|
||||
"enable_automatic_punctuation": True,
|
||||
"model": "latest_long",
|
||||
"enable_word_time_offsets": True,
|
||||
"enable_word_confidence": True,
|
||||
"max_alternatives": 3,
|
||||
"use_enhanced": True,
|
||||
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
|
||||
}
|
||||
|
||||
if phraseHints:
|
||||
|
|
@ -205,8 +246,7 @@ class ConnectorGoogleSpeech:
|
|||
sample_rate_hertz=16000,
|
||||
audio_channel_count=1,
|
||||
language_code=language,
|
||||
enable_automatic_punctuation=True,
|
||||
model="latest_long"
|
||||
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
|
||||
)
|
||||
try:
|
||||
response = await asyncio.to_thread(
|
||||
|
|
@ -343,7 +383,7 @@ class ConnectorGoogleSpeech:
|
|||
"error": "No recognition results (silence or unclear audio)"
|
||||
}
|
||||
|
||||
models = ["latest_long", "phone_call", "latest_short"]
|
||||
models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"]))
|
||||
|
||||
for fallback_config in fallback_configs:
|
||||
for model in models:
|
||||
|
|
@ -419,6 +459,9 @@ class ConnectorGoogleSpeech:
|
|||
audioQueue: asyncio.Queue,
|
||||
language: str = "de-DE",
|
||||
phraseHints: Optional[list] = None,
|
||||
model: str = "latest_long",
|
||||
lightweight: bool = False,
|
||||
singleUtterance: bool = False,
|
||||
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||
"""
|
||||
Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
|
||||
|
|
@ -429,9 +472,13 @@ class ConnectorGoogleSpeech:
|
|||
Send (b"", True) to signal end of stream.
|
||||
language: Language code
|
||||
phraseHints: Optional boost phrases
|
||||
model: Google recognition model (e.g. latest_long, latest_short)
|
||||
lightweight: If True, use non-enhanced primary config (lower latency)
|
||||
singleUtterance: If True, end stream after first utterance (client should reconnect)
|
||||
|
||||
Yields:
|
||||
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec
|
||||
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec;
|
||||
optionally endOfSingleUtterance, reconnectRequired
|
||||
"""
|
||||
STREAM_LIMIT_SEC = 290
|
||||
streamStartTs = time.time()
|
||||
|
|
@ -442,9 +489,7 @@ class ConnectorGoogleSpeech:
|
|||
"sample_rate_hertz": 48000,
|
||||
"audio_channel_count": 1,
|
||||
"language_code": language,
|
||||
"enable_automatic_punctuation": True,
|
||||
"model": "latest_long",
|
||||
"use_enhanced": True,
|
||||
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
|
||||
}
|
||||
if phraseHints:
|
||||
configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
|
||||
|
|
@ -453,7 +498,7 @@ class ConnectorGoogleSpeech:
|
|||
streamingConfig = speech.StreamingRecognitionConfig(
|
||||
config=recognitionConfig,
|
||||
interim_results=True,
|
||||
single_utterance=False,
|
||||
single_utterance=singleUtterance,
|
||||
)
|
||||
|
||||
import queue as threadQueue
|
||||
|
|
@ -490,7 +535,22 @@ class ConnectorGoogleSpeech:
|
|||
)
|
||||
for response in responseStream:
|
||||
elapsed = time.time() - streamStartTs
|
||||
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0
|
||||
|
||||
durationFromResults = 0.0
|
||||
for result in response.results:
|
||||
rt = getattr(result, "result_end_time", None)
|
||||
if rt is None:
|
||||
continue
|
||||
if hasattr(rt, "total_seconds"):
|
||||
durationFromResults = max(durationFromResults, float(rt.total_seconds()))
|
||||
else:
|
||||
durationFromResults = max(
|
||||
durationFromResults,
|
||||
float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9,
|
||||
)
|
||||
estimatedDurationSec = durationFromResults if durationFromResults > 0 else (
|
||||
totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0
|
||||
)
|
||||
|
||||
finalTexts = []
|
||||
interimTexts = []
|
||||
|
|
@ -524,6 +584,13 @@ class ConnectorGoogleSpeech:
|
|||
"stabilityScore": 0.0,
|
||||
"audioDurationSec": estimatedDurationSec,
|
||||
}), loop)
|
||||
|
||||
speechEvt = getattr(response, "speech_event_type", None)
|
||||
if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt):
|
||||
asyncio.run_coroutine_threadsafe(resultOutQ.put({
|
||||
"endOfSingleUtterance": True,
|
||||
"audioDurationSec": estimatedDurationSec,
|
||||
}), loop)
|
||||
if elapsed >= STREAM_LIMIT_SEC:
|
||||
logger.info("Streaming STT approaching 5-min limit, client should reconnect")
|
||||
asyncio.run_coroutine_threadsafe(resultOutQ.put({
|
||||
|
|
|
|||
|
|
@ -245,11 +245,10 @@ class AiCallPromptWebCrawl(BaseModel):
|
|||
|
||||
class AiCallPromptImage(BaseModel):
|
||||
"""Structured prompt format for image generation."""
|
||||
|
||||
|
||||
prompt: str = Field(description="Text description of the image to generate")
|
||||
size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1792x1024, 1024x1792)")
|
||||
quality: Optional[str] = Field(default="standard", description="Image quality (standard, hd)")
|
||||
style: Optional[str] = Field(default="vivid", description="Image style (vivid, natural)")
|
||||
size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1536x1024, 1024x1536)")
|
||||
quality: Optional[str] = Field(default="auto", description="Image quality (auto, high, medium, low)")
|
||||
|
||||
|
||||
class AiProcessParameters(BaseModel):
|
||||
|
|
|
|||
|
|
@ -62,15 +62,15 @@ class DataSource(PowerOnModel):
|
|||
description="Owner user ID",
|
||||
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
|
||||
)
|
||||
autoSync: bool = Field(
|
||||
ragIndexEnabled: bool = Field(
|
||||
default=False,
|
||||
description="Automatically sync on schedule",
|
||||
json_schema_extra={"label": "Auto-Sync"},
|
||||
description="When true this tree element is indexed into the RAG knowledge store",
|
||||
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
lastSynced: Optional[float] = Field(
|
||||
lastIndexed: Optional[float] = Field(
|
||||
default=None,
|
||||
description="Last sync timestamp",
|
||||
json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"},
|
||||
description="Timestamp of last successful RAG indexing run",
|
||||
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
||||
)
|
||||
scope: str = Field(
|
||||
default="personal",
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ class PaginationParams(BaseModel):
|
|||
Omit or set to None for the default (ungrouped) view.
|
||||
"""
|
||||
page: int = Field(ge=1, description="Current page number (1-based)")
|
||||
pageSize: int = Field(ge=1, le=1000, description="Number of items per page")
|
||||
pageSize: int = Field(ge=1, le=10000, description="Number of items per page")
|
||||
sort: List[SortField] = Field(default_factory=list, description="List of sort fields in priority order")
|
||||
filters: Optional[Dict[str, Any]] = Field(
|
||||
default=None,
|
||||
|
|
|
|||
|
|
@ -484,10 +484,10 @@ class UserConnection(PowerOnModel):
|
|||
default=None,
|
||||
description=(
|
||||
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
|
||||
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
|
||||
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
|
||||
"clickupScope (titles|title_description|with_comments), "
|
||||
"surfaceToggles (dict per authority), maxAgeDays (int)."
|
||||
"mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), "
|
||||
"filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), "
|
||||
"clickupIndexAttachments (bool), maxAgeDays (int). "
|
||||
"Neutralization is controlled per DataSource.neutralize (not here)."
|
||||
),
|
||||
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1080,6 +1080,8 @@ class CommcoachService:
|
|||
audioContent=audioContent,
|
||||
language=language,
|
||||
skipFallbacks=True,
|
||||
model="latest_short",
|
||||
lightweight=True,
|
||||
)
|
||||
|
||||
transcribedText = ""
|
||||
|
|
|
|||
|
|
@ -3,9 +3,147 @@
|
|||
|
||||
from modules.shared.i18nRegistry import t
|
||||
|
||||
# Pick options for the loop's "done" output: one row per selectable path,
# expanded into dicts with path / pickerLabel / detail / recommended / type.
LOOP_DONE_DATA_PICK_OPTIONS = [
    {
        "path": pickPath,
        "pickerLabel": label,
        "detail": detail,
        "recommended": recommended,
        "type": valueType,
    }
    for pickPath, label, detail, recommended, valueType in (
        (
            ["bodyResults"],
            t("Alle Schleifen-Ergebnisse"),
            t(
                "Ausgabe des letzten Schrittes im Schleifen-Rumpf pro Iteration als Liste, "
                "ein Eintrag pro Durchlauf. Ideal als Eingabe fuer Kontext zusammenfuehren."
            ),
            True,
            "List[Any]",
        ),
        (
            ["items"],
            t("Iterierte Elemente"),
            t(
                "Liste der Schleifen-Elemente nach gewähltem Iterationsmodus (Kopie der Eingabeliste, gefiltert)."
            ),
            False,
            "List[Any]",
        ),
        (
            ["count"],
            t("Anzahl Durchläufe"),
            t("Wie viele Iterationen die Schleife ausgeführt hat."),
            False,
            "int",
        ),
    )
]
|
||||
|
||||
# Pick options available per loop iteration: current element, its index,
# the full item list and the total count.
LOOP_ITEM_DATA_PICK_OPTIONS = [
    {
        "path": pickPath,
        "pickerLabel": label,
        "detail": detail,
        "recommended": recommended,
        "type": valueType,
    }
    for pickPath, label, detail, recommended, valueType in (
        (
            ["currentItem"],
            t("Aktuelles Element"),
            t("Das aktuelle Iterationselement."),
            True,
            "Any",
        ),
        (
            ["currentIndex"],
            t("Aktueller Index"),
            t("0-basierter Index der aktuellen Iteration."),
            False,
            "int",
        ),
        (
            ["items"],
            t("Alle Elemente"),
            t("Die vollständige Quellliste."),
            False,
            "List[Any]",
        ),
        (
            ["count"],
            t("Gesamtanzahl"),
            t("Anzahl der Elemente in der Schleife."),
            False,
            "int",
        ),
    )
]
|
||||
|
||||
# Base pick paths for results whose ``ActionResult.data`` is an envelope
# carrying a ``_meta`` entry (context.extractContent-style clarity).
CONTEXT_ENVELOPE_DATA_PICK_OPTIONS = [
    {
        "path": pickPath,
        "pickerLabel": label,
        "detail": detail,
        "recommended": recommended,
        "type": valueType,
    }
    for pickPath, label, detail, recommended, valueType in (
        (
            ["data"],
            t("Vollständiges data-Objekt"),
            t(
                "Versionierter Kontext-Umschlag: ``schemaVersion``, ``kind``, Nutzdatenfelder, ``_meta``."
            ),
            True,
            "Dict",
        ),
        (
            ["data", "_meta"],
            t("Technische Metadaten (_meta)"),
            t(
                "`actionType`, Payload-Schema-Version; bei Transform/Merge keine großen Payloads."
            ),
            False,
            "Any",
        ),
    )
]
|
||||
|
||||
# Pick options for a merge result: the merged value, the first branch's
# data, and the per-port input mapping.
MERGE_RESULT_DATA_PICK_OPTIONS = [
    {
        "path": pickPath,
        "pickerLabel": label,
        "detail": detail,
        "recommended": recommended,
        "type": valueType,
    }
    for pickPath, label, detail, recommended, valueType in (
        (
            ["merged"],
            t("Zusammengeführt"),
            t("Zusammengeführtes Ergebnis (je nach Modus)."),
            True,
            "Dict",
        ),
        (
            ["first"],
            t("Erster Zweig"),
            t("Daten vom ersten verbundenen Eingang (Modus „first“)."),
            False,
            "Any",
        ),
        (
            ["inputs"],
            t("Alle Eingänge"),
            t("Dict der Eingabeobjekte nach Port-Index."),
            False,
            "Dict[int,Any]",
        ),
    )
]
|
||||
|
||||
# Extended picker for ``context.mergeContext`` (ActionResult +
# ``surfaceDataAsTopLevel``): the envelope paths, then the same merge keys as
# ``flow.merge``, then ``count`` from the action payload.
CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS = (
    CONTEXT_ENVELOPE_DATA_PICK_OPTIONS
    + MERGE_RESULT_DATA_PICK_OPTIONS
    + [
        {
            "path": ["count"],
            "pickerLabel": t("Anzahl Einträge"),
            "detail": t("Wie viele Einträge zusammengeführt wurden."),
            "recommended": False,
            "type": "int",
        },
    ]
)
|
||||
|
||||
# Pick options for a context branch output: the filtered ``items`` list
# (flagged as recommended) and the ``data`` context object.
_CONTEXT_BRANCH_DATA_PICK_OPTIONS = [
    {
        "path": pickPath,
        "pickerLabel": label,
        "detail": detail,
        "recommended": recommended,
        "type": valueType,
    }
    for pickPath, label, detail, recommended, valueType in (
        (
            ["items"],
            t("Gefilterte Elemente"),
            t("Empfohlen für Schleifen: je Eintrag ein Durchlauf (z. B. Bild-Slots)."),
            True,
            "List[Any]",
        ),
        (
            ["data"],
            t("Kontext (data)"),
            t("Gefilterter Presentation-Umschlag oder unveränderter Eingang auf dem Sonst-Zweig."),
            False,
            "Dict",
        ),
    )
]
|
||||
|
||||
# Ports that pass through typical step outputs (not just an empty Transit).
|
||||
_FLOW_INPUT_SCHEMAS = [
|
||||
"Transit",
|
||||
"ContextBranch",
|
||||
"FormPayload",
|
||||
"AiResult",
|
||||
"TextResult",
|
||||
|
|
@ -31,12 +169,23 @@ FLOW_NODES = [
|
|||
"Die Daten vom Eingangskanal werden an den gewählten Ausgang durchgereicht."
|
||||
),
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Item",
|
||||
"type": "Any",
|
||||
"required": True,
|
||||
"frontendType": "dataRef",
|
||||
"description": t("Item, das auf die Bedingung getestet wird"),
|
||||
},
|
||||
{
|
||||
"name": "condition",
|
||||
"type": "json",
|
||||
"required": True,
|
||||
"frontendType": "condition",
|
||||
"description": t("Bedingung: Feld aus einem vorherigen Schritt und Vergleich"),
|
||||
"frontendOptions": {
|
||||
"dependsOn": "Item",
|
||||
"operatorCatalog": "condition",
|
||||
},
|
||||
"description": t("Bedingung auf das gewählte Item"),
|
||||
},
|
||||
],
|
||||
"inputs": 1,
|
||||
|
|
@ -52,8 +201,10 @@ FLOW_NODES = [
|
|||
"category": "flow",
|
||||
"label": t("Switch"),
|
||||
"description": t(
|
||||
"Mehrere Zweige nach einem Wert aus einem vorherigen Schritt (Data Picker). "
|
||||
"Definiere Fälle mit Vergleichsoperator; der Eingang wird an den ersten passenden Zweig durchgereicht."
|
||||
"Mehrere Zweige nach einem Wert aus einem vorherigen Schritt. "
|
||||
"Jeder Fall hat einen eigenen Ausgang mit passend gefiltertem Inhalt in ``items``; "
|
||||
"mehrere Kontext-Filter können gleichzeitig zutreffen (z. B. Text und Bilder). "
|
||||
"Der letzte Ausgang (Sonst) reicht den unveränderten Eingang durch."
|
||||
),
|
||||
"parameters": [
|
||||
{
|
||||
|
|
@ -68,13 +219,22 @@ FLOW_NODES = [
|
|||
"type": "array",
|
||||
"required": False,
|
||||
"frontendType": "caseList",
|
||||
"description": t("Fälle: Operator und Vergleichswert"),
|
||||
"frontendOptions": {
|
||||
"dependsOn": "value",
|
||||
"operatorCatalog": "condition",
|
||||
},
|
||||
"description": t("Fälle: Operator und Vergleichswert (abhängig vom gewählten Wert)"),
|
||||
},
|
||||
],
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": list(_FLOW_INPUT_SCHEMAS)}},
|
||||
"outputPorts": {0: {"schema": "Transit"}},
|
||||
"outputPorts": {
|
||||
0: {
|
||||
"schema": "ContextBranch",
|
||||
"dataPickOptions": _CONTEXT_BRANCH_DATA_PICK_OPTIONS,
|
||||
},
|
||||
},
|
||||
"executor": "flow",
|
||||
"meta": {"icon": "mdi-swap-horizontal", "color": "#FF9800", "usesAi": False},
|
||||
},
|
||||
|
|
@ -83,8 +243,10 @@ FLOW_NODES = [
|
|||
"category": "flow",
|
||||
"label": t("Schleife / Für jedes"),
|
||||
"description": t(
|
||||
"Iteriert über ein Array aus einem vorherigen Schritt (z. B. documente, Zeilen, Listeneinträge). "
|
||||
"Optional: UDM-Ebene für strukturierte Dokumente."
|
||||
"Zwei Ausgänge: „Schleife“ verbindet den Rumpf (pro Element); optional führt der Rumpf "
|
||||
"mit einem Rücklauf-Pfeil wieder zum **gleichen Eingang** wie der vorherige Schritt (wie in n8n). "
|
||||
"„Fertig“ führt genau einmal fort, wenn alle Iterationen beendet sind. "
|
||||
"Die zu durchlaufende Liste wählen Sie wie bisher; UDM-/Strukturdaten werden automatisch sinnvoll in Elemente aufgelöst."
|
||||
),
|
||||
"parameters": [
|
||||
{
|
||||
|
|
@ -95,13 +257,27 @@ FLOW_NODES = [
|
|||
"description": t("Liste oder Sammlung zum Durchlaufen (im Data Picker wählen)"),
|
||||
},
|
||||
{
|
||||
"name": "level",
|
||||
"name": "iterationMode",
|
||||
"type": "str",
|
||||
"required": False,
|
||||
"frontendType": "select",
|
||||
"frontendOptions": {"options": ["auto", "documents", "structuralNodes", "contentBlocks"]},
|
||||
"description": t("Nur bei UDM-Daten: welche Strukturebene als Elemente verwendet wird"),
|
||||
"default": "auto",
|
||||
"frontendOptions": {
|
||||
"options": ["all", "first", "last", "every_second", "every_third", "every_nth"],
|
||||
},
|
||||
"description": t(
|
||||
"Welche Elemente die Schleife besucht: alle, nur das erste/letzte, jedes zweite/dritte "
|
||||
"oder jedes n-te (Schritt dann unter „Schrittweite“)."
|
||||
),
|
||||
"default": "all",
|
||||
},
|
||||
{
|
||||
"name": "iterationStride",
|
||||
"type": "int",
|
||||
"required": False,
|
||||
"frontendType": "number",
|
||||
"frontendOptions": {"min": 2, "max": 100},
|
||||
"description": t("Nur bei „jedes n-te“: Schrittweite (z. B. 5 = jedes 5. Element ab Index 0)."),
|
||||
"default": 2,
|
||||
},
|
||||
{
|
||||
"name": "concurrency",
|
||||
|
|
@ -114,12 +290,18 @@ FLOW_NODES = [
|
|||
},
|
||||
],
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": [
|
||||
"Transit", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList",
|
||||
"ActionResult", "AiResult", "QueryResult", "FormPayload",
|
||||
]}},
|
||||
"outputPorts": {0: {"schema": "LoopItem"}},
|
||||
"outputs": 2,
|
||||
"outputLabels": [t("Schleife"), t("Fertig")],
|
||||
"inputPorts": {
|
||||
0: {"accepts": [
|
||||
"Transit", "ContextBranch", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList",
|
||||
"ActionResult", "AiResult", "QueryResult", "FormPayload", "LoopItem",
|
||||
]},
|
||||
},
|
||||
"outputPorts": {
|
||||
0: {"schema": "LoopItem", "dataPickOptions": LOOP_ITEM_DATA_PICK_OPTIONS},
|
||||
1: {"schema": "Transit", "dataPickOptions": LOOP_DONE_DATA_PICK_OPTIONS},
|
||||
},
|
||||
"executor": "flow",
|
||||
"meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False},
|
||||
},
|
||||
|
|
@ -151,13 +333,19 @@ FLOW_NODES = [
|
|||
"default": 2,
|
||||
},
|
||||
],
|
||||
# ``inputs: 2`` is the static minimum / default topology. ``inputCount`` is a
|
||||
# frontend hint: the editor adds/removes input ports dynamically when the user
|
||||
# changes the value. ``FlowExecutor._merge`` collects whatever ports exist in
|
||||
# ``inputSources`` at runtime, so extra ports (3–5) work without further changes
|
||||
# to this definition. ``inputPorts`` below only type-declares the two minimum
|
||||
# ports; additional ports inherit the same ``_FLOW_INPUT_SCHEMAS`` accepts list.
|
||||
"inputs": 2,
|
||||
"outputs": 1,
|
||||
"inputPorts": {
|
||||
0: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
|
||||
1: {"accepts": list(_FLOW_INPUT_SCHEMAS)},
|
||||
},
|
||||
"outputPorts": {0: {"schema": "MergeResult"}},
|
||||
"outputPorts": {0: {"schema": "MergeResult", "dataPickOptions": MERGE_RESULT_DATA_PICK_OPTIONS}},
|
||||
"executor": "flow",
|
||||
"meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ class BrowserBotConnector:
|
|||
botAccountPassword: Optional[str] = None,
|
||||
transferMode: str = "auto",
|
||||
debugMode: bool = False,
|
||||
avatarMediaData: Optional[str] = None,
|
||||
avatarMediaType: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Send join command to the Browser Bot service.
|
||||
|
|
@ -79,12 +81,16 @@ class BrowserBotConnector:
|
|||
"debugMode": debugMode,
|
||||
}
|
||||
|
||||
# Add authenticated join credentials if configured
|
||||
if botAccountEmail and botAccountPassword:
|
||||
payload["botAccountEmail"] = botAccountEmail
|
||||
payload["botAccountPassword"] = botAccountPassword
|
||||
logger.info(f"Bot will join authenticated as {botAccountEmail}")
|
||||
|
||||
if avatarMediaData and avatarMediaType:
|
||||
payload["avatarMediaData"] = avatarMediaData
|
||||
payload["avatarMediaType"] = avatarMediaType
|
||||
logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars")
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
|
||||
async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:
|
||||
|
|
|
|||
|
|
@ -111,6 +111,18 @@ class TeamsbotMeetingModule(PowerOnModel):
|
|||
defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
|
||||
goals: Optional[str] = Field(default=None, description="Free-text goals")
|
||||
kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
|
||||
defaultMeetingLink: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Default Teams meeting URL for new sessions in this module (user can override)",
|
||||
)
|
||||
defaultBotName: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Default display name for the bot when starting a session from this module",
|
||||
)
|
||||
defaultAvatarFileId: Optional[str] = Field(
|
||||
default=None,
|
||||
description="FileItem ID for the default avatar image/video shown in the meeting",
|
||||
)
|
||||
status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
|
||||
|
||||
|
||||
|
|
@ -217,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel):
|
|||
triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
|
||||
contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
|
||||
debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
|
||||
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
|
@ -240,6 +253,7 @@ class TeamsbotConfig(BaseModel):
|
|||
triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
|
||||
contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
|
||||
debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
|
||||
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting")
|
||||
|
||||
def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
|
||||
"""Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
|
||||
|
|
@ -257,6 +271,7 @@ class TeamsbotStartSessionRequest(BaseModel):
|
|||
"""Request to start a new Teams Bot session."""
|
||||
meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
|
||||
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
|
||||
moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to")
|
||||
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
||||
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
|
||||
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
|
||||
|
|
@ -277,6 +292,9 @@ class CreateMeetingModuleRequest(BaseModel):
|
|||
defaultDirectorPrompts: Optional[str] = None
|
||||
goals: Optional[str] = None
|
||||
kpiTargets: Optional[str] = None
|
||||
defaultMeetingLink: Optional[str] = None
|
||||
defaultBotName: Optional[str] = None
|
||||
defaultAvatarFileId: Optional[str] = None
|
||||
|
||||
|
||||
class UpdateMeetingModuleRequest(BaseModel):
|
||||
|
|
@ -287,6 +305,9 @@ class UpdateMeetingModuleRequest(BaseModel):
|
|||
defaultDirectorPrompts: Optional[str] = None
|
||||
goals: Optional[str] = None
|
||||
kpiTargets: Optional[str] = None
|
||||
defaultMeetingLink: Optional[str] = None
|
||||
defaultBotName: Optional[str] = None
|
||||
defaultAvatarFileId: Optional[str] = None
|
||||
status: Optional[TeamsbotModuleStatus] = None
|
||||
|
||||
|
||||
|
|
@ -304,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
|
|||
triggerCooldownSeconds: Optional[int] = None
|
||||
contextWindowSegments: Optional[int] = None
|
||||
debugMode: Optional[bool] = None
|
||||
avatarFileId: Optional[str] = None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
|
|||
TeamsbotDirectorPromptStatus,
|
||||
TeamsbotDirectorPromptMode,
|
||||
TeamsbotMeetingModule,
|
||||
TeamsbotModuleStatus,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -338,6 +339,8 @@ class TeamsbotObjects:
|
|||
def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
    """Return the meeting modules of a feature instance, newest first.

    Rows are fetched by ``instanceId``. Any row without a ``status`` key gets
    the ACTIVE status value filled in. The list is then sorted in place by
    ``sysCreatedAt`` in descending order; a missing or empty timestamp is
    compared as an empty string.
    """

    def _createdAt(row):
        return row.get("sysCreatedAt") or ""

    modules = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
    for row in modules:
        if "status" not in row:
            row["status"] = TeamsbotModuleStatus.ACTIVE.value
    modules.sort(key=_createdAt, reverse=True)
    return modules
|
||||
|
||||
|
|
|
|||
|
|
@ -290,6 +290,19 @@ def _runMigrations():
|
|||
|
||||
migrated = False
|
||||
|
||||
# M2: MeetingModule default meeting link / bot name (additive columns)
|
||||
if _tableExists("TeamsbotMeetingModule"):
|
||||
for col, sqlType in (
|
||||
("defaultMeetingLink", "TEXT"),
|
||||
("defaultBotName", "TEXT"),
|
||||
):
|
||||
if not _columnExists("TeamsbotMeetingModule", col):
|
||||
cur.execute(
|
||||
f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL',
|
||||
)
|
||||
logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}")
|
||||
migrated = True
|
||||
|
||||
# M1: Create default Adhoc modules for orphaned sessions
|
||||
# (only runs if TeamsbotSession table exists with moduleId column
|
||||
# and there are sessions without a moduleId)
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ from .datamodelTeamsbot import (
|
|||
TeamsbotDirectorPromptMode,
|
||||
TeamsbotDirectorPromptStatus,
|
||||
TeamsbotMeetingModule,
|
||||
TeamsbotModuleStatus,
|
||||
CreateMeetingModuleRequest,
|
||||
UpdateMeetingModuleRequest,
|
||||
DIRECTOR_PROMPT_FILE_LIMIT,
|
||||
|
|
@ -203,6 +204,7 @@ async def createModule(
|
|||
data["instanceId"] = instanceId
|
||||
data["mandateId"] = mandateId
|
||||
data["ownerUserId"] = str(context.user.id)
|
||||
data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
|
||||
module = interface.createModule(data)
|
||||
return {"module": module}
|
||||
|
||||
|
|
@ -280,6 +282,11 @@ async def startSession(
|
|||
mandateId = _validateInstanceAccess(instanceId, context)
|
||||
interface = _getInterface(context, instanceId)
|
||||
config = _getInstanceConfig(instanceId)
|
||||
|
||||
if body.moduleId:
|
||||
mod = interface.getModule(body.moduleId)
|
||||
if not mod or str(mod.get("instanceId") or "") != str(instanceId):
|
||||
raise HTTPException(status_code=400, detail="Invalid moduleId for this instance")
|
||||
|
||||
# Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
|
||||
cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
|
||||
|
|
@ -288,6 +295,7 @@ async def startSession(
|
|||
sessionData = TeamsbotSession(
|
||||
instanceId=instanceId,
|
||||
mandateId=mandateId,
|
||||
moduleId=body.moduleId,
|
||||
meetingLink=cleanMeetingUrl,
|
||||
botName=body.botName or config.botName,
|
||||
sessionContext=body.sessionContext,
|
||||
|
|
@ -426,6 +434,54 @@ async def listSessions(
|
|||
return {"sessions": sessions}
|
||||
|
||||
|
||||
@router.get("/{instanceId}/dashboard/stream")
@limiter.limit("60/minute")
async def streamDashboard(
    request: Request,
    instanceId: str,
    context: RequestContext = Depends(getRequestContext),
):
    """
    Server-sent-events endpoint that repeatedly pushes a dashboard snapshot.

    Each ``dashboardState`` frame carries the session list (ended sessions
    included) and the meeting modules of the instance. Platform admins get all
    sessions; other users only get their own. The loop waits 3 seconds between
    frames while at least one fetched session is pending, joining or active,
    and 20 seconds otherwise. A failing tick is logged and reported to the
    client as an ``error`` frame; the stream keeps running afterwards.
    """
    _validateInstanceAccess(instanceId, context)
    interface = _getInterface(context, instanceId)

    # ``None`` disables the per-user filter for platform admins.
    if context.isPlatformAdmin:
        userId = None
    else:
        userId = str(context.user.id)

    liveStatuses = {
        state.value
        for state in (
            TeamsbotSessionStatus.PENDING,
            TeamsbotSessionStatus.JOINING,
            TeamsbotSessionStatus.ACTIVE,
        )
    }

    async def _snapshotFrames():
        while True:
            # Reset every tick so a failed fetch falls back to the slow interval.
            sessionRows = []
            try:
                sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId)
                moduleRows = interface.getModules(instanceId)
                snapshot = {"type": "dashboardState", "sessions": sessionRows, "modules": moduleRows}
                yield f"data: {json.dumps(snapshot, default=str)}\n\n"
            except asyncio.CancelledError:
                # Cancellation must propagate; it is not a tick failure.
                raise
            except Exception as ex:
                logger.warning("dashboard stream tick failed: %s", ex)
                errorFrame = json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})
                yield f"data: {errorFrame}\n\n"

            pauseSeconds = 20.0
            for row in sessionRows:
                if row.get("status") in liveStatuses:
                    pauseSeconds = 3.0
                    break
            await asyncio.sleep(pauseSeconds)

    sseHeaders = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        _snapshotFrames(),
        media_type="text/event-stream",
        headers=sseHeaders,
    )
|
||||
|
||||
|
||||
@router.get("/{instanceId}/sessions/{sessionId}")
|
||||
@limiter.limit("30/minute")
|
||||
async def getSession(
|
||||
|
|
@ -634,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf
|
|||
if not userSettings:
|
||||
return baseConfig
|
||||
|
||||
# Merge: user settings override instance defaults (only non-None values)
|
||||
# Merge: user settings override instance defaults (only non-None values).
|
||||
# Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically.
|
||||
overrides = {}
|
||||
for field in ["botName", "aiSystemPrompt", "responseMode",
|
||||
"responseChannel", "transferMode", "language", "voiceId",
|
||||
"triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments",
|
||||
"debugMode"]:
|
||||
for field in TeamsbotConfig.model_fields:
|
||||
value = userSettings.get(field)
|
||||
if value is not None:
|
||||
overrides[field] = value
|
||||
|
|
|
|||
|
|
@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
|
|||
),
|
||||
"agentRound": (
|
||||
"One short sentence (max ~14 words) the assistant says BETWEEN rounds "
|
||||
"of a longer agent task to signal that work is still in progress. "
|
||||
"Include the placeholder tokens '{round}' and '{maxRounds}' so the "
|
||||
"caller can substitute the actual numbers — e.g. 'Step {round} of "
|
||||
"{maxRounds}, still working.'"
|
||||
"of a longer agent task to update the audience on what it is doing. "
|
||||
"Include the placeholder token '{activity}' which will be filled with "
|
||||
"the current activity — e.g. 'I am {activity}, one moment...' or "
|
||||
"'Currently {activity}, almost there...'. Do NOT include step numbers."
|
||||
),
|
||||
}
|
||||
|
||||
|
|
@ -602,6 +602,13 @@ class TeamsbotService:
|
|||
self._lastTranscriptText: Optional[str] = None
|
||||
self._lastTranscriptId: Optional[str] = None
|
||||
self._lastSttTime: float = 0.0
|
||||
|
||||
# Audio chunk aggregation: collect chunks and send to STT only
|
||||
# after a speech pause or when the buffer reaches a target duration.
|
||||
self._audioBuffer: bytes = b""
|
||||
self._audioBufferStartTime: float = 0.0
|
||||
self._audioBufferLastChunkTime: float = 0.0
|
||||
self._audioBufferSampleRate: int = 16000
|
||||
self._lastBotResponseText: Optional[str] = None
|
||||
self._lastBotResponseTs: float = 0.0
|
||||
|
||||
|
|
@ -732,6 +739,12 @@ class TeamsbotService:
|
|||
hasAuth = bool(botAccountEmail and botAccountPassword)
|
||||
logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
|
||||
|
||||
avatarMediaData = None
|
||||
avatarMediaType = None
|
||||
avatarFileId = self._resolveAvatarFileId(session, interface)
|
||||
if avatarFileId:
|
||||
avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
|
||||
|
||||
result = await self.browserBotConnector.joinMeeting(
|
||||
sessionId=sessionId,
|
||||
meetingUrl=meetingLink,
|
||||
|
|
@ -743,6 +756,8 @@ class TeamsbotService:
|
|||
botAccountPassword=botAccountPassword,
|
||||
transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
|
||||
debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
|
||||
avatarMediaData=avatarMediaData,
|
||||
avatarMediaType=avatarMediaType,
|
||||
)
|
||||
|
||||
if result.get("success"):
|
||||
|
|
@ -767,6 +782,37 @@ class TeamsbotService:
|
|||
})
|
||||
await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
|
||||
|
||||
def _resolveAvatarFileId(self, session, interface):
    """Pick the avatar file ID for a session.

    A truthy ``defaultAvatarFileId`` on the session's meeting module wins.
    Otherwise the ``avatarFileId`` attribute of ``self.config`` is returned,
    or ``None`` when the config has no such attribute.
    """
    moduleId = session.get("moduleId")
    moduleRecord = interface.getModule(moduleId) if moduleId else None
    moduleAvatar = moduleRecord.get("defaultAvatarFileId") if moduleRecord else None
    if moduleAvatar:
        return moduleAvatar
    # Looked up lazily: only needed when no module-level avatar applies.
    return getattr(self.config, "avatarFileId", None)
|
||||
|
||||
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
    """Fetch an avatar file and return it as ``(base64Text, mimeType)``.

    The file is read through the management interface obtained for the current
    user and mandate. The MIME type falls back to ``image/png`` when the file
    record has none. ``(None, None)`` is returned when the record is missing,
    when it has no data, or when any exception occurs while loading; each of
    those cases is logged. ``_teamsbotInterface`` is not used.
    """
    import base64
    from modules.interfaces import interfaceDbManagement

    try:
        fileStore = interfaceDbManagement.getInterface(self.currentUser, self.mandateId)
        record = fileStore.getFile(fileId)
        if record:
            mediaType = getattr(record, "mimeType", None) or "image/png"
            payload = fileStore.getFileData(fileId)
            if payload:
                encoded = base64.b64encode(payload).decode("ascii")
                logger.info(f"Avatar file loaded: {fileId}, {mediaType}, {len(encoded)} chars base64")
                return encoded, mediaType
            logger.warning(f"Avatar file {fileId} has no data")
        else:
            logger.warning(f"Avatar file {fileId} not found")
        return None, None
    except Exception as e:
        # Best effort: a broken avatar leaves the caller with no avatar data.
        logger.error(f"Failed to load avatar file {fileId}: {e}")
        return None, None
|
||||
|
||||
async def leaveMeeting(self, sessionId: str):
|
||||
"""Send leave command to the Browser Bot service."""
|
||||
from . import interfaceFeatureTeamsbot as interfaceDb
|
||||
|
|
@ -1164,6 +1210,14 @@ class TeamsbotService:
|
|||
interface.updateSession(sessionId, updates)
|
||||
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
|
||||
|
||||
# Flush remaining audio buffer before generating summary
|
||||
if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
|
||||
if self._audioBuffer:
|
||||
logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)")
|
||||
self._audioBuffer = b""
|
||||
self._audioBufferStartTime = 0.0
|
||||
self._audioBufferLastChunkTime = 0.0
|
||||
|
||||
# Generate summary when session ends
|
||||
if dbStatus == TeamsbotSessionStatus.ENDED.value:
|
||||
asyncio.create_task(self._generateMeetingSummary(sessionId))
|
||||
|
|
@ -1178,11 +1232,18 @@ class TeamsbotService:
|
|||
voiceInterface,
|
||||
websocket: WebSocket,
|
||||
):
|
||||
"""Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline."""
|
||||
"""Process an audio chunk from WebRTC capture. The bot-side VAD
|
||||
(AudioWorklet / ScriptProcessor) already segments speech into 1-8s
|
||||
voiced chunks. Here we apply a minimum-duration safety net: very short
|
||||
chunks (<1s) are buffered until they reach 1s; everything else goes
|
||||
straight to STT. A wall-clock timeout flushes stale buffers."""
|
||||
import base64
|
||||
_MIN_CHUNK_SEC = 1.0
|
||||
_STALE_TIMEOUT_SEC = 3.0
|
||||
|
||||
try:
|
||||
audioBytes = base64.b64decode(audioBase64)
|
||||
if len(audioBytes) < 1000:
|
||||
if len(audioBytes) < 500:
|
||||
return
|
||||
|
||||
if captureDiagnostics:
|
||||
|
|
@ -1195,14 +1256,12 @@ class TeamsbotService:
|
|||
f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
|
||||
)
|
||||
|
||||
# Use RMS from capture diagnostics to skip real silence.
|
||||
# Byte-variation heuristics produced false positives and dropped valid speech.
|
||||
isSilent = False
|
||||
if captureDiagnostics and captureDiagnostics.get("rms") is not None:
|
||||
try:
|
||||
rmsVal = float(captureDiagnostics.get("rms"))
|
||||
if rmsVal < 0.0003:
|
||||
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})")
|
||||
return
|
||||
isSilent = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -1210,21 +1269,51 @@ class TeamsbotService:
|
|||
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
|
||||
return
|
||||
|
||||
# Treat sampleRate=0 as unknown (triggers auto-detection)
|
||||
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
|
||||
now = time.time()
|
||||
effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000
|
||||
|
||||
if not isSilent:
|
||||
if not self._audioBuffer:
|
||||
self._audioBufferStartTime = now
|
||||
self._audioBuffer += audioBytes
|
||||
self._audioBufferLastChunkTime = now
|
||||
self._audioBufferSampleRate = effectiveRate
|
||||
|
||||
bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0
|
||||
bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0
|
||||
|
||||
shouldFlush = (
|
||||
self._audioBuffer
|
||||
and (
|
||||
bufferDuration >= _MIN_CHUNK_SEC
|
||||
or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
|
||||
)
|
||||
)
|
||||
|
||||
if not shouldFlush:
|
||||
return
|
||||
|
||||
flushBytes = self._audioBuffer
|
||||
flushRate = self._audioBufferSampleRate
|
||||
self._audioBuffer = b""
|
||||
self._audioBufferStartTime = 0.0
|
||||
self._audioBufferLastChunkTime = 0.0
|
||||
|
||||
flushDuration = len(flushBytes) / (flushRate * 2)
|
||||
logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")
|
||||
|
||||
phraseHints = list(self._knownSpeakers)
|
||||
if self.config.botName:
|
||||
phraseHints.append(self.config.botName)
|
||||
|
||||
sttResult = await voiceInterface.speechToText(
|
||||
audioContent=audioBytes,
|
||||
audioContent=flushBytes,
|
||||
language=self.config.language or "de-DE",
|
||||
sampleRate=effectiveSampleRate,
|
||||
sampleRate=flushRate,
|
||||
channels=1,
|
||||
skipFallbacks=True,
|
||||
phraseHints=phraseHints if phraseHints else None,
|
||||
alternativeLanguages=["en-US"],
|
||||
audioFormat="linear16",
|
||||
)
|
||||
|
||||
if sttResult and sttResult.get("success") and sttResult.get("text"):
|
||||
|
|
@ -1252,19 +1341,18 @@ class TeamsbotService:
|
|||
|
||||
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
|
||||
"""Track current speaker from captions for STT attribution.
|
||||
When the first non-bot caption arrives, retroactively attributes
|
||||
any STT segments that were created before a speaker was known."""
|
||||
Retroactively attributes any unattributed STT segments whenever a
|
||||
new non-bot caption speaker arrives (not just the first time)."""
|
||||
if not speaker:
|
||||
return
|
||||
normalizedSpeaker = speaker.strip()
|
||||
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
|
||||
return
|
||||
|
||||
prevSpeaker = self._lastCaptionSpeaker
|
||||
self._lastCaptionSpeaker = normalizedSpeaker
|
||||
self._knownSpeakers.add(normalizedSpeaker)
|
||||
|
||||
if prevSpeaker is None and self._unattributedTranscriptIds:
|
||||
if self._unattributedTranscriptIds:
|
||||
from . import interfaceFeatureTeamsbot as interfaceDb
|
||||
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
||||
for tid in self._unattributedTranscriptIds:
|
||||
|
|
@ -3243,17 +3331,53 @@ class TeamsbotService:
|
|||
return await self._pickEphemeralPhrase("agentBusy")
|
||||
|
||||
async def _interimAgentRoundMessage(
|
||||
self, roundNum: int, maxRounds: int
|
||||
self, lastToolLabel: Optional[str] = None
|
||||
) -> Optional[str]:
|
||||
"""Per-round progress notice for long agent runs (meeting voice /
|
||||
chat, ephemeral). Phrasing is AI-localised once per session;
|
||||
``{round}`` and ``{maxRounds}`` placeholders are substituted at
|
||||
render time. Returns ``None`` if generation failed."""
|
||||
return await self._pickEphemeralPhrase(
|
||||
"agentRound",
|
||||
substitutions={"round": roundNum, "maxRounds": maxRounds},
|
||||
chat, ephemeral). Generates a single short phrase in the bot's
|
||||
configured language that describes the current activity. Unlike
|
||||
the cached ephemeral phrases, this is a per-call AI generation
|
||||
to avoid mixing English displayLabels into non-English speech."""
|
||||
targetLang = (self.config.language or "").strip() or "en-US"
|
||||
botName = (self.config.botName or "the assistant").strip()
|
||||
activityHint = lastToolLabel or "working on the task"
|
||||
|
||||
prompt = (
|
||||
f"You are a meeting assistant named '{botName}'.\n"
|
||||
f"Target spoken language (BCP-47): {targetLang}\n\n"
|
||||
f"The assistant is currently busy with: {activityHint}\n\n"
|
||||
f"Generate ONE short sentence (max 12 words) in {targetLang} "
|
||||
f"that tells the audience what the assistant is doing right now. "
|
||||
f"Natural, spoken style. No step numbers. No quotes around the output.\n"
|
||||
f"Output ONLY the sentence, nothing else."
|
||||
)
|
||||
|
||||
try:
|
||||
aiService = createAiService(
|
||||
self.currentUser, self.mandateId, self.instanceId
|
||||
)
|
||||
await aiService.ensureAiObjectsInitialized()
|
||||
request = AiCallRequest(
|
||||
prompt=prompt,
|
||||
context="",
|
||||
options=AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||
priority=PriorityEnum.SPEED,
|
||||
),
|
||||
)
|
||||
response = await aiService.callAi(request)
|
||||
except Exception as aiErr:
|
||||
logger.debug(f"Agent round phrase generation failed: {aiErr}")
|
||||
return None
|
||||
|
||||
if not response or response.errorCount != 0 or not response.content:
|
||||
return None
|
||||
|
||||
result = response.content.strip().strip('"').strip("'")
|
||||
if len(result) > 200:
|
||||
result = result[:200]
|
||||
return result
|
||||
|
||||
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
|
||||
"""Deliver a short line to the meeting (TTS + chat per config) without
|
||||
persisting botResponses/transcripts, so the main agent answer stays the
|
||||
|
|
@ -3370,6 +3494,7 @@ class TeamsbotService:
|
|||
|
||||
finalText: str = ""
|
||||
rounds = 0
|
||||
lastToolLabel: Optional[str] = None
|
||||
try:
|
||||
async for event in agentService.runAgent(
|
||||
prompt=taskText,
|
||||
|
|
@ -3390,11 +3515,9 @@ class TeamsbotService:
|
|||
"round": roundNum,
|
||||
"maxRounds": maxR,
|
||||
})
|
||||
# Runde 1: schon allgemeiner Start-Hinweis; ab Runde 2 ins Meeting melden.
|
||||
# Director prompts bleiben still — keine Zwischen-Updates ins Meeting.
|
||||
if roundNum >= 2 and not directorPromptMode:
|
||||
try:
|
||||
roundText = await self._interimAgentRoundMessage(roundNum, maxR)
|
||||
roundText = await self._interimAgentRoundMessage(lastToolLabel)
|
||||
if roundText:
|
||||
await self._notifyMeetingEphemeral(sessionId, roundText)
|
||||
except Exception as roundNoticeErr:
|
||||
|
|
@ -3402,12 +3525,26 @@ class TeamsbotService:
|
|||
f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
|
||||
)
|
||||
elif event.type == AgentEventTypeEnum.TOOL_CALL:
|
||||
toolName = (event.data or {}).get("toolName") if event.data else None
|
||||
evtData = event.data or {}
|
||||
toolName = evtData.get("toolName")
|
||||
lastToolLabel = evtData.get("displayLabel")
|
||||
await _emitSessionEvent(sessionId, "agentRun", {
|
||||
"source": sourceLabel,
|
||||
"promptId": promptId,
|
||||
"status": "toolCall",
|
||||
"toolName": toolName,
|
||||
"displayLabel": lastToolLabel,
|
||||
})
|
||||
elif event.type == AgentEventTypeEnum.TOOL_RESULT:
|
||||
evtData = event.data or {}
|
||||
resultSnippet = (evtData.get("data") or "")[:200]
|
||||
await _emitSessionEvent(sessionId, "agentRun", {
|
||||
"source": sourceLabel,
|
||||
"promptId": promptId,
|
||||
"status": "toolResult",
|
||||
"toolName": evtData.get("toolName", ""),
|
||||
"success": evtData.get("success", True),
|
||||
"summary": resultSnippet,
|
||||
})
|
||||
elif event.type == AgentEventTypeEnum.FILE_CREATED:
|
||||
await _emitSessionEvent(sessionId, "documentCreated", event.data or {})
|
||||
|
|
|
|||
|
|
@ -754,14 +754,35 @@ ANTI-PATTERNS (do NOT do this):
|
|||
"""
|
||||
|
||||
|
||||
# Parked for one release as a fallback while the ontology-based path rolls
|
||||
# out (see `trusteeOntology.getTrusteeOntology()`). Remove together with the
|
||||
# legacy ``_loadFeatureDomainHints`` path once Phase 2 is the only supplier
|
||||
# of the trustee prompt block.
|
||||
_AGENT_DOMAIN_HINTS_LEGACY = _AGENT_DOMAIN_HINTS
|
||||
|
||||
|
||||
def getAgentDomainHints() -> str:
|
||||
"""Return Trustee-specific guidance for the Feature Data Sub-Agent.
|
||||
|
||||
The text is appended verbatim to the sub-agent's system prompt by
|
||||
``featureDataAgent._buildSchemaContext``. Keep it concise and
|
||||
pattern-driven — every line costs tokens on every sub-agent call.
|
||||
Deprecated as of Phase 2 (2026-05). Prefer ``getAgentOntology()`` ->
|
||||
``ontologyToPromptCompiler.compileOntologyToPrompt(...)``. The legacy
|
||||
text remains available so callers that still go through
|
||||
``_buildSchemaContext()`` keep working during the migration window.
|
||||
"""
|
||||
return _AGENT_DOMAIN_HINTS
|
||||
return _AGENT_DOMAIN_HINTS_LEGACY
|
||||
|
||||
|
||||
def getAgentOntology():
    """Hand the trustee ontology descriptor to the Feature Data Sub-Agent.

    This is the Phase 2 hook: per the feature's own notes,
    ``featureDataAgent._buildSchemaContext`` looks for it and, when present,
    compiles the domain block from the ontology rather than from the legacy
    free-text hints. The same descriptor also carries the NEVER_AGGREGATE
    constraints used by the validator, which keeps prompt and validator
    aligned.

    Returns:
        Whatever ``trusteeOntology.getTrusteeOntology()`` returns.
    """
    # Resolved at call time, not at module import.
    from modules.features.trustee import trusteeOntology

    return trusteeOntology.getTrusteeOntology()
|
||||
|
||||
|
||||
def registerFeature(catalogService) -> bool:
|
||||
|
|
|
|||
295
modules/features/trustee/trusteeOntology.py
Normal file
295
modules/features/trustee/trusteeOntology.py
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Trustee feature ontology (Phase 2 pilot).
|
||||
|
||||
Replaces the hand-written ``_AGENT_DOMAIN_HINTS`` block with a structured
|
||||
ontology so the Feature Data Sub-Agent's QueryValidator AND the prompt
|
||||
compiler share the same source of truth: account-group conventions,
|
||||
period-bucket semantics, the NEVER_AGGREGATE constraints on already-
|
||||
aggregated columns, and canonical tool-call templates for the most
|
||||
frequent user intents.
|
||||
|
||||
Both the validator (deterministic enforcement) and the prompt compiler
|
||||
(LLM steering) read from this descriptor, so an LLM that follows the
|
||||
prompt patterns will never trigger a validator failure -- and one that
|
||||
ignores them gets a structured repair hint pointing back at the same
|
||||
constraint.
|
||||
|
||||
The legacy ``_AGENT_DOMAIN_HINTS_LEGACY`` block stays parked in
|
||||
``mainTrustee.py`` for one release as a fallback during rollout.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
||||
CanonicalQueryPattern,
|
||||
Cardinality,
|
||||
Constraint,
|
||||
ConstraintRule,
|
||||
Entity,
|
||||
Invariant,
|
||||
OntologyDescriptor,
|
||||
Relation,
|
||||
SemanticType,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entities
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_ENTITIES = [
|
||||
Entity(
|
||||
name="Account",
|
||||
pythonClass="TrusteeDataAccount",
|
||||
semanticType=SemanticType.ACCOUNT,
|
||||
description=(
|
||||
"Chart-of-accounts row (Konto). One row per accountNumber per "
|
||||
"mandate. Identifies the account, never holds balances."
|
||||
),
|
||||
invariants=[
|
||||
Invariant(description="accountNumber is a stable string identifier (e.g. '1020', '5400')."),
|
||||
Invariant(description="accountType is one of: asset / liability / revenue / expense."),
|
||||
],
|
||||
),
|
||||
Entity(
|
||||
name="BankAccount",
|
||||
pythonClass="TrusteeDataAccount",
|
||||
semanticType=SemanticType.ACCOUNT,
|
||||
parentEntity="Account",
|
||||
description="Account subgroup with accountNumber LIKE '102%' (ZKB, PostFinance, UBS, ...).",
|
||||
),
|
||||
Entity(
|
||||
name="CashAccount",
|
||||
pythonClass="TrusteeDataAccount",
|
||||
semanticType=SemanticType.ACCOUNT,
|
||||
parentEntity="Account",
|
||||
description="Account subgroup with accountNumber LIKE '100%' (Hauptkasse, Nebenkassen).",
|
||||
),
|
||||
Entity(
|
||||
name="AccountBalance",
|
||||
pythonClass="TrusteeDataAccountBalance",
|
||||
semanticType=SemanticType.BALANCE_SNAPSHOT,
|
||||
description=(
|
||||
"Period-bucketed snapshot: one row per (account, year, month). "
|
||||
"closingBalance is THE balance at end of period -- already aggregated."
|
||||
),
|
||||
invariants=[
|
||||
Invariant(description="periodMonth=0 means annual total of periodYear (use for 'per 31.12.YYYY')."),
|
||||
Invariant(description="periodMonth in 1..12 means month-end snapshot."),
|
||||
Invariant(description="closingBalance is the balance at period end; openingBalance at period start."),
|
||||
Invariant(description="debitTotal/creditTotal are turnovers for the period, NOT balances."),
|
||||
],
|
||||
),
|
||||
Entity(
|
||||
name="JournalEntry",
|
||||
pythonClass="TrusteeDataJournalEntry",
|
||||
semanticType=SemanticType.TRANSACTION,
|
||||
description="One booking header (Beleg). Has a bookingDate (unix seconds float) and totalAmount.",
|
||||
invariants=[
|
||||
Invariant(description="bookingDate is a UTC unix-seconds float; never compare against ISO strings."),
|
||||
],
|
||||
),
|
||||
Entity(
|
||||
name="JournalLine",
|
||||
pythonClass="TrusteeDataJournalLine",
|
||||
semanticType=SemanticType.TRANSACTION,
|
||||
description="One booking line of a JournalEntry. Each line debits or credits exactly one account.",
|
||||
invariants=[
|
||||
Invariant(description="Per line either debitAmount > 0 (Soll) or creditAmount > 0 (Haben), not both."),
|
||||
],
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Relations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RELATIONS = [
|
||||
Relation(fromEntity="AccountBalance", toEntity="Account", cardinality=Cardinality.MANY_TO_ONE, via="accountNumber"),
|
||||
Relation(fromEntity="JournalLine", toEntity="JournalEntry", cardinality=Cardinality.MANY_TO_ONE, via="journalEntryId"),
|
||||
Relation(fromEntity="JournalLine", toEntity="Account", cardinality=Cardinality.MANY_TO_ONE, via="accountNumber"),
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constraints (validator-enforced)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CONSTRAINTS = [
|
||||
# closingBalance is the single biggest hallucination magnet -- it's a
|
||||
# balance per period, summing it across periods or accounts is meaningless.
|
||||
Constraint(
|
||||
appliesTo="TrusteeDataAccountBalance.closingBalance",
|
||||
rule=ConstraintRule.NEVER_AGGREGATE,
|
||||
message=(
|
||||
"closingBalance is per-period already; query with periodYear+periodMonth, never SUM/AVG it."
|
||||
),
|
||||
),
|
||||
Constraint(
|
||||
appliesTo="TrusteeDataAccountBalance.openingBalance",
|
||||
rule=ConstraintRule.NEVER_AGGREGATE,
|
||||
message="openingBalance is already a balance per period; do not SUM/AVG it across rows.",
|
||||
),
|
||||
Constraint(
|
||||
appliesTo="TrusteeDataAccountBalance.debitTotal",
|
||||
rule=ConstraintRule.NEVER_AGGREGATE,
|
||||
message=(
|
||||
"debitTotal is the period's debit TURNOVER; do not SUM it without an explicit period filter."
|
||||
),
|
||||
),
|
||||
Constraint(
|
||||
appliesTo="TrusteeDataAccountBalance.creditTotal",
|
||||
rule=ConstraintRule.NEVER_AGGREGATE,
|
||||
message="creditTotal is a per-period turnover; do not SUM it across periods without an explicit period filter.",
|
||||
),
|
||||
# AccountBalance queries without a period filter are almost always wrong --
|
||||
# they conflate annual and monthly snapshots. Phase 2 (REQUIRES_FILTER_ON)
|
||||
# is wired through to the validator in a later iteration; for now this
|
||||
# rule is rendered into the prompt compiler so the LLM sees it explicitly.
|
||||
Constraint(
|
||||
appliesTo="TrusteeDataAccountBalance",
|
||||
rule=ConstraintRule.REQUIRES_FILTER_ON,
|
||||
message=(
|
||||
"Always filter on periodYear AND periodMonth (use periodMonth=0 for end-of-year)."
|
||||
),
|
||||
params={"requiredFields": ["periodYear", "periodMonth"]},
|
||||
),
|
||||
Constraint(
|
||||
appliesTo="TrusteeDataAccountBalance",
|
||||
rule=ConstraintRule.PREFERRED_TABLE_FOR_INTENT,
|
||||
message="For 'Saldo per <date>' and 'Stand <year>' questions, prefer AccountBalance over JournalLine.",
|
||||
params={"intents": ["BANK_BALANCE_AT_DATE", "BALANCE_AT_YEAR_END"]},
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Canonical query patterns (worked examples for the LLM)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CANONICAL_PATTERNS = [
|
||||
CanonicalQueryPattern(
|
||||
intent="BANK_BALANCE_AT_DATE",
|
||||
description="Saldo eines Bankkontos per Jahresende.",
|
||||
pattern={
|
||||
"tool": "queryTable",
|
||||
"tableName": "TrusteeDataAccountBalance",
|
||||
"filters": [
|
||||
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
|
||||
{"field": "periodYear", "op": "=", "value": "<year>"},
|
||||
{"field": "periodMonth", "op": "=", "value": 0},
|
||||
],
|
||||
"fields": ["closingBalance", "currency"],
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="BANK_GROUP_TOTAL_AT_DATE",
|
||||
description="Summe einer Kontogruppe (z. B. alle Bankkonten 102%) per Jahresende.",
|
||||
pattern={
|
||||
"tool": "queryTable",
|
||||
"tableName": "TrusteeDataAccountBalance",
|
||||
"filters": [
|
||||
{"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
|
||||
{"field": "periodYear", "op": "=", "value": "<year>"},
|
||||
{"field": "periodMonth", "op": "=", "value": 0},
|
||||
],
|
||||
"fields": ["accountNumber", "closingBalance", "currency"],
|
||||
"_postProcessing": "Sum closingBalance values in your final answer; do NOT SUM via aggregateTable.",
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="BALANCE_HISTORY_PER_YEAR",
|
||||
description="Saldo-Verlauf eines Kontos ueber mehrere Jahre.",
|
||||
pattern={
|
||||
"tool": "queryTable",
|
||||
"tableName": "TrusteeDataAccountBalance",
|
||||
"filters": [
|
||||
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
|
||||
{"field": "periodMonth", "op": "=", "value": 0},
|
||||
],
|
||||
"fields": ["periodYear", "closingBalance", "currency"],
|
||||
"orderBy": "periodYear",
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="MONTHLY_BALANCE_SNAPSHOT",
|
||||
description="Saldo per Ende eines bestimmten Monats.",
|
||||
pattern={
|
||||
"tool": "queryTable",
|
||||
"tableName": "TrusteeDataAccountBalance",
|
||||
"filters": [
|
||||
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
|
||||
{"field": "periodYear", "op": "=", "value": "<year>"},
|
||||
{"field": "periodMonth", "op": "=", "value": "<month 1..12>"},
|
||||
],
|
||||
"fields": ["closingBalance", "currency"],
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="ACCOUNT_LIST_BY_TYPE_OR_PREFIX",
|
||||
description="Welche Konten gehoeren zu einer Gruppe (Typ oder Nummern-Prefix)?",
|
||||
pattern={
|
||||
"tool": "queryTable",
|
||||
"tableName": "TrusteeDataAccount",
|
||||
"filters": [
|
||||
{"field": "accountNumber", "op": "LIKE", "value": "<prefix>%"},
|
||||
],
|
||||
"fields": ["accountNumber", "label", "accountType"],
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="JOURNAL_SUM_AT_ACCOUNT",
|
||||
description="Summe der Soll- oder Haben-Buchungen auf einem Konto.",
|
||||
pattern={
|
||||
"tool": "aggregateTable",
|
||||
"tableName": "TrusteeDataJournalLine",
|
||||
"aggregate": "SUM",
|
||||
"field": "debitAmount",
|
||||
"filters": [
|
||||
{"field": "accountNumber", "op": "=", "value": "<accountNumber>"},
|
||||
],
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="COUNT_ROWS",
|
||||
description="Anzahl Buchungen / Buchungszeilen / Konten.",
|
||||
pattern={
|
||||
"tool": "aggregateTable",
|
||||
"tableName": "<table>",
|
||||
"aggregate": "COUNT",
|
||||
"field": "id",
|
||||
},
|
||||
),
|
||||
CanonicalQueryPattern(
|
||||
intent="JOURNAL_LINES_BY_AMOUNT",
|
||||
description="Buchungszeilen mit einem Betrag groesser/kleiner als einer Schwelle.",
|
||||
pattern={
|
||||
"tool": "queryTable",
|
||||
"tableName": "TrusteeDataJournalLine",
|
||||
"filters": [
|
||||
{"field": "debitAmount", "op": ">", "value": "<amount>"},
|
||||
],
|
||||
"fields": ["accountNumber", "debitAmount", "description"],
|
||||
},
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
_TRUSTEE_ONTOLOGY = OntologyDescriptor(
|
||||
featureCode="trustee",
|
||||
entities=_ENTITIES,
|
||||
relations=_RELATIONS,
|
||||
constraints=_CONSTRAINTS,
|
||||
canonicalPatterns=_CANONICAL_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
def getTrusteeOntology() -> OntologyDescriptor:
    """Return the trustee ontology descriptor.

    The descriptor is assembled once at module level as ``_TRUSTEE_ONTOLOGY``;
    every call returns that same object, so nothing is rebuilt per call.
    """
    descriptor = _TRUSTEE_ONTOLOGY
    return descriptor
|
||||
|
|
@ -33,11 +33,6 @@ UI_OBJECTS = [
|
|||
"label": t("Einstellungen", context="UI"),
|
||||
"meta": {"area": "settings"}
|
||||
},
|
||||
{
|
||||
"objectKey": "ui.feature.workspace.rag-insights",
|
||||
"label": t("Wissens-Insights", context="UI"),
|
||||
"meta": {"area": "rag-insights"},
|
||||
},
|
||||
]
|
||||
|
||||
RESOURCE_OBJECTS = [
|
||||
|
|
@ -86,7 +81,6 @@ TEMPLATE_ROLES = [
|
|||
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
|
||||
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
|
||||
]
|
||||
},
|
||||
|
|
@ -97,7 +91,6 @@ TEMPLATE_ROLES = [
|
|||
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
||||
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
|
||||
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},
|
||||
|
|
|
|||
|
|
@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings(
|
|||
|
||||
# =========================================================================
|
||||
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
||||
# =========================================================================
|
||||
|
||||
def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]:
    """Collect the ids of every FileItem attached to a feature instance.

    Knowledge rows are often stored without ``featureInstanceId``, so callers
    correlate them by file id taken from the Management DB instead.

    Args:
        instanceId: Feature instance whose FileItem rows are looked up.
        mandateId: When truthy, rows that carry a *different* mandate id are
            skipped. Rows without any mandate id are kept.

    Returns:
        File ids as strings, in recordset order; rows without an id are skipped.
    """
    from modules.datamodels.datamodelFiles import FileItem
    from modules.interfaces.interfaceDbManagement import ComponentObjects

    def _field(row, key):
        # Recordset rows may be plain dicts or model objects.
        return row.get(key) if isinstance(row, dict) else getattr(row, key, None)

    rows = ComponentObjects().db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId})
    wantedMandate = str(mandateId) if mandateId else ""

    fileIds: List[str] = []
    for row in rows or []:
        rowId = _field(row, "id")
        if not rowId:
            continue
        if wantedMandate:
            rowMandate = _field(row, "mandateId")
            # Compare as strings: the wanted id was normalised with str(), so a
            # non-string stored id would otherwise never match and drop the file.
            if rowMandate and str(rowMandate) != wantedMandate:
                continue
        fileIds.append(str(rowId))
    return fileIds
|
||||
|
||||
|
||||
@router.get("/{instanceId}/rag-statistics")
@limiter.limit("60/minute")
async def getRagStatistics(
    request: Request,
    instanceId: str = Path(...),
    days: int = Query(90, ge=7, le=365, description="Timeline window in days"),
    context: RequestContext = Depends(getRequestContext),
):
    """Serve aggregated knowledge-store metrics for one workspace instance.

    Access is validated first; the instance's file ids are then resolved and
    passed to the knowledge interface together with the timeline window.
    When the interface returns a dict, the number of resolved file ids is
    recorded under ``scope.workspaceFileIdsResolved`` before responding.
    """
    mandateId, _unusedConfig = _validateInstanceAccess(instanceId, context)
    fileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId)

    knowledge = getKnowledgeInterface(context.user)
    payload = knowledge.getRagStatisticsForInstance(
        featureInstanceId=instanceId,
        mandateId=str(mandateId) if mandateId else "",
        timelineDays=days,
        workspaceFileIds=fileIds,
    )

    if isinstance(payload, dict):
        scope = payload.setdefault("scope", {})
        scope["workspaceFileIdsResolved"] = len(fileIds)
    return JSONResponse(payload)
|
||||
|
|
|
|||
|
|
@ -133,6 +133,60 @@ class KnowledgeObjects:
|
|||
|
||||
return {"indexRows": indexCount, "chunks": chunkCount}
|
||||
|
||||
def deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]:
    """Delete every FileContentIndex row (and its chunks) ingested from one DataSource.

    Used when a user disables ragIndexEnabled on a DataSource, to purge only
    the chunks that came from that specific tree element. Rows are matched on
    ``provenance["dataSourceId"]``; the filter runs in Python over the full
    FileContentIndex recordset.

    Args:
        dataSourceId: Id to match against ``provenance.dataSourceId``. A falsy
            value deletes nothing.

    Returns:
        ``{"indexRows": <deleted index rows>, "chunks": <deleted chunks>}``.
    """
    if not dataSourceId:
        return {"indexRows": 0, "chunks": 0}

    def _field(row, key):
        # Recordset rows may be dicts or model objects; read both uniformly.
        return row.get(key) if isinstance(row, dict) else getattr(row, key, None)

    matchedRows = []
    for row in self.db.getRecordset(FileContentIndex) or []:
        prov = _field(row, "provenance")
        if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
            matchedRows.append(row)

    touchedMandates: set = set()
    chunkCount = 0
    indexCount = 0
    for row in matchedRows:
        fileId = _field(row, "id")
        if not fileId:
            continue
        for chunk in self.db.getRecordset(ContentChunk, recordFilter={"fileId": fileId}) or []:
            # Same dict/object tolerance as for index rows (was chunk["id"]).
            chunkId = _field(chunk, "id")
            if chunkId and self.db.recordDelete(ContentChunk, chunkId):
                chunkCount += 1
        if self.db.recordDelete(FileContentIndex, fileId):
            indexCount += 1
        rowMandate = _field(row, "mandateId")
        if rowMandate:
            # NOTE(review): recorded for every processed row, since chunk
            # deletions alone presumably change billed storage — confirm.
            touchedMandates.add(str(rowMandate))

    # Best effort: a billing failure must not undo or hide the purge result.
    for mandateKey in touchedMandates:
        try:
            from modules.interfaces.interfaceDbBilling import _getRootInterface
            _getRootInterface().reconcileMandateStorageBilling(mandateKey)
        except Exception as ex:
            logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex)

    return {"indexRows": indexCount, "chunks": chunkCount}
|
||||
|
||||
def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]:
    """Return the FileContentIndex rows whose ``provenance.dataSourceId`` matches.

    Args:
        dataSourceId: Id to look for; a falsy value yields an empty list.

    Returns:
        Matching rows as dicts. Rows that are already dicts are returned
        as-is, anything else is converted with ``dict(row)``.
    """
    if not dataSourceId:
        return []

    def _fromDataSource(row) -> bool:
        prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
        return isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId

    return [
        row if isinstance(row, dict) else dict(row)
        for row in self.db.getRecordset(FileContentIndex)
        if _fromDataSource(row)
    ]
|
||||
|
||||
def deleteFileContentIndex(self, fileId: str) -> bool:
|
||||
"""Delete a FileContentIndex and all associated ContentChunks."""
|
||||
existing = self.getFileContentIndex(fileId)
|
||||
|
|
|
|||
|
|
@ -1274,17 +1274,20 @@ class ComponentObjects:
|
|||
if getattr(permissions, "update", None) != AccessLevel.ALL:
|
||||
raise PermissionError("Setting global scope requires ALL permission")
|
||||
|
||||
self.db.recordModify(FileFolder, folderId, {"scope": scope})
|
||||
allFolderIds = self._collectChildFolderIds(folderId)
|
||||
for fid in allFolderIds:
|
||||
self.db.recordModify(FileFolder, fid, {"scope": scope})
|
||||
|
||||
filesUpdated = 0
|
||||
if cascadeToFiles:
|
||||
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
|
||||
for item in items:
|
||||
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
||||
if owner == self.userId:
|
||||
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
||||
self.db.recordModify(FileItem, iid, {"scope": scope})
|
||||
filesUpdated += 1
|
||||
for fid in allFolderIds:
|
||||
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
|
||||
for item in items:
|
||||
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
||||
if owner == self.userId:
|
||||
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
||||
self.db.recordModify(FileItem, iid, {"scope": scope})
|
||||
filesUpdated += 1
|
||||
|
||||
return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
|
||||
|
||||
|
|
@ -1294,16 +1297,19 @@ class ComponentObjects:
|
|||
raise FileNotFoundError(f"Folder {folderId} not found")
|
||||
self._requireFolderWriteAccess(folder, folderId, "update")
|
||||
|
||||
self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize})
|
||||
allFolderIds = self._collectChildFolderIds(folderId)
|
||||
for fid in allFolderIds:
|
||||
self.db.recordModify(FileFolder, fid, {"neutralize": neutralize})
|
||||
|
||||
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
|
||||
filesUpdated = 0
|
||||
for item in items:
|
||||
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
||||
if owner == self.userId:
|
||||
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
||||
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
|
||||
filesUpdated += 1
|
||||
for fid in allFolderIds:
|
||||
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
|
||||
for item in items:
|
||||
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
||||
if owner == self.userId:
|
||||
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
||||
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
|
||||
filesUpdated += 1
|
||||
|
||||
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,10 @@ class VoiceObjects:
|
|||
sampleRate: int = None, channels: int = None,
|
||||
skipFallbacks: bool = False,
|
||||
phraseHints: list = None,
|
||||
alternativeLanguages: list = None) -> Dict[str, Any]:
|
||||
alternativeLanguages: list = None,
|
||||
model: str = "latest_long",
|
||||
lightweight: bool = False,
|
||||
audioFormat: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||
|
||||
|
|
@ -81,6 +84,9 @@ class VoiceObjects:
|
|||
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
|
||||
phraseHints: Optional list of phrases to boost recognition (names, terms)
|
||||
alternativeLanguages: Optional list of additional language codes for multi-language
|
||||
model: Google STT model (e.g. latest_long, latest_short)
|
||||
lightweight: If True, omit word-level features and enhanced model
|
||||
audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection
|
||||
|
||||
Returns:
|
||||
Dict containing transcribed text, confidence, and metadata
|
||||
|
|
@ -97,6 +103,9 @@ class VoiceObjects:
|
|||
skipFallbacks=skipFallbacks,
|
||||
phraseHints=phraseHints,
|
||||
alternativeLanguages=alternativeLanguages,
|
||||
model=model,
|
||||
lightweight=lightweight,
|
||||
audioFormat=audioFormat,
|
||||
)
|
||||
|
||||
if result["success"]:
|
||||
|
|
@ -120,13 +129,23 @@ class VoiceObjects:
|
|||
audioQueue: asyncio.Queue,
|
||||
language: str = "de-DE",
|
||||
phraseHints: Optional[list] = None,
|
||||
model: str = "latest_long",
|
||||
lightweight: bool = False,
|
||||
singleUtterance: bool = False,
|
||||
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||
"""
|
||||
Stream audio to Google Streaming STT and yield interim/final results.
|
||||
Billing is recorded for each final result.
|
||||
"""
|
||||
connector = self._getGoogleSpeechConnector()
|
||||
async for event in connector.streamingRecognize(audioQueue, language, phraseHints):
|
||||
async for event in connector.streamingRecognize(
|
||||
audioQueue,
|
||||
language,
|
||||
phraseHints,
|
||||
model=model,
|
||||
lightweight=lightweight,
|
||||
singleUtterance=singleUtterance,
|
||||
):
|
||||
if event.get("isFinal") and self.billingCallback:
|
||||
durationSec = event.get("audioDurationSec", 0)
|
||||
priceCHF = connector.calculateSttCostCHF(durationSec)
|
||||
|
|
|
|||
217
modules/routes/routeAdminSttBenchmark.py
Normal file
217
modules/routes/routeAdminSttBenchmark.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""STT Benchmark route — compare Speech-to-Text v1 (latest_long) vs v2 (Chirp 2).
|
||||
|
||||
Sysadmin-only page for evaluating STT model quality and latency.
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Request, UploadFile, File, Form
|
||||
from modules.auth import limiter, getCurrentUser
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api/admin/stt-benchmark",
|
||||
tags=["Admin STT Benchmark"],
|
||||
responses={401: {"description": "Unauthorized"}, 403: {"description": "Forbidden"}},
|
||||
)
|
||||
|
||||
|
||||
def _requireSysAdmin(currentUser: User = Depends(getCurrentUser)) -> User:
    """FastAPI dependency that lets only sysadmins / platform admins through.

    Returns:
        The authenticated user, unchanged.

    Raises:
        HTTPException: 403 when neither ``isSysAdmin`` nor ``isPlatformAdmin``
            is truthy on the user (missing attributes count as False).
    """
    isAdmin = getattr(currentUser, "isSysAdmin", False) or getattr(currentUser, "isPlatformAdmin", False)
    if isAdmin:
        return currentUser
    raise HTTPException(status_code=403, detail="SysAdmin required")
|
||||
|
||||
|
||||
def _getCredentials():
    """Build Google service-account credentials from the configured secret.

    The secret ``Connector_GoogleSpeech_API_KEY_SECRET`` is expected to hold
    the service-account JSON as a string.

    Raises:
        HTTPException: 500 when the secret is missing/empty or still starts
            with the ``YOUR_`` placeholder prefix.
    """
    serviceAccountJson = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
    configured = bool(serviceAccountJson) and not serviceAccountJson.startswith("YOUR_")
    if not configured:
        raise HTTPException(status_code=500, detail="Google Speech API key not configured")

    # Imported lazily, only once a usable secret is known to exist.
    from google.oauth2 import service_account

    accountInfo = json.loads(serviceAccountJson)
    return service_account.Credentials.from_service_account_info(accountInfo)
|
||||
|
||||
|
||||
def _runV1(audioBytes: bytes, language: str, model: str) -> Dict[str, Any]:
    """Transcribe ``audioBytes`` once with Speech-to-Text v1 and time the call.

    Args:
        audioBytes: Raw audio payload, sent inline as ``RecognitionAudio.content``.
        language: Language code passed as ``language_code``.
        model: v1 model name forwarded to the recognition config.

    Returns:
        Dict with ``api`` ("v1"), ``model``, ``latencyMs`` (wall time of the
        ``recognize`` call only, rounded to 0.1 ms), ``results`` (one entry
        per alternative: transcript, confidence, word count) and
        ``resultCount`` (number of result segments in the response).
    """
    from google.cloud import speech

    client = speech.SpeechClient(credentials=_getCredentials())

    recognitionConfig = speech.RecognitionConfig(
        # Encoding is left unspecified, as in the benchmark's original setup.
        encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        language_code=language,
        model=model,
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
        enable_word_confidence=True,
        max_alternatives=3,
        use_enhanced=True,
    )
    recognitionAudio = speech.RecognitionAudio(content=audioBytes)

    # Only the API round trip is timed; client setup is excluded.
    startedAt = time.perf_counter()
    response = client.recognize(config=recognitionConfig, audio=recognitionAudio)
    elapsedSec = time.perf_counter() - startedAt

    alternatives = [
        {
            "transcript": alt.transcript,
            "confidence": round(alt.confidence, 4),
            "words": len(alt.words) if alt.words else 0,
        }
        for segment in response.results
        for alt in segment.alternatives
    ]

    return {
        "api": "v1",
        "model": model,
        "latencyMs": round(elapsedSec * 1000, 1),
        "results": alternatives,
        "resultCount": len(response.results),
    }
|
||||
|
||||
|
||||
def _runV2(audioBytes: bytes, language: str, model: str, location: str) -> Dict[str, Any]:
    """Transcribe ``audioBytes`` once with Speech-to-Text v2 (e.g. Chirp 2) and time the call.

    Args:
        audioBytes: Raw audio payload, sent inline as ``RecognizeRequest.content``.
        language: Language code, passed as the single entry of ``language_codes``.
        model: v2 model name (e.g. ``chirp_2``).
        location: Google Cloud location of the recognizer (e.g. ``europe-west4``
            or ``global``); it selects both the API endpoint and the recognizer path.

    Returns:
        Dict with ``api`` ("v2"), ``model``, ``location``, ``latencyMs`` (wall
        time of the ``recognize`` call only, rounded to 0.1 ms), ``results``
        (one entry per alternative) and ``resultCount`` (number of result
        segments).

    Raises:
        ValueError: When the service-account JSON carries no ``project_id``,
            since the recognizer resource path cannot be built without it.
    """
    from google.cloud.speech_v2 import SpeechClient
    from google.cloud.speech_v2.types import cloud_speech

    credentials = _getCredentials()
    accountInfo = json.loads(APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET"))
    projectId = accountInfo.get("project_id", "")
    if not projectId:
        # Fail with a readable message rather than sending "projects//locations/...".
        raise ValueError("Google service account JSON has no project_id; cannot address a v2 recognizer")

    # The global location uses the plain host; only regional locations take the
    # "<location>-" prefix.
    if location == "global":
        apiEndpoint = "speech.googleapis.com"
    else:
        apiEndpoint = f"{location}-speech.googleapis.com"

    client = SpeechClient(
        credentials=credentials,
        client_options={"api_endpoint": apiEndpoint},
    )

    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=[language],
        model=model,
        features=cloud_speech.RecognitionFeatures(
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
            enable_word_confidence=True,
        ),
    )

    # "_" addresses the implicit default recognizer, so none has to be created.
    recognizer = f"projects/{projectId}/locations/{location}/recognizers/_"

    request = cloud_speech.RecognizeRequest(
        recognizer=recognizer,
        config=config,
        content=audioBytes,
    )

    # Only the API round trip is timed; client setup is excluded.
    t0 = time.perf_counter()
    response = client.recognize(request=request)
    elapsed = time.perf_counter() - t0

    results = [
        {
            "transcript": alt.transcript,
            "confidence": round(alt.confidence, 4),
            "words": len(alt.words) if alt.words else 0,
        }
        for r in response.results
        for alt in r.alternatives
    ]

    return {
        "api": "v2",
        "model": model,
        "location": location,
        "latencyMs": round(elapsed * 1000, 1),
        "results": results,
        "resultCount": len(response.results),
    }
|
||||
|
||||
|
||||
@router.post("/run")
@limiter.limit("10/minute")
async def runBenchmark(
    request: Request,
    file: UploadFile = File(...),
    language: str = Form(default="de-DE"),
    v1Model: str = Form(default="latest_long"),
    v2Model: str = Form(default="chirp_2"),
    v2Location: str = Form(default="europe-west4"),
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Upload one audio file and compare v1 against v2 STT results.

    Each API is run independently: a failure of one is reported as
    ``{"error": "<message>"}`` in its slot and does not prevent the other
    from running.

    Returns:
        Dict with ``filename``, ``fileSizeBytes``, ``language`` and the
        per-API result (or error) under ``v1`` and ``v2``.

    Raises:
        HTTPException: 400 when the upload exceeds 10 MB or is under 100 bytes.
    """
    import asyncio

    maxBytes = 10 * 1024 * 1024
    # Read at most one byte past the limit, so an oversized upload is rejected
    # without first being pulled into memory in full.
    audioBytes = await file.read(maxBytes + 1)
    if len(audioBytes) > maxBytes:
        raise HTTPException(status_code=400, detail="Audio file too large (max 10 MB)")
    if len(audioBytes) < 100:
        raise HTTPException(status_code=400, detail="Audio file too small")

    logger.info("STT benchmark: %s, %d bytes, language=%s, v1=%s, v2=%s@%s",
                file.filename, len(audioBytes), language, v1Model, v2Model, v2Location)

    # The Google clients block on network I/O, so they run in worker threads to
    # keep the event loop responsive. They stay sequential so the two latency
    # figures are not skewed by competing with each other.
    v1Result = None
    v1Error = None
    try:
        v1Result = await asyncio.to_thread(_runV1, audioBytes, language, v1Model)
    except Exception as e:
        v1Error = str(e)
        logger.warning("STT v1 benchmark failed: %s", e)

    v2Result = None
    v2Error = None
    try:
        v2Result = await asyncio.to_thread(_runV2, audioBytes, language, v2Model, v2Location)
    except Exception as e:
        v2Error = str(e)
        logger.warning("STT v2 benchmark failed: %s", e)

    return {
        "filename": file.filename,
        "fileSizeBytes": len(audioBytes),
        "language": language,
        "v1": v1Result or {"error": v1Error},
        "v2": v2Result or {"error": v2Error},
    }
|
||||
|
||||
|
||||
@router.get("/models")
@limiter.limit("30/minute")
async def getAvailableModels(
    request: Request,
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Return the static option lists shown by the STT benchmark UI.

    Each list entry is a ``{"value": ..., "label": ...}`` pair. The response
    has the keys ``v1Models``, ``v2Models``, ``locations`` and ``languages``.
    """

    def _asOptions(pairs):
        # Expand (value, label) tuples into the option dicts the UI consumes.
        return [{"value": value, "label": label} for value, label in pairs]

    v1Models = _asOptions([
        ("latest_long", "latest_long (default)"),
        ("latest_short", "latest_short"),
        ("phone_call", "phone_call"),
        ("video", "video"),
        ("command_and_search", "command_and_search"),
    ])
    v2Models = _asOptions([
        ("chirp_2", "Chirp 2 (recommended)"),
        ("chirp", "Chirp (original)"),
        ("long", "long"),
        ("short", "short"),
    ])
    locations = _asOptions([
        ("europe-west4", "Europe West (NL)"),
        ("us-central1", "US Central"),
        ("asia-southeast1", "Asia Southeast"),
    ])
    languages = _asOptions([
        ("de-DE", "Deutsch (DE)"),
        ("de-CH", "Deutsch (CH)"),
        ("en-US", "English (US)"),
        ("en-GB", "English (GB)"),
        ("fr-FR", "Francais (FR)"),
        ("it-IT", "Italiano (IT)"),
    ])

    return {
        "v1Models": v1Models,
        "v2Models": v2Models,
        "locations": locations,
        "languages": languages,
    }
|
||||
|
|
@ -1986,10 +1986,10 @@ def getUserViewTransactions(
|
|||
if not pagination:
|
||||
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
|
||||
import json as _json
|
||||
from collections import defaultdict
|
||||
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
|
||||
from modules.routes.routeHelpers import (
|
||||
applyViewToParams,
|
||||
build_group_summary_groups,
|
||||
effective_group_by_levels,
|
||||
resolveView,
|
||||
)
|
||||
|
|
@ -2018,28 +2018,7 @@ def getUserViewTransactions(
|
|||
summary_params,
|
||||
ctx.user,
|
||||
)
|
||||
counts: Dict[str, int] = defaultdict(int)
|
||||
labels: Dict[str, str] = {}
|
||||
null_key = "\x00NULL"
|
||||
for item in all_rows:
|
||||
raw = item.get(field)
|
||||
if raw is None or raw == "":
|
||||
nk = null_key
|
||||
labels[nk] = null_label
|
||||
else:
|
||||
nk = str(raw)
|
||||
if nk not in labels:
|
||||
labels[nk] = nk
|
||||
counts[nk] += 1
|
||||
groups_out: List[Dict[str, Any]] = []
|
||||
for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())):
|
||||
groups_out.append(
|
||||
{
|
||||
"value": None if nk == null_key else nk,
|
||||
"label": labels.get(nk, nk),
|
||||
"totalCount": counts[nk],
|
||||
}
|
||||
)
|
||||
groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels)
|
||||
return JSONResponse(content={"groups": groups_out})
|
||||
|
||||
paginationParams = None
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ def get_auth_authority_options(
|
|||
# ============================================================================
|
||||
|
||||
@router.get("/")
|
||||
@limiter.limit("30/minute")
|
||||
@limiter.limit("60/minute")
|
||||
async def get_connections(
|
||||
request: Request,
|
||||
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
|
||||
|
|
@ -197,7 +197,9 @@ async def get_connections(
|
|||
"lastChecked": connection.lastChecked,
|
||||
"expiresAt": connection.expiresAt,
|
||||
"tokenStatus": tokenStatus,
|
||||
"tokenExpiresAt": tokenExpiresAt
|
||||
"tokenExpiresAt": tokenExpiresAt,
|
||||
"knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False),
|
||||
"knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {},
|
||||
})
|
||||
return items
|
||||
|
||||
|
|
@ -264,7 +266,7 @@ async def get_connections(
|
|||
})
|
||||
enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
|
||||
filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||
return JSONResponse(content={"groups": groups_out})
|
||||
|
||||
try:
|
||||
|
|
@ -724,4 +726,161 @@ def delete_connection(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to delete connection: {str(e)}"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Knowledge Consent & Control Endpoints
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _findOwnConnection(interface, userId: str, connectionId: str):
|
||||
"""Find a connection owned by the user. Returns None if not found."""
|
||||
connections = interface.getUserConnections(userId)
|
||||
for conn in connections:
|
||||
if conn.id == connectionId:
|
||||
return conn
|
||||
return None
|
||||
|
||||
|
||||
@router.patch("/{connectionId}/knowledge-consent")
@limiter.limit("10/minute")
async def _updateKnowledgeConsent(
    request: Request,
    connectionId: str = Path(..., description="Connection ID"),
    enabled: bool = Body(..., embed=True),
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Switch knowledge ingestion for one of the caller's connections on or off.

    The ``knowledgeIngestionEnabled`` flag is persisted first. After that:

    * ``enabled=False``: the file content index for the connection is deleted
      and its jobs are cancelled.
    * ``enabled=True``: a ``connection.bootstrap`` job is started, but only if
      the connection has at least one DataSource with ``ragIndexEnabled``.

    The change is written to the audit log in both cases.

    Raises:
        HTTPException: 404 if the connection is not among the caller's
            connections; 500 for any other failure.
    """
    try:
        ownConnection = _findOwnConnection(getInterface(currentUser), currentUser.id, connectionId)
        if not ownConnection:
            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))

        from modules.interfaces.interfaceDbApp import getRootInterface
        appRoot = getRootInterface()
        appRoot.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled})

        # Defaults describe a call in which nothing had to be purged or started.
        outcome: Dict[str, Any] = {"purged": None, "cancelledJobs": 0, "bootstrapEnqueued": False}

        if enabled:
            from modules.datamodels.datamodelDataSource import DataSource
            ragSources = appRoot.db.getRecordset(
                DataSource,
                recordFilter={"connectionId": connectionId, "ragIndexEnabled": True},
            )
            if ragSources:
                from modules.serviceCenter.services.serviceBackgroundJobs import startJob
                # authority may be an enum member or a plain value.
                if hasattr(ownConnection.authority, "value"):
                    authorityName = ownConnection.authority.value
                else:
                    authorityName = str(ownConnection.authority or "")
                await startJob(
                    "connection.bootstrap",
                    {"connectionId": connectionId, "authority": authorityName.lower()},
                    triggeredBy=str(currentUser.id),
                )
                outcome["bootstrapEnqueued"] = True
        else:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            outcome["purged"] = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)

            from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
            outcome["cancelledJobs"] = cancelJobsByConnection(connectionId)

        import json as _json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        auditDetails = _json.dumps({"connectionId": connectionId, "enabled": enabled})
        audit_logger.logEvent(
            userId=str(currentUser.id),
            mandateId=str(getattr(ownConnection, "mandateId", "") or ""),
            category=AuditCategory.PERMISSION.value,
            action="knowledge_consent_changed",
            details=auditDetails,
        )

        stateWord = "enabled" if enabled else "disabled"
        logger.info("Knowledge consent %s for connection %s by user %s",
                    stateWord, connectionId, currentUser.id)
        return {
            "connectionId": connectionId,
            "knowledgeIngestionEnabled": enabled,
            **outcome,
        }
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) pass through untouched.
        raise
    except Exception as e:
        logger.error("Error updating knowledge consent: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.patch("/{connectionId}/knowledge-preferences")
@limiter.limit("20/minute")
def _updateKnowledgePreferences(
    request: Request,
    connectionId: str = Path(..., description="Connection ID"),
    preferences: Dict[str, Any] = Body(..., embed=True),
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Merge new knowledge ingestion preferences into a caller-owned connection.

    Only the whitelisted preference keys are accepted; any other key in the
    request is dropped. Accepted values override the stored ones, and
    ``schemaVersion`` is always written as ``1``.

    Raises:
        HTTPException: 404 if the connection is not among the caller's
            connections; 500 for any other failure.
    """
    permittedKeys = {
        "mailContentDepth", "mailIndexAttachments", "filesIndexBinaries",
        "clickupScope", "clickupIndexAttachments", "maxAgeDays",
    }
    try:
        ownConnection = _findOwnConnection(getInterface(currentUser), currentUser.id, connectionId)
        if not ownConnection:
            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))

        # Start from what is stored, then layer the accepted keys on top.
        merged = {**(getattr(ownConnection, "knowledgePreferences", None) or {})}
        for prefKey, prefValue in preferences.items():
            if prefKey in permittedKeys:
                merged[prefKey] = prefValue
        merged["schemaVersion"] = 1

        from modules.interfaces.interfaceDbApp import getRootInterface
        getRootInterface().db.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged})

        logger.info("Knowledge preferences updated for connection %s", connectionId)
        return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error updating knowledge preferences: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{connectionId}/knowledge-stop")
@limiter.limit("10/minute")
def _stopKnowledgeJobs(
    request: Request,
    connectionId: str = Path(..., description="Connection ID"),
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Cancel the jobs of one of the caller's connections and audit the action.

    Returns:
        ``{"connectionId": ..., "cancelled": <value returned by the cancel call>}``.

    Raises:
        HTTPException: 404 if the connection is not among the caller's
            connections; 500 for any other failure.
    """
    try:
        ownConnection = _findOwnConnection(getInterface(currentUser), currentUser.id, connectionId)
        if not ownConnection:
            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))

        from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
        cancelledCount = cancelJobsByConnection(connectionId)

        import json as _json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        auditDetails = _json.dumps({"connectionId": connectionId, "cancelledCount": cancelledCount})
        audit_logger.logEvent(
            userId=str(currentUser.id),
            mandateId=str(getattr(ownConnection, "mandateId", "") or ""),
            category=AuditCategory.PERMISSION.value,
            action="knowledge_jobs_stopped",
            details=auditDetails,
        )

        logger.info("Stopped %d knowledge jobs for connection %s", cancelledCount, connectionId)
        return {"connectionId": connectionId, "cancelled": cancelledCount}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error stopping knowledge jobs: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
@ -413,7 +413,7 @@ def patch_folder_scope(
|
|||
scope = body.get("scope")
|
||||
if not scope:
|
||||
raise HTTPException(status_code=400, detail="scope is required")
|
||||
cascadeToFiles = body.get("cascadeToFiles", False)
|
||||
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
|
||||
managementInterface = interfaceDbManagement.getInterface(
|
||||
currentUser,
|
||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||
|
|
@ -543,7 +543,7 @@ def get_files(
|
|||
FileItem,
|
||||
)
|
||||
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||
return JSONResponse(content={"groups": groups_out})
|
||||
|
||||
if mode == "filterValues":
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ def get_prompts(
|
|||
result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
|
||||
)
|
||||
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label)
|
||||
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||
return JSONResponse(content={"groups": groups_out})
|
||||
|
||||
if mode == "filterValues":
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging."""
|
||||
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
|
@ -125,3 +125,69 @@ def _updateNeutralizeFields(
|
|||
except Exception as e:
|
||||
logger.error("Error updating neutralizeFields: %s", e)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.patch("/{sourceId}/rag-index")
@limiter.limit("30/minute")
async def _updateDataSourceRagIndex(
    request: Request,
    sourceId: str = Path(..., description="ID of the DataSource"),
    ragIndexEnabled: bool = Body(..., embed=True),
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Toggle RAG indexing for a DataSource.

    true: sets flag + enqueues mini-bootstrap for this DataSource only.
    false: sets flag + synchronously purges all chunks from this DataSource.

    Must be `async def` so `await startJob(...)` registers `_runJob` in the
    main event loop. Sync route → worker thread → temporary loop closes
    before the task runs → job stays stuck forever.

    Returns:
        ``sourceId``, ``ragIndexEnabled``, ``updated`` and
        ``bootstrapEnqueued`` (whether a bootstrap job was actually started).

    Raises:
        HTTPException: 404 if the DataSource does not exist; 500 for any
            other failure.
    """
    # NOTE(review): the record is read and modified through the root interface
    # and no ownership/mandate check on the DataSource is visible here.
    # Confirm that authorization is enforced elsewhere.
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        rootIf = getRootInterface()
        rec = rootIf.db.getRecord(DataSource, sourceId)
        if not rec:
            raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")

        rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
        logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)

        bootstrapEnqueued = False
        if ragIndexEnabled:
            connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
            conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
            if conn is None:
                # A bootstrap job without a resolvable connection would carry
                # an empty connectionId and authority, so do not enqueue it.
                logger.warning(
                    "DataSource %s has no resolvable connection (connectionId=%r); bootstrap not enqueued",
                    sourceId, connectionId,
                )
            else:
                from modules.serviceCenter.services.serviceBackgroundJobs import startJob

                # authority may be an enum member or a plain value.
                authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
                await startJob(
                    "connection.bootstrap",
                    {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
                    triggeredBy=str(context.user.id),
                )
                bootstrapEnqueued = True
        else:
            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
            purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
            logger.info("Purged %d index rows / %d chunks for DataSource %s",
                        purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)

        import json
        from modules.shared.auditLogger import audit_logger
        from modules.datamodels.datamodelAudit import AuditCategory
        audit_logger.logEvent(
            userId=str(context.user.id),
            # Stringified like the other audit call sites; None is kept as None.
            mandateId=str(context.mandateId) if context.mandateId else None,
            category=AuditCategory.PERMISSION.value,
            action="rag_index_toggled",
            details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
        )

        return {
            "sourceId": sourceId,
            "ragIndexEnabled": ragIndexEnabled,
            "updated": True,
            "bootstrapEnqueued": bootstrapEnqueued,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error updating datasource ragIndexEnabled: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
|
|||
|
|
@ -825,45 +825,106 @@ def build_group_summary_groups(
|
|||
items: List[Dict[str, Any]],
|
||||
field: str,
|
||||
null_label: str = "—",
|
||||
groupByLevels: List[Dict[str, Any]] | None = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level).
|
||||
Build {"value", "label", "totalCount"} summaries for mode=groupSummary.
|
||||
|
||||
When *groupByLevels* contains more than one level the function produces one
|
||||
entry per unique combination of all level values (flat permutations).
|
||||
``value`` becomes a ``///``-joined composite key and ``label`` the ``/``-joined
|
||||
human-readable label so the frontend can split them back.
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
counts: Dict[str, int] = defaultdict(int)
|
||||
display_by_key: Dict[str, str] = {}
|
||||
null_key = "\x00NULL"
|
||||
label_attr = f"{field}Label"
|
||||
fields: list[dict] = []
|
||||
if groupByLevels and len(groupByLevels) > 1:
|
||||
for lvl in groupByLevels:
|
||||
f = lvl.get("field", "")
|
||||
nl = str(lvl.get("nullLabel") or null_label)
|
||||
if f:
|
||||
fields.append({"field": f, "nullLabel": nl})
|
||||
if not fields:
|
||||
fields = [{"field": field, "nullLabel": null_label}]
|
||||
|
||||
nullKey = "\x00NULL"
|
||||
|
||||
if len(fields) == 1:
|
||||
f = fields[0]["field"]
|
||||
nl = fields[0]["nullLabel"]
|
||||
counts: Dict[str, int] = defaultdict(int)
|
||||
displayByKey: Dict[str, str] = {}
|
||||
labelAttr = f"{f}Label"
|
||||
for item in items:
|
||||
raw = item.get(f)
|
||||
if raw is None or raw == "":
|
||||
nk = nullKey
|
||||
display = nl
|
||||
else:
|
||||
nk = str(raw)
|
||||
display = None
|
||||
lbl = item.get(labelAttr)
|
||||
if lbl is not None and lbl != "":
|
||||
display = str(lbl)
|
||||
if display is None:
|
||||
display = nk
|
||||
counts[nk] += 1
|
||||
if nk not in displayByKey:
|
||||
displayByKey[nk] = display
|
||||
orderedKeys = sorted(
|
||||
counts.keys(),
|
||||
key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()),
|
||||
)
|
||||
return [
|
||||
{
|
||||
"value": None if nk == nullKey else nk,
|
||||
"label": displayByKey.get(nk, nk),
|
||||
"totalCount": counts[nk],
|
||||
}
|
||||
for nk in orderedKeys
|
||||
]
|
||||
|
||||
counts = defaultdict(int)
|
||||
displayByComposite: Dict[str, list] = {}
|
||||
filtersByComposite: Dict[str, dict] = {}
|
||||
for item in items:
|
||||
raw = item.get(field)
|
||||
if raw is None or raw == "":
|
||||
nk = null_key
|
||||
display = null_label
|
||||
else:
|
||||
nk = str(raw)
|
||||
display = None
|
||||
lbl = item.get(label_attr)
|
||||
if lbl is not None and lbl != "":
|
||||
display = str(lbl)
|
||||
if display is None:
|
||||
display = nk
|
||||
counts[nk] += 1
|
||||
if nk not in display_by_key:
|
||||
display_by_key[nk] = display
|
||||
parts: list[str] = []
|
||||
labels: list[str] = []
|
||||
filterMap: dict = {}
|
||||
for fd in fields:
|
||||
f = fd["field"]
|
||||
nl = fd["nullLabel"]
|
||||
labelAttr = f"{f}Label"
|
||||
raw = item.get(f)
|
||||
if raw is None or raw == "":
|
||||
parts.append(nullKey)
|
||||
labels.append(nl)
|
||||
filterMap[f] = None
|
||||
else:
|
||||
parts.append(str(raw))
|
||||
lbl = item.get(labelAttr)
|
||||
labels.append(str(lbl) if lbl not in (None, "") else str(raw))
|
||||
filterMap[f] = str(raw)
|
||||
compositeKey = "///".join(parts)
|
||||
counts[compositeKey] += 1
|
||||
if compositeKey not in displayByComposite:
|
||||
displayByComposite[compositeKey] = labels
|
||||
filtersByComposite[compositeKey] = filterMap
|
||||
|
||||
ordered_keys = sorted(
|
||||
orderedKeys = sorted(
|
||||
counts.keys(),
|
||||
key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()),
|
||||
key=lambda x: tuple(
|
||||
(seg == nullKey, seg.lower()) for seg in x.split("///")
|
||||
),
|
||||
)
|
||||
return [
|
||||
{
|
||||
"value": None if nk == null_key else nk,
|
||||
"label": display_by_key.get(nk, nk),
|
||||
"totalCount": counts[nk],
|
||||
"value": ck.replace(nullKey, "__null__") if nullKey in ck else ck,
|
||||
"label": " / ".join(displayByComposite[ck]),
|
||||
"totalCount": counts[ck],
|
||||
"filters": filtersByComposite[ck],
|
||||
}
|
||||
for nk in ordered_keys
|
||||
for ck in orderedKeys
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
302
modules/routes/routeRagInventory.py
Normal file
302
modules/routes/routeRagInventory.py
Normal file
|
|
@ -0,0 +1,302 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""RAG Inventory API — global knowledge-store visibility for users, admins, platform."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Request
|
||||
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
|
||||
from modules.datamodels.datamodelUam import User
|
||||
from modules.shared.i18nRegistry import apiRouteContext
|
||||
|
||||
routeApiMsg = apiRouteContext("routeRagInventory")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api/rag/inventory",
|
||||
tags=["RAG Inventory"],
|
||||
responses={
|
||||
401: {"description": "Unauthorized"},
|
||||
403: {"description": "Forbidden"},
|
||||
500: {"description": "Internal server error"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
    """Build one inventory entry per connection.

    For every connection this collects its DataSources (de-duplicated by
    path), the number of index rows per DataSource, and the state of its
    ``connection.bootstrap`` jobs (running jobs, last error, last success).

    Args:
        connections: Connection objects exposing at least ``id`` and ``authority``.
        rootIf: App interface; ``rootIf.db.getRecordset`` is used for DataSources.
        knowledgeIf: Knowledge interface; ``knowledgeIf.db.getRecordset`` is
            used for FileContentIndex rows.
        jobService: Object exposing ``listJobs(jobType=..., limit=...)``.

    Returns:
        List of dicts, one per connection, in input order.
    """
    from modules.datamodels.datamodelDataSource import DataSource
    from modules.datamodels.datamodelKnowledge import FileContentIndex

    def _read(record, name, default=None):
        # Rows may arrive as plain dicts or as model objects.
        if isinstance(record, dict):
            return record.get(name)
        return getattr(record, name, default)

    # The job listing does not depend on the connection, so it is fetched once
    # (lazily, so an empty connection list triggers no query at all).
    # NOTE(review): the 50-job window is shared by all connections, so a busy
    # platform can push a connection's last success out of view. Confirm
    # whether per-connection filtering is available in listJobs.
    bootstrapJobs: Optional[List[Dict[str, Any]]] = None

    out = []
    for conn in connections:
        connectionId = str(conn.id)
        dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})

        connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
        connChunkTotal = len(connIndexRows)

        # Attribute every index row to the DataSource named in its provenance.
        chunksByDs: Dict[str, int] = {}
        unassigned = 0
        for idx in connIndexRows:
            struct = _read(idx, "structure") or {}
            ingestion = (struct.get("_ingestion") or {}) if isinstance(struct, dict) else {}
            prov = (ingestion.get("provenance") or {}) if isinstance(ingestion, dict) else {}
            dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
            if dsIdRef:
                chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
            else:
                unassigned += 1

        seenPaths = set()
        dsItems = []
        for ds in dataSources:
            dsId = _read(ds, "id", "")
            dsPath = _read(ds, "path", "")
            if dsPath in seenPaths:
                continue  # only the first DataSource per path is listed
            seenPaths.add(dsPath)
            dsItems.append({
                "id": dsId,
                "label": _read(ds, "label", ""),
                "path": dsPath,
                "sourceType": _read(ds, "sourceType", ""),
                "ragIndexEnabled": _read(ds, "ragIndexEnabled", False),
                "neutralize": _read(ds, "neutralize", False),
                "lastIndexed": _read(ds, "lastIndexed", None),
                "chunkCount": chunksByDs.get(dsId, 0),
            })

        # Rows without provenance are spread evenly over the listed
        # DataSources; the first `remainder` entries get one extra.
        if unassigned > 0 and dsItems:
            perDs, remainder = divmod(unassigned, len(dsItems))
            for i, item in enumerate(dsItems):
                item["chunkCount"] += perDs + (1 if i < remainder else 0)

        # Pull a wider window than the previous 5 so the "last successful
        # sync" is found even if a connection has many recent jobs queued.
        if bootstrapJobs is None:
            bootstrapJobs = jobService.listJobs(jobType="connection.bootstrap", limit=50)
        connJobs = [j for j in bootstrapJobs if (j.get("payload") or {}).get("connectionId") == connectionId]
        runningJobs = [
            {"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
            for j in connJobs
            if j.get("status") in ("PENDING", "RUNNING")
        ]
        lastError: Optional[Dict[str, Any]] = None
        lastSuccess: Optional[Dict[str, Any]] = None
        # The first ERROR / SUCCESS job in listing order wins.
        for j in connJobs:
            status = j.get("status")
            if status == "ERROR" and lastError is None:
                lastError = {
                    "jobId": j["id"],
                    "errorMessage": j.get("errorMessage", ""),
                    "finishedAt": j.get("finishedAt"),
                }
            elif status == "SUCCESS" and lastSuccess is None:
                result = j.get("result") or {}
                lastSuccess = {
                    "jobId": j["id"],
                    "finishedAt": j.get("finishedAt"),
                    "indexed": result.get("indexed", 0),
                    "skippedDuplicate": result.get("skippedDuplicate", 0),
                    "skippedPolicy": result.get("skippedPolicy", 0),
                    "failed": result.get("failed", 0),
                    "durationMs": result.get("durationMs", 0),
                }
            if lastError and lastSuccess:
                break

        out.append({
            "id": connectionId,
            "authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority),
            "externalEmail": getattr(conn, "externalEmail", ""),
            "knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
            "preferences": getattr(conn, "knowledgePreferences", None) or {},
            "dataSources": dsItems,
            "totalChunks": connChunkTotal,
            "runningJobs": runningJobs,
            "lastError": lastError,
            "lastSuccess": lastSuccess,
        })
    return out
|
||||
|
||||
|
||||
@router.get("/me")
@limiter.limit("30/minute")
def _getInventoryMe(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Return the caller's own RAG inventory.

    The response lists the caller's connections (with DataSources and chunk
    counts) under ``connections`` and the summed chunk count under
    ``totals.chunks``. Any failure is reported as HTTP 500.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService

        appRoot = getRootInterface()
        knowledgeStore = getKnowledgeInterface(None)
        ownConnections = appRoot.getUserConnections(currentUser.id)

        inventory = _buildConnectionInventory(ownConnections, appRoot, knowledgeStore, jobService)
        chunkSum = 0
        for entry in inventory:
            chunkSum += entry.get("totalChunks", 0)

        return {"connections": inventory, "totals": {"chunks": chunkSum}}
    except Exception as e:
        logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/mandate")
@limiter.limit("20/minute")
def _getInventoryMandate(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Mandate-level RAG aggregation (requires a mandate in the request context).

    Returns:
        ``connections``: inventory entries for all connections of the mandate.
        ``totals``: summed ``chunks`` plus the mandate's aggregated ``bytes``.

    Raises:
        HTTPException: 403 if the request carries no mandate; 500 for any
            other failure.
    """
    if not context.mandateId:
        raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required"))
    try:
        from types import SimpleNamespace

        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService

        rootIf = getRootInterface()
        knowledgeIf = getKnowledgeInterface(None)
        mandateId = str(context.mandateId) if context.mandateId else ""

        from modules.datamodels.datamodelUam import UserConnection
        allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
        # The inventory builder reads connections via attributes, so dict rows
        # are wrapped in a namespace instead of a throwaway class per row.
        connectionObjects = [
            SimpleNamespace(**row) if isinstance(row, dict) else row
            for row in allConnections
        ]

        items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
        totalChunks = sum(c.get("totalChunks", 0) for c in items)
        totalBytes = aggregateMandateRagTotalBytes(mandateId)

        return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error in RAG inventory /mandate: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/platform")
@limiter.limit("10/minute")
def _getInventoryPlatform(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Platform-wide RAG statistics (sysadmin only).

    Returns:
        ``connections``: inventory entries for every stored connection.
        ``totals``: the summed ``chunks`` count.

    Raises:
        HTTPException: 403 if the caller is not a sysadmin; 500 for any
            other failure.
    """
    if not context.isSysAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Platform admin required"))
    try:
        from types import SimpleNamespace

        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
        from modules.datamodels.datamodelUam import UserConnection

        rootIf = getRootInterface()
        knowledgeIf = getKnowledgeInterface(None)
        allConnections = rootIf.db.getRecordset(UserConnection)
        # The inventory builder reads connections via attributes, so dict rows
        # are wrapped in a namespace instead of a throwaway class per row.
        connectionObjects = [
            SimpleNamespace(**row) if isinstance(row, dict) else row
            for row in allConnections
        ]

        items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
        totalChunks = sum(c.get("totalChunks", 0) for c in items)

        return {"connections": items, "totals": {"chunks": totalChunks}}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error in RAG inventory /platform: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/reindex/{connectionId}")
@limiter.limit("10/minute")
async def _reindexConnection(
    request: Request,
    connectionId: str,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Queue a fresh ``connection.bootstrap`` job for one of the caller's connections.

    Every DataSource of the connection that has ``ragIndexEnabled`` set is
    included in the job payload. A new job is submitted regardless of how
    earlier runs ended.

    This route is intentionally ``async def``: awaiting ``startJob`` here
    registers the job's ``_runJob`` task on FastAPI's main event loop. A sync
    route would execute in the worker threadpool, where ``asyncio.run`` tears
    its temporary loop down right after ``create_task`` and the job would
    stay in PENDING forever.

    Returns:
        ``{"status": "skipped", "reason": "no_rag_enabled_datasources"}`` when
        nothing is enabled for indexing, otherwise a ``"queued"`` status with
        the connection id, the number of DataSources and the new job id.

    Raises:
        HTTPException: 404 for an unknown connection, 403 when the connection
            belongs to another user, 500 for any other failure.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import startJob
        from modules.datamodels.datamodelDataSource import DataSource

        rootInterface = getRootInterface()
        connection = rootInterface.getUserConnectionById(connectionId)
        if connection is None:
            raise HTTPException(status_code=404, detail="Connection not found")

        # Ownership is compared on the string form of both ids.
        if str(connection.userId) != str(currentUser.id):
            raise HTTPException(status_code=403, detail="Not your connection")

        # Collect the ids of the RAG-enabled DataSources; rows may be dicts or objects.
        enabledIds = []
        for source in rootInterface.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId}):
            rowIsDict = isinstance(source, dict)
            enabled = source.get("ragIndexEnabled") if rowIsDict else getattr(source, "ragIndexEnabled", False)
            if not enabled:
                continue
            enabledIds.append(source.get("id") if rowIsDict else getattr(source, "id", ""))

        if not enabledIds:
            return {"status": "skipped", "reason": "no_rag_enabled_datasources"}

        # The authority may be an enum-like object (has .value) or a plain value.
        rawAuthority = connection.authority
        if hasattr(rawAuthority, "value"):
            authority = rawAuthority.value
        else:
            authority = str(rawAuthority or "")

        payload = {
            "connectionId": connectionId,
            "authority": authority.lower(),
            "dataSourceIds": enabledIds,
        }
        jobId = await startJob("connection.bootstrap", payload, triggeredBy=str(currentUser.id))

        sourceCount = len(enabledIds)
        logger.info("Reindex triggered for connection %s (%d DataSources, jobId=%s)", connectionId, sourceCount, jobId)
        return {"status": "queued", "connectionId": connectionId, "dataSourceCount": sourceCount, "jobId": jobId}
    except HTTPException:
        # 404/403 raised above must reach the client unchanged.
        raise
    except Exception as err:
        logger.error("Error triggering reindex: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))
|
||||
|
||||
|
||||
@router.get("/jobs")
@limiter.limit("60/minute")
def _getActiveJobs(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> List[Dict[str, Any]]:
    """List the current user's pending or running ``connection.bootstrap`` jobs.

    Used by the header badge. Up to 50 jobs of that type are fetched and kept
    only when their status is PENDING or RUNNING and their payload's
    ``connectionId`` belongs to one of the user's connections.

    Returns:
        One dict per matching job with ``jobId``, ``connectionId``,
        ``connectionLabel``, ``jobType``, ``progress`` and ``progressMessage``.

    Raises:
        HTTPException: 500 (with the error text as detail) on any failure.
    """
    try:
        from modules.serviceCenter.services.serviceBackgroundJobs import listJobs
        from modules.interfaces.interfaceDbApp import getRootInterface

        # Index the user's connections by stringified id for the ownership check.
        ownConnections = {}
        for connection in getRootInterface().getUserConnections(currentUser.id):
            ownConnections[str(connection.id)] = connection

        activeJobs = []
        for job in listJobs(jobType="connection.bootstrap", limit=50):
            if job.get("status") in ("PENDING", "RUNNING"):
                connectionId = (job.get("payload") or {}).get("connectionId")
                if connectionId in ownConnections:
                    owner = ownConnections[connectionId]
                    # Prefer the display label; fall back to the authority,
                    # and to the raw connection id if neither is available.
                    label = getattr(owner, "displayLabel", None) or getattr(owner, "authority", connectionId)
                    activeJobs.append({
                        "jobId": job["id"],
                        "connectionId": connectionId,
                        "connectionLabel": label,
                        "jobType": job.get("jobType", "connection.bootstrap"),
                        "progress": job.get("progress", 0),
                        "progressMessage": job.get("progressMessage", ""),
                    })
        return activeJobs
    except Exception as err:
        logger.error("Error in RAG inventory /jobs: %s", err, exc_info=True)
        raise HTTPException(status_code=500, detail=str(err))
|
||||
|
|
@ -155,12 +155,13 @@ async def sttStream(
|
|||
|
||||
Protocol:
|
||||
Client sends JSON:
|
||||
{"type": "open", "language": "de-DE"}
|
||||
{"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true}
|
||||
{"type": "audio", "chunk": "<base64>"}
|
||||
{"type": "close"}
|
||||
Server sends JSON:
|
||||
{"type": "interim", "text": "..."}
|
||||
{"type": "final", "text": "...", "confidence": 0.95}
|
||||
{"type": "end_of_single_utterance", "audioDurationSec": 0.0}
|
||||
{"type": "error", "message": "..."}
|
||||
{"type": "closed"}
|
||||
"""
|
||||
|
|
@ -205,7 +206,12 @@ async def sttStream(
|
|||
logger.warning(f"STT billing pre-flight skipped: {e}")
|
||||
|
||||
audioQueue: asyncio.Queue = asyncio.Queue()
|
||||
language = "de-DE"
|
||||
sttOpenOptions: Dict[str, Any] = {
|
||||
"language": "de-DE",
|
||||
"model": "latest_long",
|
||||
"lightweight": False,
|
||||
"singleUtterance": False,
|
||||
}
|
||||
streamingTask: Optional[asyncio.Task] = None
|
||||
voiceInterface: Optional[VoiceObjects] = None
|
||||
|
||||
|
|
@ -233,10 +239,23 @@ async def sttStream(
|
|||
voiceInterface.billingCallback = _billingCb
|
||||
|
||||
try:
|
||||
async for event in voiceInterface.streamingSpeechToText(audioQueue, language):
|
||||
async for event in voiceInterface.streamingSpeechToText(
|
||||
audioQueue,
|
||||
sttOpenOptions["language"],
|
||||
phraseHints=None,
|
||||
model=sttOpenOptions["model"],
|
||||
lightweight=sttOpenOptions["lightweight"],
|
||||
singleUtterance=sttOpenOptions["singleUtterance"],
|
||||
):
|
||||
if event.get("reconnectRequired"):
|
||||
await _sendJson({"type": "reconnect_required"})
|
||||
return
|
||||
if event.get("endOfSingleUtterance"):
|
||||
await _sendJson({
|
||||
"type": "end_of_single_utterance",
|
||||
"audioDurationSec": event.get("audioDurationSec", 0.0),
|
||||
})
|
||||
continue
|
||||
if event.get("isFinal"):
|
||||
if event.get("transcript"):
|
||||
await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
|
||||
|
|
@ -258,7 +277,10 @@ async def sttStream(
|
|||
msgType = (msg.get("type") or "").strip()
|
||||
|
||||
if msgType == "open":
|
||||
language = msg.get("language") or "de-DE"
|
||||
sttOpenOptions["language"] = msg.get("language") or "de-DE"
|
||||
sttOpenOptions["model"] = msg.get("model") or "latest_long"
|
||||
sttOpenOptions["lightweight"] = bool(msg.get("lightweight"))
|
||||
sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance"))
|
||||
if streamingTask and not streamingTask.done():
|
||||
await audioQueue.put((b"", True))
|
||||
streamingTask.cancel()
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import logging
|
|||
import time
|
||||
import json
|
||||
import re
|
||||
from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable
|
||||
from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable, Tuple
|
||||
|
||||
from modules.datamodels.datamodelAi import (
|
||||
AiCallRequest, AiCallOptions, AiCallResponse, OperationTypeEnum
|
||||
|
|
@ -335,9 +335,14 @@ async def runAgentLoop(
|
|||
|
||||
# Execute tool calls
|
||||
for tc in toolCalls:
|
||||
toolDef = toolRegistry.getTool(tc.name)
|
||||
yield AgentEvent(
|
||||
type=AgentEventTypeEnum.TOOL_CALL,
|
||||
data={"toolName": tc.name, "args": tc.args}
|
||||
data={
|
||||
"toolName": tc.name,
|
||||
"displayLabel": toolDef.displayLabel if toolDef else None,
|
||||
"args": tc.args,
|
||||
}
|
||||
)
|
||||
|
||||
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
||||
|
|
@ -355,12 +360,18 @@ async def runAgentLoop(
|
|||
state.totalToolCalls += len(results)
|
||||
|
||||
for result in results:
|
||||
validationCode = None
|
||||
if isinstance(result.errorDetails, dict):
|
||||
code = result.errorDetails.get("code")
|
||||
if isinstance(code, str):
|
||||
validationCode = code
|
||||
roundLog.toolCalls.append(ToolCallLog(
|
||||
toolName=result.toolName,
|
||||
args=next((tc.args for tc in toolCalls if tc.id == result.toolCallId), {}),
|
||||
success=result.success,
|
||||
durationMs=result.durationMs,
|
||||
error=result.error,
|
||||
validationFailureCode=validationCode,
|
||||
resultData=result.data[:300] if result.data else "",
|
||||
))
|
||||
if not result.success:
|
||||
|
|
@ -438,6 +449,11 @@ async def runAgentLoop(
|
|||
trace.totalCostCHF = state.totalCostCHF
|
||||
trace.abortReason = state.abortReason
|
||||
|
||||
validationFailures, repairAttempts, successAfterRepair = _computeRepairCounters(trace.rounds)
|
||||
trace.validationFailures = validationFailures
|
||||
trace.repairAttempts = repairAttempts
|
||||
trace.successAfterRepair = successAfterRepair
|
||||
|
||||
artifactSummary = _buildArtifactSummary(trace.rounds)
|
||||
|
||||
yield AgentEvent(
|
||||
|
|
@ -451,6 +467,9 @@ async def runAgentLoop(
|
|||
"status": state.status.value,
|
||||
"abortReason": state.abortReason,
|
||||
"artifacts": artifactSummary,
|
||||
"validationFailures": validationFailures,
|
||||
"repairAttempts": repairAttempts,
|
||||
"successAfterRepair": successAfterRepair,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -715,6 +734,41 @@ def classifyToolResult(
|
|||
return None
|
||||
|
||||
|
||||
def _computeRepairCounters(rounds: List[AgentRoundLog]) -> Tuple[int, int, int]:
|
||||
"""Aggregate repair-loop telemetry across all rounds.
|
||||
|
||||
Returns ``(validationFailures, repairAttempts, successAfterRepair)``.
|
||||
|
||||
* `validationFailures` -- total tool calls rejected by a pre-execute
|
||||
validator (any round, counts every occurrence).
|
||||
* `repairAttempts` -- tool calls in **later** rounds whose `toolName`
|
||||
had been rejected in some **earlier** round. Multiple retries of the
|
||||
same tool count multiple times. We intentionally do not count
|
||||
sibling calls within the same round, since the LLM has not yet seen
|
||||
the first one's result when emitting the second.
|
||||
* `successAfterRepair` -- the subset of `repairAttempts` that passed
|
||||
the validator (``validationFailureCode is None``).
|
||||
"""
|
||||
validationFailures = 0
|
||||
repairAttempts = 0
|
||||
successAfterRepair = 0
|
||||
rejectedTools: set = set()
|
||||
|
||||
for roundLog in rounds:
|
||||
rejectedFromPriorRounds = set(rejectedTools)
|
||||
for tc in roundLog.toolCalls:
|
||||
wasRejectedBefore = tc.toolName in rejectedFromPriorRounds
|
||||
if tc.validationFailureCode is not None:
|
||||
validationFailures += 1
|
||||
if wasRejectedBefore:
|
||||
repairAttempts += 1
|
||||
rejectedTools.add(tc.toolName)
|
||||
elif wasRejectedBefore:
|
||||
repairAttempts += 1
|
||||
successAfterRepair += 1
|
||||
return validationFailures, repairAttempts, successAfterRepair
|
||||
|
||||
|
||||
_ARTIFACT_TOOLS = {"writeFile", "replaceInFile", "deleteFile", "renameFile", "copyFile",
|
||||
"createFolder", "deleteFolder", "renderDocument", "generateImage"}
|
||||
|
||||
|
|
|
|||
|
|
@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services):
|
|||
"required": ["connectionId", "to", "subject", "body"],
|
||||
},
|
||||
readOnly=False,
|
||||
displayLabel="composing an email",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
},
|
||||
},
|
||||
readOnly=False,
|
||||
displayLabel="creating a document",
|
||||
)
|
||||
|
||||
# ── textToSpeech tool ──────────────────────────────────────────────
|
||||
|
|
@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
"required": ["prompt"],
|
||||
},
|
||||
readOnly=False,
|
||||
displayLabel="generating an image",
|
||||
)
|
||||
|
||||
# ── createChart tool ─────────────────────────────────────────────────
|
||||
|
|
@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
"required": ["datasets"],
|
||||
},
|
||||
readOnly=False,
|
||||
displayLabel="creating a chart",
|
||||
)
|
||||
|
||||
# ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
|
||||
|
|
@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
},
|
||||
"required": ["code"]
|
||||
},
|
||||
readOnly=True
|
||||
readOnly=True,
|
||||
displayLabel="running calculations",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,20 @@ from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_STALE_EXTRACTION_PATTERNS = (
|
||||
"requires the extract-msg package",
|
||||
"extraction requires the",
|
||||
"will be treated as binary",
|
||||
)
|
||||
|
||||
|
||||
def _isStaleExtractionResult(text: str) -> bool:
|
||||
"""Detect cached extraction results that are just error/warning placeholders."""
|
||||
if len(text) > 500:
|
||||
return False
|
||||
textLower = text.lower()
|
||||
return any(p in textLower for p in _STALE_EXTRACTION_PATTERNS)
|
||||
|
||||
|
||||
import uuid as _uuid
|
||||
|
||||
|
|
@ -62,15 +76,16 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
]
|
||||
if textChunks:
|
||||
assembled = "\n\n".join(c["data"] for c in textChunks)
|
||||
chunked = _applyOffsetLimit(assembled, offset, limit)
|
||||
if chunked is not None:
|
||||
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
|
||||
if len(assembled) > _MAX_TOOL_RESULT_CHARS:
|
||||
assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]"
|
||||
return ToolResult(
|
||||
toolCallId="", toolName="readFile", success=True,
|
||||
data=assembled,
|
||||
)
|
||||
if not _isStaleExtractionResult(assembled):
|
||||
chunked = _applyOffsetLimit(assembled, offset, limit)
|
||||
if chunked is not None:
|
||||
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
|
||||
if len(assembled) > _MAX_TOOL_RESULT_CHARS:
|
||||
assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]"
|
||||
return ToolResult(
|
||||
toolCallId="", toolName="readFile", success=True,
|
||||
data=assembled,
|
||||
)
|
||||
elif fileStatus in ("processing", "embedding", "extracted"):
|
||||
return ToolResult(
|
||||
toolCallId="", toolName="readFile", success=True,
|
||||
|
|
@ -101,12 +116,31 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
isBinary = _looksLikeBinary(rawBytes)
|
||||
|
||||
if isBinary:
|
||||
extractionService = services.getService("extraction") if hasattr(services, "getService") else None
|
||||
if extractionService:
|
||||
try:
|
||||
extracted = extractionService.extractContentFromBytes(
|
||||
rawBytes, fileName, mimeType, documentId=fileId,
|
||||
)
|
||||
textParts = [
|
||||
p.data for p in (extracted.parts or [])
|
||||
if getattr(p, "contentType", "") != "image" and getattr(p, "data", None)
|
||||
]
|
||||
if textParts:
|
||||
assembled = "\n\n".join(textParts)
|
||||
chunked = _applyOffsetLimit(assembled, offset, limit)
|
||||
if chunked is not None:
|
||||
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
|
||||
if len(assembled) > _MAX_TOOL_RESULT_CHARS:
|
||||
assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]"
|
||||
return ToolResult(toolCallId="", toolName="readFile", success=True, data=assembled)
|
||||
except Exception as extractErr:
|
||||
logger.warning("readFile: inline extraction failed for %s: %s", fileId, extractErr)
|
||||
return ToolResult(
|
||||
toolCallId="", toolName="readFile", success=True,
|
||||
data=(
|
||||
f"[File '{fileName}' ({mimeType}) is not yet indexed "
|
||||
f"(status: {fileStatus or 'unknown'}). Indexing runs automatically "
|
||||
f"on upload. Please wait a few seconds and retry, or re-upload the file. "
|
||||
f"[File '{fileName}' ({mimeType}) is binary and could not be extracted "
|
||||
f"(status: {fileStatus or 'unknown'}). "
|
||||
f"For visual content use describeImage(fileId='{fileId}').]"
|
||||
),
|
||||
)
|
||||
|
|
@ -310,11 +344,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
|
||||
fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
|
||||
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
|
||||
updateFields: Dict[str, Any] = {}
|
||||
if fiId:
|
||||
dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId})
|
||||
# File group tree removed — groupId arg and instance-group assignment no longer apply
|
||||
updateFields["featureInstanceId"] = fiId
|
||||
if args.get("folderId"):
|
||||
updateFields["folderId"] = args["folderId"]
|
||||
if args.get("tags"):
|
||||
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
|
||||
updateFields["tags"] = args["tags"]
|
||||
if updateFields:
|
||||
dbMgmt.updateFile(fileItem.id, updateFields)
|
||||
|
||||
chatDocId = _attachFileAsChatDocument(
|
||||
services, fileItem,
|
||||
|
|
@ -359,7 +397,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
},
|
||||
"required": ["fileId"]
|
||||
},
|
||||
readOnly=True
|
||||
readOnly=True,
|
||||
displayLabel="reviewing a document",
|
||||
)
|
||||
|
||||
registry.register(
|
||||
|
|
@ -406,7 +445,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
"properties": {"query": {"type": "string", "description": "Search query"}},
|
||||
"required": ["query"]
|
||||
},
|
||||
readOnly=True
|
||||
readOnly=True,
|
||||
displayLabel="researching on the web",
|
||||
)
|
||||
|
||||
registry.register(
|
||||
|
|
@ -427,7 +467,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
"writeFile", _writeFile,
|
||||
description=(
|
||||
"Create, append, or overwrite a file. Modes:\n"
|
||||
"- create (default): create a new file (name required).\n"
|
||||
"- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n"
|
||||
"- append: append content to an existing file (fileId required). "
|
||||
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
|
||||
"- overwrite: replace entire file content (fileId required).\n"
|
||||
|
|
@ -443,7 +483,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
"content": {"type": "string", "description": "Content to write/append"},
|
||||
"mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
|
||||
"fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
|
||||
"groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."},
|
||||
"folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."},
|
||||
"tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
|
||||
},
|
||||
"required": ["content"]
|
||||
|
|
@ -581,7 +621,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
},
|
||||
"required": ["url"]
|
||||
},
|
||||
readOnly=True
|
||||
readOnly=True,
|
||||
displayLabel="reading a webpage",
|
||||
)
|
||||
|
||||
registry.register(
|
||||
|
|
@ -701,7 +742,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
|||
readOnly=False
|
||||
)
|
||||
|
||||
# Group tree tools removed — file grouping now uses view-based display grouping (TableListView)
|
||||
# ---- Folder management tools ----
|
||||
|
||||
async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
    """Create a workspace folder, optionally nested under a parent folder.

    Args:
        args: Tool arguments; ``name`` is required, ``parentId`` is optional
            (any falsy value means "no parent").
        context: Tool call context (not read here).

    Returns:
        A ``ToolResult`` that reports the new folder's name and id and
        carries a ``folderCreated`` side event, or a failed result with the
        error text when the name is missing or the creation raises.
    """
    requestedName = args.get("name", "")
    if not requestedName:
        return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")

    parentId = args.get("parentId") or None
    try:
        created = services.chat.interfaceDbComponent.createFolder(requestedName, parentId=parentId)

        # The DB layer may hand back either a dict or an object.
        if isinstance(created, dict):
            folderId = created.get("id")
            folderName = created.get("name")
        else:
            folderId = getattr(created, "id", None)
            folderName = getattr(created, "name", requestedName)

        summary = f"Folder '{folderName}' created (id: {folderId})"
        if parentId:
            summary += f" inside parent {parentId}"

        createdEvent = {
            "type": "folderCreated",
            "data": {"folderId": folderId, "folderName": folderName, "parentId": parentId},
        }
        return ToolResult(
            toolCallId="", toolName="createFolder", success=True,
            data=summary,
            sideEvents=[createdEvent],
        )
    except Exception as err:
        return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(err))
|
||||
|
||||
async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
    """Render the folder tree returned by ``getOwnFolderTree()`` as indented bullets.

    Folders (dicts or objects) are grouped by ``parentId`` and printed
    depth-first, starting with the folders whose ``parentId`` is ``None``.
    Siblings are ordered case-insensitively by name. Folders that cannot be
    reached from such a root are not listed.

    Args:
        args: Tool arguments (none are read).
        context: Tool call context (not read here).

    Returns:
        A ``ToolResult`` with one ``- <name> (id: <id>)`` line per listed
        folder, the text "No folders found." when there are no folders, or a
        failed result carrying the exception text.
    """

    def _read(folder, key, default=None):
        # Dict rows use .get() (missing key -> None); objects fall back to `default`.
        if isinstance(folder, dict):
            return folder.get(key)
        return getattr(folder, key, default)

    def _sortKey(folder):
        # A missing or null name must not break the sort of the whole level,
        # so it is ordered as an empty string.
        return (_read(folder, "name", "") or "").lower()

    try:
        chatService = services.chat
        dbMgmt = chatService.interfaceDbComponent
        folders = dbMgmt.getOwnFolderTree()
        if not folders:
            return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.")

        lines: List[str] = []
        childrenByParent: Dict[Optional[str], List] = {}
        for folder in folders:
            childrenByParent.setdefault(_read(folder, "parentId"), []).append(folder)

        def _walk(parentId: Optional[str], indent: int):
            for folder in sorted(childrenByParent.get(parentId, []), key=_sortKey):
                fId = _read(folder, "id", "")
                fName = _read(folder, "name", "")
                prefix = " " * indent
                lines.append(f"{prefix}- {fName} (id: {fId})")
                _walk(fId, indent + 1)

        _walk(None, 0)
        return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines))
    except Exception as e:
        return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e))
|
||||
|
||||
async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
    """Move a file into a folder, or back to the root level.

    Args:
        args: Tool arguments; ``fileId`` is required. A missing or falsy
            ``folderId`` moves the file to the root (``folderId`` is stored
            as ``None``).
        context: Tool call context (not read here).

    Returns:
        A ``ToolResult`` describing the move and carrying a ``fileUpdated``
        side event, or a failed result when ``fileId`` is missing, the file
        is not found, or the update raises.
    """
    fileId = args.get("fileId", "")
    requestedFolder = args.get("folderId")
    if not fileId:
        return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")

    try:
        dbMgmt = services.chat.interfaceDbComponent
        fileRecord = dbMgmt.getFile(fileId)
        if not fileRecord:
            return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found")

        # Any falsy folder id (None, "") is normalised to None, i.e. root.
        destination = requestedFolder or None
        dbMgmt.updateFile(fileId, {"folderId": destination})

        if destination is None:
            targetLabel = "root"
        else:
            targetLabel = f"folder {destination}"

        fileName = fileRecord.fileName
        updatedEvent = {"type": "fileUpdated", "data": {"fileId": fileId, "fileName": fileName}}
        return ToolResult(
            toolCallId="", toolName="moveFile", success=True,
            data=f"File '{fileName}' (id: {fileId}) moved to {targetLabel}",
            sideEvents=[updatedEvent],
        )
    except Exception as err:
        return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(err))
|
||||
|
||||
registry.register(
|
||||
"createFolder", _createFolder,
|
||||
description=(
|
||||
"Create a new folder in the workspace file tree. "
|
||||
"Use parentId to create nested folders. Returns the new folder ID."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string", "description": "Folder name"},
|
||||
"parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."},
|
||||
},
|
||||
"required": ["name"]
|
||||
},
|
||||
readOnly=False
|
||||
)
|
||||
|
||||
registry.register(
|
||||
"listFolders", _listFolders,
|
||||
description=(
|
||||
"List all folders in the workspace as an indented tree. "
|
||||
"Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile."
|
||||
),
|
||||
parameters={"type": "object", "properties": {}},
|
||||
readOnly=True
|
||||
)
|
||||
|
||||
async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
    """Rename an existing workspace folder.

    Args:
        args: Tool arguments; both ``folderId`` and ``newName`` are required.
        context: Tool call context (not read here).

    Returns:
        A ``ToolResult`` confirming the rename and carrying a
        ``folderUpdated`` side event, or a failed result when an argument is
        missing or the rename raises.
    """
    folderId = args.get("folderId", "")
    newName = args.get("newName", "")
    if not folderId or not newName:
        return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
    try:
        chatService = services.chat
        dbMgmt = chatService.interfaceDbComponent
        # The return value is not needed: the result is reported from the
        # arguments, and failures surface as exceptions handled below.
        dbMgmt.renameFolder(folderId, newName)
        return ToolResult(
            toolCallId="", toolName="renameFolder", success=True,
            data=f"Folder {folderId} renamed to '{newName}'",
            sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}],
        )
    except Exception as e:
        return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e))
|
||||
|
||||
registry.register(
|
||||
"renameFolder", _renameFolder,
|
||||
description="Rename an existing folder in the workspace file tree.",
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"folderId": {"type": "string", "description": "The folder ID to rename"},
|
||||
"newName": {"type": "string", "description": "New folder name"},
|
||||
},
|
||||
"required": ["folderId", "newName"]
|
||||
},
|
||||
readOnly=False
|
||||
)
|
||||
|
||||
registry.register(
|
||||
"moveFile", _moveFile,
|
||||
description=(
|
||||
"Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"fileId": {"type": "string", "description": "The file ID to move"},
|
||||
"folderId": {"type": "string", "description": "Target folder ID. Omit or null to move to root."},
|
||||
},
|
||||
"required": ["fileId"]
|
||||
},
|
||||
readOnly=False
|
||||
)
|
||||
|
||||
registry.register(
|
||||
"replaceInFile", _replaceInFile,
|
||||
|
|
|
|||
|
|
@ -41,6 +41,12 @@ class ToolDefinition(BaseModel):
|
|||
"""Schema for a tool available to the agent."""
|
||||
name: str = Field(description="Unique tool name")
|
||||
description: str = Field(description="What this tool does")
|
||||
displayLabel: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Short human-readable activity phrase (e.g. 'researching on the web'). "
|
||||
"Used for live progress messages in meetings. English gerund phrase; "
|
||||
"localised by the caller."
|
||||
)
|
||||
parameters: Dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="JSON Schema for tool parameters"
|
||||
|
|
@ -73,6 +79,14 @@ class ToolResult(BaseModel):
|
|||
success: bool = True
|
||||
data: str = ""
|
||||
error: Optional[str] = None
|
||||
errorDetails: Optional[Dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Structured, machine-readable error payload for the LLM (e.g. validation "
|
||||
"repair hints with code/field/suggestion/hint). `error` remains the short "
|
||||
"human-readable text for logs and audit."
|
||||
),
|
||||
)
|
||||
durationMs: int = 0
|
||||
sideEvents: Optional[List[Dict[str, Any]]] = None
|
||||
|
||||
|
|
@ -135,6 +149,14 @@ class ToolCallLog(BaseModel):
|
|||
success: bool = True
|
||||
durationMs: int = 0
|
||||
error: Optional[str] = None
|
||||
validationFailureCode: Optional[str] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"If the tool call was rejected by a pre-execute validator (e.g. "
|
||||
"QueryValidator), the structured error code (e.g. FIELD_NOT_FOUND). "
|
||||
"None when the call ran cleanly or failed for other reasons."
|
||||
),
|
||||
)
|
||||
resultData: str = Field(default="", description="Short result summary for artifact tracking")
|
||||
|
||||
|
||||
|
|
@ -161,6 +183,24 @@ class AgentTrace(BaseModel):
|
|||
totalToolCalls: int = 0
|
||||
totalCostCHF: float = 0.0
|
||||
abortReason: Optional[str] = None
|
||||
validationFailures: int = Field(
|
||||
default=0,
|
||||
description="Total tool calls rejected by a pre-execute validator across the run.",
|
||||
)
|
||||
repairAttempts: int = Field(
|
||||
default=0,
|
||||
description=(
|
||||
"Number of times the LLM retried a previously rejected tool (same toolName) "
|
||||
"in a later round. Counted by `agentLoop` from per-round ToolCallLog entries."
|
||||
),
|
||||
)
|
||||
successAfterRepair: int = Field(
|
||||
default=0,
|
||||
description=(
|
||||
"Number of repair attempts that produced a clean (validationFailureCode=None) "
|
||||
"result. Combined with `repairAttempts` this gives the repair conversion rate."
|
||||
),
|
||||
)
|
||||
rounds: List[AgentRoundLog] = Field(default_factory=list)
|
||||
|
||||
|
||||
|
|
|
|||
203
modules/serviceCenter/services/serviceAgent/datamodelOntology.py
Normal file
203
modules/serviceCenter/services/serviceAgent/datamodelOntology.py
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Ontology data model for feature data sub-agents.
|
||||
|
||||
This module defines the data structures that describe a feature's data
|
||||
ontology -- entities, relations, constraints, canonical query patterns --
|
||||
plus the validation error payload used by the QueryValidator.
|
||||
|
||||
Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`,
|
||||
`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/
|
||||
`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology
|
||||
pilot) can plug in without a second data-model change.
|
||||
|
||||
See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ValidationErrorCode(str, Enum):
    """Stable identifiers for the ways a query can fail validation.

    A code reaches the LLM unchanged through
    ``ToolResult.errorDetails["code"]``, and the LLM is expected to respond to
    each one deterministically. Every member's value equals its name.
    """

    # Expected reaction: inspect the schema via browseTable.
    FIELD_NOT_FOUND = "FIELD_NOT_FOUND"
    # Expected reaction: drop the SUM.
    INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET"
    WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE"
    TYPE_MISMATCH = "TYPE_MISMATCH"
    OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE"
    # Expected reaction: add a period filter.
    MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER"
    ORDER_BY_INVALID = "ORDER_BY_INVALID"
|
||||
|
||||
|
||||
class QueryValidationError(BaseModel):
    """Description of a tool call that was rejected before execution.

    Two views are derived from one instance: ``toErrorDetails()`` gives the
    machine-readable payload for `ToolResult.errorDetails`, and
    ``toShortError()`` gives the one-line text for `ToolResult.error`.
    """

    code: ValidationErrorCode
    field: Optional[str] = Field(
        default=None,
        description="The offending field name (when applicable).",
    )
    suggestion: Optional[str] = Field(
        default=None,
        description=(
            "Best-effort suggestion (e.g. fuzzy-matched valid field name). "
            "None when no useful suggestion exists."
        ),
    )
    hint: str = Field(
        description="Short corrective hint, max ~80 chars. Surfaced to the LLM verbatim.",
        max_length=160,
    )

    def toShortError(self) -> str:
        """Return the one-line ``error`` text used for logs/audit.

        Shape is ``<CODE>: <hint>``; when ``field`` is set it is inserted
        between the two, giving ``<CODE>: <field>: <hint>``.
        """
        segments = [self.code.value]
        if self.field:
            segments.append(self.field)
        segments.append(self.hint)
        return ": ".join(segments)

    def toErrorDetails(self) -> Dict[str, Any]:
        """Return the payload stored in `ToolResult.errorDetails`.

        ``code`` is emitted as its plain string value; ``field`` and
        ``suggestion`` may be ``None``.
        """
        return dict(
            code=self.code.value,
            field=self.field,
            suggestion=self.suggestion,
            hint=self.hint,
        )
|
||||
|
||||
|
||||
class ConstraintRule(str, Enum):
    """High-level rule kinds that can be attached to a field or table.

    The prompt compiler shows a rule to the LLM as its value lower-cased with
    underscores replaced by spaces (e.g. ``never aggregate``).
    """
    # The validator rejects SUM/AVG on the targeted field.
    NEVER_AGGREGATE = "NEVER_AGGREGATE"
    # Queries must filter on certain fields; they are listed under
    # ``Constraint.params['requiredFields']``.
    REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON"
    # NOTE(review): no consumer visible in this chunk -- confirm semantics.
    TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD"
    # NOTE(review): presumably pairs with ``Constraint.params['intents']``,
    # which the prompt compiler renders -- confirm.
    PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT"
|
||||
|
||||
|
||||
class Constraint(BaseModel):
    """A single rule the validator and the prompt compiler both consume.

    Phase 1 uses constraints declared inline by the validator (defaults
    derived from naming conventions like ``*Balance`` / ``*Total``).
    Phase 2 sources them from feature ontologies, replacing the
    convention-based defaults.

    ``OntologyDescriptor.constraintsForTable`` selects constraints by
    ``appliesTo``: either an exact table-name match or a ``<Table>.`` prefix.
    The prompt compiler renders the ``requiredFields`` and ``intents`` lists
    from ``params`` when they are non-empty lists.
    """
    appliesTo: str = Field(
        description=(
            "Target identifier, format depends on rule: `<Table>.<field>` for "
            "field-level constraints, `<Table>` for table-level."
        ),
    )
    rule: ConstraintRule
    # Hard-limited to 160 characters by the model.
    message: str = Field(
        description="Short hint forwarded to the LLM if the constraint fires.",
        max_length=160,
    )
    # Free-form per-rule extras; defaults to a fresh empty dict per instance.
    params: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} "
            "for REQUIRES_FILTER_ON."
        ),
    )
|
||||
|
||||
|
||||
class SemanticType(str, Enum):
    """High-level semantic category an entity belongs to.

    Coarser than the underlying Pydantic type -- used so the prompt compiler
    can group entities ("here are your ACCOUNT-like tables") without the LLM
    having to read the full schema.

    NOTE(review): the entity renderer in ``ontologyToPromptCompiler`` does not
    read ``Entity.semanticType`` at the moment, so the grouping described
    above looks like planned rather than current behavior -- confirm.
    """
    ACCOUNT = "ACCOUNT"
    BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT"
    TRANSACTION = "TRANSACTION"
    DOCUMENT = "DOCUMENT"
    PARTY = "PARTY"
    PERIOD = "PERIOD"
    # Default category used by ``Entity.semanticType``.
    OTHER = "OTHER"
|
||||
|
||||
|
||||
class Cardinality(str, Enum):
    """Multiplicity of a :class:`Relation` between two entities.

    The prompt compiler prints the member's value verbatim inside the
    relation line. Presumably read in ``fromEntity`` -> ``toEntity``
    direction -- TODO confirm.
    """
    ONE_TO_ONE = "ONE_TO_ONE"
    ONE_TO_MANY = "ONE_TO_MANY"
    MANY_TO_ONE = "MANY_TO_ONE"
    MANY_TO_MANY = "MANY_TO_MANY"
|
||||
|
||||
|
||||
class Invariant(BaseModel):
    """Free-form invariant attached to an entity.

    Phase 1 leaves these as opaque text consumed by the prompt compiler.
    Future phases may add a structured rule kind.

    The prompt compiler emits each invariant as a ``*`` bullet beneath the
    entity it belongs to.
    """
    # Plain text shown to the LLM; limited to 200 characters by the model.
    description: str = Field(max_length=200)
|
||||
|
||||
|
||||
class Entity(BaseModel):
    """One semantic entity in the ontology (often backed by a Pydantic table).

    The prompt compiler renders ``name``, ``parentEntity``, ``pythonClass``,
    ``description`` and ``invariants``; ``semanticType`` is not rendered by
    the compiler code visible alongside this model.
    """
    # Label printed for the entity in the prompt block.
    name: str
    # Printed as "[table: ...]" in the prompt when set.
    pythonClass: Optional[str] = Field(
        default=None,
        description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').",
    )
    semanticType: SemanticType = SemanticType.OTHER
    # Printed as "(specializes ...)" in the prompt when set.
    parentEntity: Optional[str] = Field(
        default=None,
        description="Name of a broader entity this one specializes (e.g. 'BankAccount' parentEntity 'Account').",
    )
    # Skipped in the prompt when empty.
    description: str = ""
    invariants: List[Invariant] = Field(default_factory=list)
|
||||
|
||||
|
||||
class Relation(BaseModel):
    """Directed link between two ontology entities.

    Rendered by the prompt compiler as
    ``- <fromEntity> -> <toEntity> (<cardinality>[ via <via>])``.
    ``fromEntity`` / ``toEntity`` are plain strings; presumably they hold
    ``Entity.name`` values -- TODO confirm, nothing here validates that.
    """
    fromEntity: str
    toEntity: str
    cardinality: Cardinality
    # Foreign-key field name on the fromEntity side (e.g. 'journalEntryId').
    # The runtime description string below is German and left as-is.
    via: Optional[str] = Field(
        default=None,
        description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').",
    )
|
||||
|
||||
|
||||
class CanonicalQueryPattern(BaseModel):
    """Tool-call skeleton for a recurring user intent.

    The prompt compiler renders these as worked examples so the LLM has a
    template to mimic instead of inventing a query shape.

    Keys of ``pattern`` the compiler reads: ``tool``, ``tableName``,
    ``aggregate``, ``field``, ``groupBy``, ``orderBy``, ``fields`` and
    ``filters``. A string under ``_postProcessing`` is printed as a
    ``note:`` line. Other keys are not rendered.
    """
    intent: str = Field(description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.")
    description: str = Field(default="", description="Human-readable when to use this pattern.")
    # Required: there is no default, so every pattern must carry a call shape.
    pattern: Dict[str, Any] = Field(
        description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}",
    )
|
||||
|
||||
|
||||
class OntologyDescriptor(BaseModel):
    """Per-feature ontology bundle returned by a feature's `getAgentOntology()`.

    Holds the feature code plus four lists (entities, relations, constraints,
    canonical query patterns); each list defaults to empty.
    """

    featureCode: str
    entities: List[Entity] = Field(default_factory=list)
    relations: List[Relation] = Field(default_factory=list)
    constraints: List[Constraint] = Field(default_factory=list)
    canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list)

    def constraintsForTable(self, tableName: str) -> List[Constraint]:
        """Collect the constraints aimed at ``tableName`` or at one of its fields.

        A constraint matches when its ``appliesTo`` equals the table name or
        starts with ``"<tableName>."``. Declaration order is preserved.
        """
        fieldPrefix = f"{tableName}."
        selected: List[Constraint] = []
        for constraint in self.constraints:
            target = constraint.appliesTo
            if target == tableName or target.startswith(fieldPrefix):
                selected.append(constraint)
        return selected
|
||||
|
|
@ -15,6 +15,7 @@ invoked outside an agent loop (e.g. in tests).
|
|||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Callable, Awaitable, Dict, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelAi import (
|
||||
|
|
@ -25,6 +26,10 @@ from modules.serviceCenter.services.serviceAgent.agentLoop import runAgentLoop
|
|||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
|
||||
AgentConfig, AgentEvent, AgentEventTypeEnum, ToolResult,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
||||
QueryValidationError,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator
|
||||
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
|
||||
from modules.serviceCenter.services.serviceAgent.featureDataProvider import FeatureDataProvider
|
||||
from modules.shared.i18nRegistry import resolveText
|
||||
|
|
@ -83,7 +88,8 @@ async def runFeatureDataAgent(
|
|||
"""
|
||||
|
||||
provider = FeatureDataProvider(dbConnector, neutralizeFields=neutralizeFields)
|
||||
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {})
|
||||
validator = _buildValidatorForFeature(featureCode)
|
||||
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator)
|
||||
|
||||
for tbl in selectedTables:
|
||||
meta = tbl.get("meta", {})
|
||||
|
|
@ -153,10 +159,19 @@ def _buildSubAgentTools(
|
|||
featureInstanceId: str,
|
||||
mandateId: str,
|
||||
tableFilters: Dict[str, Dict[str, str]] = None,
|
||||
validator: Optional[QueryValidator] = None,
|
||||
) -> ToolRegistry:
|
||||
"""Register browseTable and queryTable as sub-agent tools."""
|
||||
"""Register browseTable and queryTable as sub-agent tools.
|
||||
|
||||
The optional ``validator`` runs **before** the provider on every call.
|
||||
When it returns a structured error, the tool result carries
|
||||
``errorDetails`` (machine-readable repair hint for the LLM) plus the
|
||||
short ``error`` string for logs/audit. No provider call happens in that
|
||||
case, so the database is never reached with a known-bad query.
|
||||
"""
|
||||
registry = ToolRegistry()
|
||||
_tableFilters = tableFilters or {}
|
||||
_validator = validator or QueryValidator()
|
||||
|
||||
def _recordFilterToList(tableName: str) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Convert a recordFilter dict to a list of {field, op, value} filter dicts."""
|
||||
|
|
@ -165,6 +180,14 @@ def _buildSubAgentTools(
|
|||
return None
|
||||
return [{"field": k, "op": "=", "value": v} for k, v in rf.items()]
|
||||
|
||||
def _validationToolResult(toolName: str, err: QueryValidationError) -> ToolResult:
    """Wrap a validator error as a failed ``ToolResult`` for ``toolName``.

    ``error`` carries the short one-line text and ``errorDetails`` the
    structured payload, both derived from ``err``. ``toolCallId`` is left
    empty, as in the other failure results built by these tool wrappers.
    """
    return ToolResult(
        toolCallId="", toolName=toolName,
        success=False,
        error=err.toShortError(),
        errorDetails=err.toErrorDetails(),
    )
|
||||
|
||||
async def _browseTable(args: Dict[str, Any], context: Dict[str, Any]):
|
||||
tableName = args.get("tableName", "")
|
||||
limit = args.get("limit", 50)
|
||||
|
|
@ -172,6 +195,9 @@ def _buildSubAgentTools(
|
|||
fields = args.get("fields")
|
||||
if not tableName:
|
||||
return ToolResult(toolCallId="", toolName="browseTable", success=False, error="tableName required")
|
||||
validationErr = _validator.validateBrowseQuery(tableName, args)
|
||||
if validationErr is not None:
|
||||
return _validationToolResult("browseTable", validationErr)
|
||||
result = provider.browseTable(
|
||||
tableName=tableName,
|
||||
featureInstanceId=featureInstanceId,
|
||||
|
|
@ -197,6 +223,9 @@ def _buildSubAgentTools(
|
|||
offset = args.get("offset", 0)
|
||||
if not tableName:
|
||||
return ToolResult(toolCallId="", toolName="queryTable", success=False, error="tableName required")
|
||||
validationErr = _validator.validateQueryTable(tableName, args)
|
||||
if validationErr is not None:
|
||||
return _validationToolResult("queryTable", validationErr)
|
||||
result = provider.queryTable(
|
||||
tableName=tableName,
|
||||
featureInstanceId=featureInstanceId,
|
||||
|
|
@ -220,12 +249,19 @@ def _buildSubAgentTools(
|
|||
aggregate = args.get("aggregate", "")
|
||||
field = args.get("field", "")
|
||||
groupBy = args.get("groupBy")
|
||||
filters = args.get("filters") or []
|
||||
if not tableName:
|
||||
return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="tableName required")
|
||||
if not aggregate:
|
||||
return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="aggregate required (SUM, COUNT, AVG, MIN, MAX)")
|
||||
if not field:
|
||||
return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="field required")
|
||||
validationErr = _validator.validateAggregateQuery(tableName, args)
|
||||
if validationErr is not None:
|
||||
return _validationToolResult("aggregateTable", validationErr)
|
||||
combinedFilters = list(filters)
|
||||
recordFilters = _recordFilterToList(tableName) or []
|
||||
combinedFilters.extend(recordFilters)
|
||||
result = provider.aggregateTable(
|
||||
tableName=tableName,
|
||||
featureInstanceId=featureInstanceId,
|
||||
|
|
@ -233,7 +269,7 @@ def _buildSubAgentTools(
|
|||
aggregate=aggregate,
|
||||
field=field,
|
||||
groupBy=groupBy,
|
||||
extraFilters=_recordFilterToList(tableName),
|
||||
extraFilters=combinedFilters or None,
|
||||
)
|
||||
return ToolResult(
|
||||
toolCallId="", toolName="aggregateTable",
|
||||
|
|
@ -246,8 +282,12 @@ def _buildSubAgentTools(
|
|||
"aggregateTable", _aggregateTable,
|
||||
description=(
|
||||
"Run an aggregate query on a feature data table. "
|
||||
"Supports SUM, COUNT, AVG, MIN, MAX with optional GROUP BY. "
|
||||
"Example: aggregateTable(tableName='TrusteeDataJournalLine', aggregate='SUM', field='debitAmount', groupBy='costCenter')"
|
||||
"Supports SUM, COUNT, AVG, MIN, MAX with optional GROUP BY and filters. "
|
||||
"Example: aggregateTable(tableName='TrusteeDataJournalLine', aggregate='SUM', "
|
||||
"field='debitAmount', filters=[{'field':'accountNumber','op':'=','value':'5400'}]). "
|
||||
"On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- "
|
||||
"read errorDetails and correct the next call (e.g. drop the SUM, switch to queryTable with period filters, "
|
||||
"or use the suggested field name)."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
|
|
@ -256,6 +296,22 @@ def _buildSubAgentTools(
|
|||
"aggregate": {"type": "string", "enum": ["SUM", "COUNT", "AVG", "MIN", "MAX"], "description": "Aggregate function"},
|
||||
"field": {"type": "string", "description": "Field to aggregate (e.g. debitAmount, creditAmount)"},
|
||||
"groupBy": {"type": "string", "description": "Optional field to group by (e.g. costCenter, accountNumber)"},
|
||||
"filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"field": {"type": "string"},
|
||||
"op": {"type": "string"},
|
||||
"value": {},
|
||||
},
|
||||
},
|
||||
"description": (
|
||||
"Optional filter conditions applied before the aggregate. Same shape as queryTable's "
|
||||
"filters. Required whenever you want to aggregate only a subset (e.g. SUM debits on "
|
||||
"ONE account, COUNT rows in ONE year)."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["tableName", "aggregate", "field"],
|
||||
},
|
||||
|
|
@ -264,7 +320,11 @@ def _buildSubAgentTools(
|
|||
|
||||
registry.register(
|
||||
"browseTable", _browseTable,
|
||||
description="List rows from a feature data table with pagination.",
|
||||
description=(
|
||||
"List rows from a feature data table with pagination. "
|
||||
"On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- "
|
||||
"use errorDetails to correct the next call."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -286,7 +346,10 @@ def _buildSubAgentTools(
|
|||
description=(
|
||||
"Query a feature data table with filters, field selection, and ordering. "
|
||||
"Filters: [{\"field\": \"status\", \"op\": \"=\", \"value\": \"active\"}]. "
|
||||
"Operators: =, !=, >, <, >=, <=, LIKE, ILIKE, IS NULL, IS NOT NULL."
|
||||
"Operators: =, !=, >, <, >=, <=, LIKE, ILIKE, IS NULL, IS NOT NULL. "
|
||||
"On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- "
|
||||
"common codes: FIELD_NOT_FOUND (use the suggestion or call browseTable), OPERATOR_INCOMPATIBLE "
|
||||
"(switch to a compatible operator for that field type), ORDER_BY_INVALID."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
|
|
@ -410,13 +473,94 @@ def _buildSchemaContext(
|
|||
"- Keep your answer SHORT. The caller is a machine, not a human.",
|
||||
]
|
||||
|
||||
domainHints = _loadFeatureDomainHints(featureCode)
|
||||
if domainHints:
|
||||
parts.extend(["", domainHints.strip()])
|
||||
domainBlock = ""
|
||||
if not _isOntologyDisabled():
|
||||
domainBlock = _loadFeatureOntologyBlock(featureCode)
|
||||
if not domainBlock:
|
||||
domainBlock = _loadFeatureDomainHints(featureCode)
|
||||
if domainBlock:
|
||||
parts.extend(["", domainBlock.strip()])
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _isOntologyDisabled() -> bool:
    """Eval-only escape hatch.

    Set ``POWERON_DISABLE_FEATURE_ONTOLOGY=1`` in the environment to force
    ``_buildSchemaContext`` back onto the legacy ``getAgentDomainHints()``
    path. Used by the Phase 1.5 benchmark to measure ``baseline`` and
    ``phase1`` accuracy WITHOUT the ontology-driven prompt block. Never
    set this flag in production.

    Returns:
        True when the variable is set to ``1``, ``true`` or ``yes``.
        Surrounding whitespace is ignored and the comparison is
        case-insensitive, so ``True`` / ``YES`` behave like ``TRUE`` did
        before. Any other value, or an unset variable, yields False.
    """
    rawFlag = os.environ.get("POWERON_DISABLE_FEATURE_ONTOLOGY", "")
    # Normalize case so every spelling of a truthy word is honored, not just
    # the handful that used to be enumerated.
    return rawFlag.strip().lower() in ("1", "true", "yes")
|
||||
|
||||
|
||||
def _buildValidatorForFeature(featureCode: str) -> QueryValidator:
    """Create the QueryValidator for ``featureCode``.

    The feature's ontology is looked up via ``_loadFeatureOntology`` and
    handed to the validator. When the lookup yields ``None`` the validator
    is built without an ontology and relies on its convention-based rules
    (``*Balance`` / ``*Total`` are NEVER_AGGREGATE). With an ontology, the
    validator and the prompt block draw on the same descriptor.
    """
    return QueryValidator(ontology=_loadFeatureOntology(featureCode))
|
||||
|
||||
|
||||
def _loadFeatureOntology(featureCode: str):
    """Fetch the ontology a feature exposes through ``getAgentOntology()``.

    Returns whatever the hook returns, or ``None`` in every failure case:
    empty ``featureCode``, registry import failure, registry load failure,
    unknown feature, missing or non-callable hook, or a hook that raises.
    The feature is looked up under ``featureCode`` first and its lower-cased
    form second.
    """
    if not featureCode:
        return None

    # Imported lazily; any import problem simply means "no ontology".
    try:
        from modules.system.registry import loadFeatureMainModules
    except Exception:
        return None

    try:
        modulesByCode = loadFeatureMainModules() or {}
    except Exception as loadErr:
        logger.debug("Ontology lookup: cannot load main modules (%s)", loadErr)
        return None

    featureModule = modulesByCode.get(featureCode) or modulesByCode.get(featureCode.lower())
    if featureModule is None:
        return None

    ontologyHook = getattr(featureModule, "getAgentOntology", None)
    if not callable(ontologyHook):
        return None

    try:
        descriptor = ontologyHook()
    except Exception as hookErr:
        logger.warning("Feature '%s' getAgentOntology() raised: %s", featureCode, hookErr)
        return None
    return descriptor
|
||||
|
||||
|
||||
def _loadFeatureOntologyBlock(featureCode: str) -> str:
    """Compile the feature's ontology into a prompt block, or return ``""``.

    A feature may expose ``getAgentOntology() -> OntologyDescriptor`` in its
    ``mainXxx.py``. If it does, the descriptor is passed through
    :func:`ontologyToPromptCompiler.compileOntologyToPrompt` and the text
    stands in for the legacy ``getAgentDomainHints()`` block, so validator
    and prompt share one source of truth.

    No ontology, a failing compiler import, or a compiler exception all
    produce an empty string (the last two are logged as warnings) so the
    caller can fall back to the legacy domain-hints path.
    """
    descriptor = _loadFeatureOntology(featureCode)
    if descriptor is None:
        return ""

    try:
        from modules.serviceCenter.services.serviceAgent.ontologyToPromptCompiler import (
            compileOntologyToPrompt,
        )
        promptBlock = compileOntologyToPrompt(descriptor)
    except Exception as compileErr:
        logger.warning("Ontology compile failed for '%s': %s", featureCode, compileErr)
        promptBlock = ""
    return promptBlock
|
||||
|
||||
|
||||
def _loadFeatureDomainHints(featureCode: str) -> str:
|
||||
"""Pull optional domain-specific hints from the feature's main module.
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,140 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Deterministic compiler: OntologyDescriptor -> sub-agent prompt block.
|
||||
|
||||
Phase 2 replaces a feature's hand-written ``_AGENT_DOMAIN_HINTS`` text
|
||||
with a structured :class:`OntologyDescriptor`. This compiler renders the
|
||||
descriptor into a stable, terse Markdown-ish block that the sub-agent
|
||||
appends to its system prompt -- the same source of truth the
|
||||
:class:`QueryValidator` consults.
|
||||
|
||||
The output is intentionally:
|
||||
* short (every token costs every call)
|
||||
* deterministic (no f-string ordering bugs, no Python dict iteration)
|
||||
* free of internal jargon ('canonicalQueryPattern' is rendered as
|
||||
'CANONICAL PATTERN' for the LLM)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable, List
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
||||
CanonicalQueryPattern,
|
||||
Constraint,
|
||||
ConstraintRule,
|
||||
Entity,
|
||||
OntologyDescriptor,
|
||||
Relation,
|
||||
)
|
||||
|
||||
|
||||
def compileOntologyToPrompt(ontology: OntologyDescriptor) -> str:
    """Turn *ontology* into the text block appended to the sub-agent prompt.

    The first line is the fixed marker ``DOMAIN ONTOLOGY (<featureCode>):``
    so downstream tooling can locate and replace the block. The entities
    section is always present; the relations, constraints and patterns
    sections appear only when non-empty, each preceded by a blank line.
    The result ends with exactly one newline.
    """
    block: List[str] = [f"DOMAIN ONTOLOGY ({ontology.featureCode}):", ""]
    block += _renderEntities(ontology.entities)

    optionalSections = (
        _renderRelations(ontology.relations),
        _renderConstraints(ontology.constraints),
        _renderPatterns(ontology.canonicalPatterns),
    )
    for section in optionalSections:
        if section:
            block.append("")
            block += section

    return "\n".join(block).rstrip() + "\n"
|
||||
|
||||
|
||||
def _renderEntities(entities: Iterable[Entity]) -> List[str]:
    """Render the ``ENTITIES:`` section, one bullet per entity.

    The bullet carries the name plus optional ``(specializes ...)`` and
    ``[table: ...]`` suffixes; a non-empty description and each invariant
    follow on their own lines. The header is emitted even for no entities.
    """
    rendered: List[str] = ["ENTITIES:"]
    for entity in entities:
        suffix = ""
        if entity.parentEntity:
            suffix += f" (specializes {entity.parentEntity})"
        if entity.pythonClass:
            suffix += f" [table: {entity.pythonClass}]"
        rendered.append(f"- {entity.name}" + suffix)
        if entity.description:
            rendered.append(f" {entity.description}")
        rendered.extend(f" * {inv.description}" for inv in entity.invariants)
    return rendered
|
||||
|
||||
|
||||
def _renderRelations(relations: Iterable[Relation]) -> List[str]:
    """Render the ``RELATIONS:`` section, or ``[]`` when there are none.

    Each relation becomes ``- <from> -> <to> (<cardinality>)`` with
    `` via <field>`` added inside the parentheses when ``via`` is set.
    """
    items = list(relations)
    if not items:
        return []

    rendered: List[str] = ["RELATIONS:"]
    for rel in items:
        viaPart = f" via {rel.via}" if rel.via else ""
        rendered.append(
            f"- {rel.fromEntity} -> {rel.toEntity} ({rel.cardinality.value}{viaPart})"
        )
    return rendered
|
||||
|
||||
|
||||
def _renderConstraints(constraints: Iterable[Constraint]) -> List[str]:
    """Render the ``CONSTRAINTS (validator-enforced):`` section.

    Returns ``[]`` when there are no constraints. Each constraint becomes
    ``- <rule label> on <appliesTo>: <message>``. A non-empty list under
    ``params['requiredFields']`` or ``params['intents']`` is appended in
    parentheses.

    ``params`` is typed ``Dict[str, Any]``, so list items are passed through
    ``str()`` before joining. A stray non-string entry would otherwise raise
    ``TypeError`` and make the caller drop the whole ontology block.
    """
    cons = list(constraints)
    if not cons:
        return []
    out: List[str] = ["CONSTRAINTS (validator-enforced):"]
    for c in cons:
        line = f"- {_ruleLabel(c.rule)} on {c.appliesTo}: {c.message}"
        params = c.params or {}
        required = params.get("requiredFields")
        if isinstance(required, list) and required:
            line += f" (required filters: {', '.join(str(item) for item in required)})"
        intents = params.get("intents")
        if isinstance(intents, list) and intents:
            line += f" (intents: {', '.join(str(item) for item in intents)})"
        out.append(line)
    return out
|
||||
|
||||
|
||||
def _ruleLabel(rule: ConstraintRule) -> str:
    """Return the rule's value as lower-case words (underscores become spaces)."""
    lowered = rule.value.lower()
    return lowered.replace("_", " ")
|
||||
|
||||
|
||||
def _renderPatterns(patterns: Iterable[CanonicalQueryPattern]) -> List[str]:
    """Render the numbered ``CANONICAL QUERY PATTERNS`` section.

    Returns ``[]`` when there are no patterns. Each pattern yields an
    ``<n>) intent=...`` line and a ``call:`` line. A ``note:`` line is added
    when the pattern dict holds a string under ``_postProcessing``.
    """
    items = list(patterns)
    if not items:
        return []

    rendered: List[str] = ["CANONICAL QUERY PATTERNS (mimic these tool calls):"]
    for number, item in enumerate(items, start=1):
        rendered += [
            f"{number}) intent={item.intent}: {item.description}",
            f" call: {_renderPatternCall(item.pattern)}",
        ]
        note = None
        if isinstance(item.pattern, dict):
            note = item.pattern.get("_postProcessing")
        if isinstance(note, str):
            rendered.append(f" note: {note}")
    return rendered
|
||||
|
||||
|
||||
def _renderPatternCall(pattern: dict) -> str:
    """Render the pattern as a compact one-line tool call signature.

    Scalar arguments (``tableName``, ``aggregate``, ``field``, ``groupBy``,
    ``orderBy``) are emitted in that fixed order when present and not
    ``None``. ``fields`` and ``filters`` follow when non-empty. Non-dict
    filter entries are skipped. A missing ``tool`` is shown as ``?``.
    """
    tool = pattern.get("tool", "?")
    parts: List[str] = []
    for key in ("tableName", "aggregate", "field", "groupBy", "orderBy"):
        if pattern.get(key) is not None:
            parts.append(f"{key}={pattern[key]!r}")
    if pattern.get("fields"):
        parts.append(f"fields={pattern['fields']}")
    if pattern.get("filters"):
        compact = ", ".join(
            _renderPatternFilter(entry)
            for entry in pattern["filters"]
            if isinstance(entry, dict)
        )
        parts.append(f"filters=[{compact}]")
    return f"{tool}({', '.join(parts)})"


def _renderPatternFilter(filterEntry: dict) -> str:
    """Render one filter dict as ``<field><op><value>`` for the prompt."""
    field = filterEntry.get("field")
    op = filterEntry.get("op", "=")
    opText = str(op)
    normalized = " ".join(opText.upper().split())
    if normalized in ("IS NULL", "IS NOT NULL"):
        # Value-less operators: printing the (absent) value would yield
        # nonsense such as "closedAtIS NULLNone".
        return f"{field} {opText}"
    if any(ch.isalpha() for ch in opText):
        # Word operators (LIKE, ILIKE, ...) need spaces to stay readable.
        return f"{field} {opText} {filterEntry.get('value')!r}"
    # Symbol operators keep the original tight rendering, e.g. a='5400'.
    return f"{field}{opText}{filterEntry.get('value')!r}"
|
||||
311
modules/serviceCenter/services/serviceAgent/queryValidator.py
Normal file
311
modules/serviceCenter/services/serviceAgent/queryValidator.py
Normal file
|
|
@ -0,0 +1,311 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Pre-execute query validator for the Feature Data Sub-Agent.
|
||||
|
||||
Sits between the LLM tool call and `FeatureDataProvider`. Catches the four
|
||||
high-impact hallucination classes deterministically so the LLM gets an
|
||||
actionable repair hint instead of a raw SQL exception:
|
||||
|
||||
* invented field names -> FIELD_NOT_FOUND (+ fuzzy suggestion)
|
||||
* operator/type mismatches -> OPERATOR_INCOMPATIBLE
|
||||
* SUM/AVG on already-aggregated -> INVALID_AGGREGATE_TARGET
|
||||
balance/total columns
|
||||
* orderBy on invented fields -> ORDER_BY_INVALID
|
||||
|
||||
The validator reads the canonical schema from
|
||||
`modules.datamodels.datamodelBase.MODEL_REGISTRY`. When an
|
||||
`OntologyDescriptor` is provided (Phase 2), its constraints override the
|
||||
convention-based defaults (e.g. NEVER_AGGREGATE on closingBalance).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
import logging
|
||||
import re
|
||||
import typing
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from modules.datamodels.datamodelBase import MODEL_REGISTRY
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
||||
Constraint,
|
||||
ConstraintRule,
|
||||
OntologyDescriptor,
|
||||
QueryValidationError,
|
||||
ValidationErrorCode,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Filter operators rejected by the validator on non-string fields.
_STRING_ONLY_OPERATORS = {"LIKE", "ILIKE"}
# Filter operators rejected by the validator on non-comparable fields.
_COMPARISON_OPERATORS = {">", "<", ">=", "<="}
# Operators that take no value. NOTE(review): no use is visible in the part
# of this module shown here -- confirm it is referenced further down.
_VALUELESS_OPERATORS = {"IS NULL", "IS NOT NULL"}
# Aggregates subject to the never-aggregate and numeric-field checks.
_AGGREGATES_THAT_SUM = {"SUM", "AVG"}
# Convention-based fallback: field names ending in one of these suffixes are
# treated as never-aggregate.
_AGGREGATE_BLACKLIST_SUFFIXES_DEFAULT: Tuple[str, ...] = ("Balance", "Total")
|
||||
|
||||
|
||||
class QueryValidator:
|
||||
"""Validate sub-agent tool arguments against the schema (+ optional ontology).
|
||||
|
||||
Stateless per call -- holding only the optional ontology. Each
|
||||
`validateXxx` method returns ``None`` on success or a
|
||||
:class:`QueryValidationError` to be surfaced to the LLM.
|
||||
"""
|
||||
|
||||
def __init__(self, ontology: Optional[OntologyDescriptor] = None):
    """Store the optional ontology; it is the only state the validator holds.

    Args:
        ontology: Feature ontology whose constraints the validator consults,
            or ``None`` to rely on the convention-based defaults.
    """
    self._ontology = ontology
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public API: one method per sub-agent tool
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def validateBrowseQuery(
    self, tableName: str, args: Dict[str, Any]
) -> Optional[QueryValidationError]:
    """Check the arguments of a browseTable call.

    Only the ``fields`` whitelist is inspected in Phase 1; ``limit`` and
    ``offset`` are sanitized by the tool wrapper. When no field map is
    available for ``tableName`` the call is accepted unchecked.

    Returns:
        ``None`` if the call is acceptable, else the first error found.
    """
    schema = _getModelFields(tableName)
    if schema is None:
        return None
    return self._validateFieldList(args.get("fields"), schema)
|
||||
|
||||
def validateQueryTable(
    self, tableName: str, args: Dict[str, Any]
) -> Optional[QueryValidationError]:
    """Check the arguments of a queryTable call.

    Checks run in this order and stop at the first failure: the ``fields``
    list, each entry of ``filters``, then ``orderBy``. When no field map is
    available for ``tableName`` the call is accepted unchecked.

    Returns:
        ``None`` if the call is acceptable, else the first error found.
    """
    schema = _getModelFields(tableName)
    if schema is None:
        return None

    problem = self._validateFieldList(args.get("fields"), schema)
    if problem is not None:
        return problem

    for entry in args.get("filters") or []:
        problem = self._validateFilter(entry, schema)
        if problem is not None:
            return problem

    orderBy = args.get("orderBy")
    # Absent / "none-like" orderBy values and real columns are all fine.
    if orderBy is None or _isPlainNone(orderBy) or orderBy in schema:
        return None
    return QueryValidationError(
        code=ValidationErrorCode.ORDER_BY_INVALID,
        field=orderBy,
        suggestion=_suggestFieldName(orderBy, schema),
        hint="orderBy must be a real field of this table.",
    )
|
||||
|
||||
def validateAggregateQuery(
    self, tableName: str, args: Dict[str, Any]
) -> Optional[QueryValidationError]:
    """Check the arguments of an aggregateTable call.

    The headline case is ``SUM(closingBalance)`` (and similar) across
    periods: such balances are already per-period values, so adding them up
    yields nonsense.

    Checks run in this order and stop at the first failure: the aggregated
    field exists; SUM/AVG is not aimed at a never-aggregate field; SUM/AVG
    is aimed at a numeric field; ``groupBy`` exists; each filter is valid.
    When no field map is available for ``tableName``, or ``field`` is empty,
    the call is accepted unchecked.

    Returns:
        ``None`` if the call is acceptable, else the first error found.
    """
    schema = _getModelFields(tableName)
    if schema is None:
        return None

    target = args.get("field")
    aggregate = (args.get("aggregate") or "").upper()
    if not target:
        # The tool wrapper already rejects an empty field.
        return None

    if target not in schema:
        return QueryValidationError(
            code=ValidationErrorCode.FIELD_NOT_FOUND,
            field=target,
            suggestion=_suggestFieldName(target, schema),
            hint="Use browseTable to inspect this table's columns.",
        )

    if aggregate in _AGGREGATES_THAT_SUM:
        if self._isAggregateBlacklisted(tableName, target):
            return QueryValidationError(
                code=ValidationErrorCode.INVALID_AGGREGATE_TARGET,
                field=target,
                suggestion=None,
                hint=(
                    f"{target} is already aggregated per period; do not {aggregate} it "
                    "across rows. Use queryTable with period filters instead."
                ),
            )
        if not _isNumericAnnotation(schema[target]):
            return QueryValidationError(
                code=ValidationErrorCode.TYPE_MISMATCH,
                field=target,
                suggestion=None,
                hint=f"{aggregate} requires a numeric field; {target} is not numeric.",
            )

    groupBy = args.get("groupBy")
    groupingRequested = groupBy is not None and not _isPlainNone(groupBy)
    if groupingRequested and groupBy not in schema:
        return QueryValidationError(
            code=ValidationErrorCode.FIELD_NOT_FOUND,
            field=groupBy,
            suggestion=_suggestFieldName(groupBy, schema),
            hint="groupBy must be a real field of this table.",
        )

    # Same filter checks as queryTable, so repair hints are consistent no
    # matter which tool the LLM picked.
    for entry in args.get("filters") or []:
        problem = self._validateFilter(entry, schema)
        if problem is not None:
            return problem
    return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# internals
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _validateFieldList(
|
||||
self, fields: Optional[List[str]], modelFields: Dict[str, Any]
|
||||
) -> Optional[QueryValidationError]:
|
||||
if not fields:
|
||||
return None
|
||||
for f in fields:
|
||||
if not isinstance(f, str):
|
||||
continue
|
||||
if f not in modelFields:
|
||||
return QueryValidationError(
|
||||
code=ValidationErrorCode.FIELD_NOT_FOUND,
|
||||
field=f,
|
||||
suggestion=_suggestFieldName(f, modelFields),
|
||||
hint="Use browseTable to inspect this table's columns.",
|
||||
)
|
||||
return None
|
||||
|
||||
def _validateFilter(
    self, filterEntry: Any, modelFields: Dict[str, Any]
) -> Optional[QueryValidationError]:
    """Validate a single filter entry against the table's field annotations.

    Args:
        filterEntry: One element of the ``filters`` argument; expected to be
            a dict with ``field`` and optional ``op`` keys.
        modelFields: ``{fieldName: annotation}`` mapping of the table.

    Returns:
        ``None`` when the entry is acceptable or cannot be judged here (not a
        dict, or ``field`` missing / empty / not a string). Otherwise a
        ``FIELD_NOT_FOUND`` error for an unknown field, or an
        ``OPERATOR_INCOMPATIBLE`` error when the operator does not fit the
        field's annotation.
    """
    if not isinstance(filterEntry, dict):
        return None

    field = filterEntry.get("field")
    if not isinstance(field, str) or not field:
        return None  # tool wrapper passes these straight through

    # The operator comes from LLM-produced arguments, so it may be missing or
    # not a string at all. Only strings are normalised; any other value maps
    # to "" which matches none of the operator sets below, so the validator
    # never raises on it and only the field-existence check applies.
    rawOp = filterEntry.get("op") or "="
    op = rawOp.upper() if isinstance(rawOp, str) else ""

    if field not in modelFields:
        return QueryValidationError(
            code=ValidationErrorCode.FIELD_NOT_FOUND,
            field=field,
            suggestion=_suggestFieldName(field, modelFields),
            hint="Use browseTable to inspect this table's columns.",
        )

    annotation = modelFields[field]

    if op in _STRING_ONLY_OPERATORS and not _isStringAnnotation(annotation):
        return QueryValidationError(
            code=ValidationErrorCode.OPERATOR_INCOMPATIBLE,
            field=field,
            suggestion=None,
            hint=f"{op} only works on string fields; {field} is not a string.",
        )

    if op in _COMPARISON_OPERATORS and not _isComparableAnnotation(annotation):
        return QueryValidationError(
            code=ValidationErrorCode.OPERATOR_INCOMPATIBLE,
            field=field,
            suggestion=None,
            hint=f"{op} requires a numeric or date field; {field} is not comparable.",
        )
    return None
|
||||
|
||||
def _isAggregateBlacklisted(self, tableName: str, fieldName: str) -> bool:
    """Tell whether ``tableName.fieldName`` must never be aggregated.

    When an ontology is attached, its constraints for the table are checked
    for a ``NEVER_AGGREGATE`` rule that applies to exactly this field. The
    naming-convention check (default blacklist suffixes) runs whenever the
    ontology is absent or yields no such rule.
    """
    ontology = self._ontology
    if ontology is not None:
        qualifiedName = f"{tableName}.{fieldName}"
        if any(
            constraint.rule == ConstraintRule.NEVER_AGGREGATE
            and constraint.appliesTo == qualifiedName
            for constraint in ontology.constraintsForTable(tableName)
        ):
            return True

    return any(
        fieldName.endswith(blockedSuffix)
        for blockedSuffix in _AGGREGATE_BLACKLIST_SUFFIXES_DEFAULT
    )
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _getModelFields(tableName: str) -> Optional[Dict[str, Any]]:
    """Look up a table's model and return ``{fieldName: annotation}``.

    Returns ``None`` when ``tableName`` has no entry in ``MODEL_REGISTRY``.
    Validation is best-effort: callers let the request through when the
    schema is unknown and leave safety to the downstream SQL layer.
    """
    registeredModel = MODEL_REGISTRY.get(tableName)
    if registeredModel is None:
        return None

    annotations: Dict[str, Any] = {}
    for fieldName, fieldInfo in registeredModel.model_fields.items():
        annotations[fieldName] = fieldInfo.annotation
    return annotations
|
||||
|
||||
|
||||
def _suggestFieldName(badName: str, modelFields: Dict[str, Any]) -> Optional[str]:
|
||||
"""Return the closest valid field name, or None if nothing reasonable."""
|
||||
if not badName or not modelFields:
|
||||
return None
|
||||
matches = difflib.get_close_matches(badName, list(modelFields.keys()), n=1, cutoff=0.6)
|
||||
return matches[0] if matches else None
|
||||
|
||||
|
||||
def _isPlainNone(value: Any) -> bool:
|
||||
"""LLMs sometimes pass the literal string 'None' -- treat both as None."""
|
||||
return value is None or (isinstance(value, str) and value.strip().lower() == "none")
|
||||
|
||||
|
||||
def _unwrapAnnotation(annotation: Any) -> Tuple[Any, ...]:
|
||||
"""Flatten Optional/Union annotations into their constituent types."""
|
||||
origin = typing.get_origin(annotation)
|
||||
if origin is None:
|
||||
return (annotation,)
|
||||
return tuple(a for a in typing.get_args(annotation) if a is not type(None))
|
||||
|
||||
|
||||
def _isStringAnnotation(annotation: Any) -> bool:
    """Return True when one of the annotation's unwrapped members is exactly ``str``."""
    for member in _unwrapAnnotation(annotation):
        if member is str:
            return True
    return False
|
||||
|
||||
|
||||
def _isNumericAnnotation(annotation: Any) -> bool:
    """Return True when one of the annotation's unwrapped members is ``int`` or ``float``."""
    for member in _unwrapAnnotation(annotation):
        if member in (int, float):
            return True
    return False
|
||||
|
||||
|
||||
def _isComparableAnnotation(annotation: Any) -> bool:
    """Return True when ordering comparisons make sense for the annotation.

    Comparable members are ``int``, ``float``, ``date`` and ``datetime``.
    Dates are included so this predicate agrees with the
    OPERATOR_INCOMPATIBLE hint emitted by the filter validator, which tells
    the caller that comparison operators need "a numeric or date field".

    Booleans count as int in Python's type hierarchy but the comparison
    operators ``>``/``<`` on bool columns are almost never meaningful, so we
    treat bool as non-comparable for validator purposes.
    """
    import datetime as _dt  # local import: avoids depending on module-level names

    comparableTypes = (int, float, _dt.date, _dt.datetime)
    for member in _unwrapAnnotation(annotation):
        if member is bool:
            continue
        if member in comparableTypes:
            return True
    return False
|
||||
|
|
@ -98,14 +98,17 @@ class _VirtualFS:
|
|||
|
||||
def _makeReadFile(services):
|
||||
"""Create a readFile(fileId) closure bound to the current services context."""
|
||||
def readFile(fileId: str) -> str:
|
||||
def readFile(fileId: str, encoding: str = "utf-8") -> str:
|
||||
mgmt = getattr(services, 'interfaceDbComponent', None) if services else None
|
||||
if not mgmt:
|
||||
raise RuntimeError("readFile: no file store available in this session")
|
||||
data = mgmt.getFileData(str(fileId))
|
||||
if data is None:
|
||||
raise FileNotFoundError(f"File '{fileId}' not found in workspace")
|
||||
return data.decode("utf-8")
|
||||
try:
|
||||
return data.decode(encoding)
|
||||
except (UnicodeDecodeError, LookupError):
|
||||
return data.decode("utf-8", errors="replace")
|
||||
return readFile
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ class ToolRegistry:
|
|||
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
|
||||
description: str = "", parameters: Dict[str, Any] = None,
|
||||
readOnly: bool = False, featureType: str = None,
|
||||
toolSet: str = None):
|
||||
toolSet: str = None, displayLabel: str = None):
|
||||
"""Register a tool with its handler function."""
|
||||
if name in self._tools:
|
||||
logger.warning(f"Tool '{name}' already registered, overwriting")
|
||||
|
|
@ -31,6 +31,7 @@ class ToolRegistry:
|
|||
self._tools[name] = ToolDefinition(
|
||||
name=name,
|
||||
description=description,
|
||||
displayLabel=displayLabel,
|
||||
parameters=parameters or {},
|
||||
readOnly=readOnly,
|
||||
featureType=featureType,
|
||||
|
|
|
|||
|
|
@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff
|
|||
|
||||
Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
|
||||
WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
|
||||
- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das")
|
||||
- E-Mail an Teilnehmer/Kontakte versenden
|
||||
- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen
|
||||
- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.)
|
||||
- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.)
|
||||
- Recherche im Internet oder aktuelle Informationen noetig
|
||||
- Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast
|
||||
- E-Mail versenden
|
||||
- Dokument generieren oder in einer Datenquelle ablegen
|
||||
- Mehrere Schritte oder Tool-Aufrufe noetig
|
||||
- Daten aus externen Quellen abrufen
|
||||
|
||||
Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true.
|
||||
|
||||
Wenn needsAgent=true:
|
||||
- Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ from modules.shared.jsonContinuation import getContexts
|
|||
from modules.shared.jsonUtils import buildContinuationContext, tryParseJson
|
||||
from modules.shared.jsonUtils import closeJsonStructures
|
||||
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
|
||||
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -447,7 +448,6 @@ class AiCallLooper:
|
|||
extracted = extractJsonString(contexts.completePart)
|
||||
parsed, parseErr, _ = tryParseJson(extracted)
|
||||
if parseErr is not None:
|
||||
from modules.shared.jsonUtils import repairBrokenJson
|
||||
repaired = repairBrokenJson(extracted)
|
||||
if repaired:
|
||||
parsed = repaired
|
||||
|
|
@ -470,9 +470,10 @@ class AiCallLooper:
|
|||
return useCase.finalResultHandler(
|
||||
result, normalized, extracted, debugPrefix, self.services
|
||||
)
|
||||
except Exception as e:
|
||||
except (json.JSONDecodeError, KeyError, TypeError) as e:
|
||||
logger.warning(
|
||||
f"Iteration {iteration}: completePart not serializable after getContexts success: {e}"
|
||||
f"Iteration {iteration}: completePart not serializable after getContexts success: "
|
||||
f"{type(e).__name__}: {e}"
|
||||
)
|
||||
mergeFailCount += 1
|
||||
if mergeFailCount >= MAX_MERGE_FAILS:
|
||||
|
|
@ -491,6 +492,15 @@ class AiCallLooper:
|
|||
)
|
||||
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Iteration {iteration}: unexpected error during completePart processing "
|
||||
f"(re-raising, NOT a pipeline-mismatch retry): {type(e).__name__}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
if iterationOperationId:
|
||||
self.services.chat.progressLogFinish(iterationOperationId, False)
|
||||
raise
|
||||
|
||||
elif contexts.jsonParsingSuccess and contexts.overlapContext != "":
|
||||
# JSON parseable but has cut point - CONTINUE to next iteration
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@ from .mainBackgroundJobService import (
|
|||
startJob,
|
||||
getJobStatus,
|
||||
listJobs,
|
||||
cancelJob,
|
||||
cancelJobsByConnection,
|
||||
isTerminalStatus,
|
||||
JobProgressCallback,
|
||||
)
|
||||
|
||||
|
|
@ -15,5 +18,8 @@ __all__ = [
|
|||
"startJob",
|
||||
"getJobStatus",
|
||||
"listJobs",
|
||||
"cancelJob",
|
||||
"cancelJobsByConnection",
|
||||
"isTerminalStatus",
|
||||
"JobProgressCallback",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -30,10 +30,11 @@ clear message. No silent zombies.
|
|||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
||||
|
||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||
from modules.connectors.connectorDbPostgre import DatabaseConnector, getCachedConnector
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from modules.shared.dbRegistry import registerDatabase
|
||||
from modules.datamodels.datamodelBackgroundJob import (
|
||||
|
|
@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app")
|
|||
registerDatabase(JOBS_DATABASE)
|
||||
|
||||
|
||||
JobProgressCallback = Callable[[int, Optional[str]], None]
|
||||
_CANCEL_CHECK_INTERVAL_S = 3.0
|
||||
|
||||
|
||||
class JobProgressCallback:
    """Callable progress reporter with cooperative cancel-check for long-running walkers.

    Calling the instance writes a clamped progress value (and an optional
    message) to the job record via ``_updateJob``. ``isCancelled`` polls the
    job record via ``_loadJob``, at most once per
    ``_CANCEL_CHECK_INTERVAL_S`` seconds.
    """

    def __init__(self, jobId: str):
        self._jobId = jobId
        # None = never polled, False = last poll said "not cancelled",
        # True = cancelled (sticky, never re-polled).
        self._cancelledCache: Optional[bool] = None
        # Monotonic timestamp of the last status poll; None until the first one.
        self._lastCheckedAt: Optional[float] = None

    def __call__(self, progress: int, message: Optional[str] = None) -> None:
        """Store progress (clamped to 0..100) and an optional message (max 500 chars).

        Never raises: any failure is logged as a warning so a broken progress
        update cannot abort the job that reports it.
        """
        try:
            clamped = max(0, min(100, int(progress)))
            fields: Dict[str, Any] = {"progress": clamped}
            if message is not None:
                fields["progressMessage"] = message[:500]
            _updateJob(self._jobId, fields)
        except Exception as ex:
            logger.warning("Progress update failed for job %s: %s", self._jobId, ex)

    def isCancelled(self) -> bool:
        """Check if this job was cancelled. Reads DB at most every 3s to limit load.

        Returns:
            True once the job record has status CANCELLED (cached from then
            on). False otherwise, including when the status cannot be read.
        """
        if self._cancelledCache is True:
            return True

        # Monotonic clock: the throttle measures elapsed time, so it must not
        # be affected by wall-clock adjustments.
        now = time.monotonic()
        if (
            self._lastCheckedAt is not None
            and now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S
        ):
            return self._cancelledCache or False
        self._lastCheckedAt = now

        try:
            job = _loadJob(self._jobId)
            if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value:
                self._cancelledCache = True
                return True
        except Exception as ex:
            # Best effort: an unreadable status is treated as "not cancelled",
            # but the failure is recorded instead of being dropped silently.
            logger.debug("Cancel check failed for job %s: %s", self._jobId, ex)

        self._cancelledCache = False
        return False
|
||||
|
||||
|
||||
JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
|
||||
|
||||
|
||||
|
|
@ -64,7 +104,13 @@ def registerJobHandler(jobType: str, handler: JobHandler) -> None:
|
|||
|
||||
|
||||
def _getDb() -> DatabaseConnector:
|
||||
return DatabaseConnector(
|
||||
"""Return the shared cached connector for the jobs DB.
|
||||
|
||||
Reuses the same connector across all job CRUD calls instead of opening a
|
||||
fresh psycopg2 connection (and re-running `_create_database_if_not_exists`
|
||||
+ `_create_tables` + `_initializeSystemTable`) on every operation.
|
||||
"""
|
||||
return getCachedConnector(
|
||||
dbDatabase=JOBS_DATABASE,
|
||||
dbHost=APP_CONFIG.get("DB_HOST", "localhost"),
|
||||
dbPort=int(APP_CONFIG.get("DB_PORT", "5432")),
|
||||
|
|
@ -155,16 +201,7 @@ def _markError(jobId: str, errorMessage: str) -> None:
|
|||
|
||||
|
||||
def _makeProgressCallback(jobId: str) -> JobProgressCallback:
|
||||
def _cb(progress: int, message: Optional[str] = None) -> None:
|
||||
try:
|
||||
clamped = max(0, min(100, int(progress)))
|
||||
fields: Dict[str, Any] = {"progress": clamped}
|
||||
if message is not None:
|
||||
fields["progressMessage"] = message[:500]
|
||||
_updateJob(jobId, fields)
|
||||
except Exception as ex:
|
||||
logger.warning("Progress update failed for job %s: %s", jobId, ex)
|
||||
return _cb
|
||||
return JobProgressCallback(jobId)
|
||||
|
||||
|
||||
async def _runJob(jobId: str) -> None:
|
||||
|
|
@ -220,12 +257,51 @@ def isTerminalStatus(status: str) -> bool:
|
|||
return status in {s.value for s in TERMINAL_JOB_STATUSES}
|
||||
|
||||
|
||||
def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool:
    """Mark a job as CANCELLED so walkers can pick it up via JobProgressCallback.isCancelled().

    Args:
        jobId: Identifier of the job to cancel.
        reason: Short tag stored in ``errorMessage`` as ``cancelled: <reason>``
            (truncated to 1000 characters).

    Returns:
        True when the job record was updated; False when the job does not
        exist or is already in a terminal state.
    """
    job = _loadJob(jobId)
    if not job or isTerminalStatus(job.get("status", "")):
        return False

    cancelFields = {
        "status": BackgroundJobStatusEnum.CANCELLED.value,
        "errorMessage": f"cancelled: {reason}"[:1000],
        "finishedAt": datetime.now(timezone.utc).timestamp(),
    }
    _updateJob(jobId, cancelFields)
    logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason)
    return True
|
||||
|
||||
|
||||
def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int:
    """Cancel all PENDING/RUNNING jobs of ``jobType`` whose payload.connectionId matches.

    Args:
        connectionId: Connection whose jobs should be cancelled.
        jobType: Job type to scan.

    Returns:
        Number of jobs for which ``cancelJob`` reported success.
    """
    db = _getDb()
    candidates = db.getRecordset(BackgroundJob, recordFilter={"jobType": jobType})
    activeStatuses = (
        BackgroundJobStatusEnum.PENDING.value,
        BackgroundJobStatusEnum.RUNNING.value,
    )

    cancelled = 0
    for job in candidates:
        if job.get("status", "") not in activeStatuses:
            continue
        jobPayload = job.get("payload") or {}
        belongsToConnection = jobPayload.get("connectionId") == connectionId
        if belongsToConnection and cancelJob(
            job["id"], reason=f"connection_stop:{connectionId[:8]}"
        ):
            cancelled += 1
    return cancelled
|
||||
|
||||
|
||||
def recoverInterruptedJobs() -> int:
|
||||
"""Flip any RUNNING jobs to ERROR (called at worker boot).
|
||||
|
||||
A RUNNING job in the DB after process restart means the previous worker
|
||||
died mid-execution; the asyncio task is gone and the job will never
|
||||
finish on its own.
|
||||
finish on its own. The daily scheduler or manual "Neu indexieren"
|
||||
button handles retry — no automatic re-queue to avoid infinite loops.
|
||||
"""
|
||||
db = _getDb()
|
||||
try:
|
||||
|
|
@ -243,3 +319,61 @@ def recoverInterruptedJobs() -> int:
|
|||
if count:
|
||||
logger.warning("Recovered %d interrupted background job(s) after restart", count)
|
||||
return count
|
||||
|
||||
|
||||
_ZOMBIE_MAX_AGE_SECONDS = 30 * 60
|
||||
|
||||
|
||||
def killZombieJobs(maxAgeSeconds: int = _ZOMBIE_MAX_AGE_SECONDS) -> int:
    """Flag RUNNING jobs older than ``maxAgeSeconds`` via ``_markError``.

    A job's age is measured from ``startedAt`` (falling back to
    ``createdAt``); rows with neither timestamp are skipped.

    NOTE(review): age is based on the start time, not on the last progress
    update, so a job that is still reporting progress is also flagged once it
    exceeds the limit -- confirm this is the intended definition of "stuck".

    Args:
        maxAgeSeconds: Age threshold in seconds.

    Returns:
        Number of jobs that were flagged. 0 when the scan itself fails.
    """
    db = _getDb()
    try:
        rows = db.getRecordset(BackgroundJob, recordFilter={"status": BackgroundJobStatusEnum.RUNNING.value})
    except Exception as ex:
        logger.warning("killZombieJobs: failed to scan RUNNING jobs: %s", ex)
        return 0

    now = time.time()
    threshold = now - maxAgeSeconds
    count = 0
    for row in rows:
        started = row.get("startedAt") or row.get("createdAt")
        if not started or started > threshold:
            continue
        ageMin = (now - started) / 60

        # Only the update itself is guarded, so a problem while building the
        # log line can no longer be reported as "could not kill" for a job
        # that was in fact already flagged and counted.
        try:
            _markError(row["id"], f"Zombie killed (stuck >{maxAgeSeconds // 60}min, no progress)")
        except Exception as ex:
            logger.warning("killZombieJobs: could not kill %s: %s", row.get("id"), ex)
            continue
        count += 1

        # connectionId may be absent or stored as None; normalise before slicing.
        payload = row.get("payload") or {}
        connId = payload.get("connectionId") if isinstance(payload, dict) else None
        logger.warning(
            "killZombieJobs: killed %s (type=%s connId=%s ageMin=%.1f)",
            row["id"], row.get("jobType"), str(connId or "")[:12], ageMin,
        )
    return count
|
||||
|
||||
|
||||
def registerZombieKillerScheduler(*, intervalMinutes: int = 5) -> None:
    """Register a recurring cron job that runs ``killZombieJobs``.

    The job is registered with the event manager under the id
    ``background_jobs.zombie_killer`` and a ``*/<intervalMinutes>`` minute
    schedule. Any failure during registration is logged as non-critical and
    not re-raised.
    """
    async def _runKiller():
        killZombieJobs()

    try:
        from modules.shared.eventManagement import eventManager

        schedule = {"minute": f"*/{intervalMinutes}"}
        eventManager.registerCron(
            jobId="background_jobs.zombie_killer",
            func=_runKiller,
            cronKwargs=schedule,
        )
        logger.info("Zombie-killer scheduler registered (every %d min)", intervalMinutes)
    except Exception as ex:
        logger.warning("Zombie-killer scheduler registration failed (non-critical): %s", ex)
|
||||
|
|
|
|||
|
|
@ -532,8 +532,16 @@ class ChatService:
|
|||
self, connectionId: str, sourceType: str, path: str, label: str,
|
||||
featureInstanceId: str = None, displayPath: str = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Create a new external data source reference."""
|
||||
"""Create a new external data source reference.
|
||||
|
||||
Returns existing record if connectionId + path already exists (upsert semantics).
|
||||
"""
|
||||
from modules.datamodels.datamodelDataSource import DataSource
|
||||
existing = self.interfaceDbApp.db.getRecordset(
|
||||
DataSource, recordFilter={"connectionId": connectionId, "path": path}
|
||||
)
|
||||
if existing:
|
||||
return existing[0] if isinstance(existing[0], dict) else existing[0].model_dump()
|
||||
ds = DataSource(
|
||||
connectionId=connectionId,
|
||||
sourceType=sourceType,
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ class ContainerExtractor(Extractor):
|
|||
"""Extract by recursively unpacking the container."""
|
||||
fileName = context.get("fileName", "archive")
|
||||
mimeType = context.get("mimeType", "application/octet-stream")
|
||||
cascadeDepth = context.get("_cascadeDepth", 0)
|
||||
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = [
|
||||
|
|
@ -97,7 +98,7 @@ class ContainerExtractor(Extractor):
|
|||
parts.extend(lazy)
|
||||
return parts
|
||||
|
||||
state = {"totalSize": 0, "fileCount": 0}
|
||||
state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth}
|
||||
try:
|
||||
childParts = _resolveContainerRecursive(
|
||||
fileBytes, mimeType, fileName, rootId, "", 0, state
|
||||
|
|
@ -209,7 +210,12 @@ def _addFilePart(
|
|||
|
||||
if extractor and not isinstance(extractor, ContainerExtractor):
|
||||
try:
|
||||
childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime})
|
||||
cascadeDepth = state.get("cascadeDepth", 0)
|
||||
childParts = extractor.extract(data, {
|
||||
"fileName": fileName,
|
||||
"mimeType": detectedMime,
|
||||
"_cascadeDepth": cascadeDepth + 1,
|
||||
})
|
||||
for part in childParts:
|
||||
part.parentId = parentId
|
||||
if not part.metadata:
|
||||
|
|
|
|||
|
|
@ -53,12 +53,13 @@ class EmailExtractor(Extractor):
|
|||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||
fileName = context.get("fileName", "email")
|
||||
lower = (fileName or "").lower()
|
||||
depth = context.get("_cascadeDepth", 0)
|
||||
|
||||
if lower.endswith(".msg"):
|
||||
return self._extractMsg(fileBytes, fileName)
|
||||
return self._extractEml(fileBytes, fileName)
|
||||
return self._extractMsg(fileBytes, fileName, depth)
|
||||
return self._extractEml(fileBytes, fileName, depth)
|
||||
|
||||
def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
|
||||
def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
|
||||
"""Parse standard EML (RFC 822) using stdlib email."""
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = []
|
||||
|
|
@ -91,7 +92,7 @@ class EmailExtractor(Extractor):
|
|||
attachName = part.get_filename() or "attachment"
|
||||
attachData = part.get_payload(decode=True)
|
||||
if attachData:
|
||||
parts.extend(_delegateAttachment(attachData, attachName, rootId))
|
||||
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
|
||||
continue
|
||||
|
||||
if contentType == "text/plain":
|
||||
|
|
@ -113,7 +114,7 @@ class EmailExtractor(Extractor):
|
|||
|
||||
return parts
|
||||
|
||||
def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
|
||||
def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
|
||||
"""Parse Outlook MSG files using extract-msg (optional)."""
|
||||
rootId = makeId()
|
||||
parts: List[ContentPart] = []
|
||||
|
|
@ -179,7 +180,7 @@ class EmailExtractor(Extractor):
|
|||
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
|
||||
attachData = getattr(attachment, "data", None)
|
||||
if attachData:
|
||||
parts.extend(_delegateAttachment(attachData, attachName, rootId))
|
||||
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
|
||||
|
||||
try:
|
||||
msgFile.close()
|
||||
|
|
@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str:
|
|||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]:
|
||||
"""Delegate an attachment to the appropriate type-specific extractor."""
|
||||
_MAX_CASCADE_DEPTH = 10
|
||||
|
||||
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]:
|
||||
"""Delegate an attachment to the appropriate type-specific extractor.
|
||||
|
||||
Passes ``_cascadeDepth`` through the context so nested Email→Container→Email
|
||||
chains share a global depth counter and don't recurse infinitely.
|
||||
"""
|
||||
if depth >= _MAX_CASCADE_DEPTH:
|
||||
logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction")
|
||||
import base64
|
||||
encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
|
||||
return [ContentPart(
|
||||
id=makeId(), parentId=parentId, label=attachName,
|
||||
typeGroup="binary", mimeType="application/octet-stream",
|
||||
data=encodedData,
|
||||
metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True},
|
||||
)]
|
||||
|
||||
guessedMime, _ = mimetypes.guess_type(attachName)
|
||||
detectedMime = guessedMime or "application/octet-stream"
|
||||
|
||||
from ..subRegistry import ExtractorRegistry
|
||||
registry = ExtractorRegistry()
|
||||
from ..subRegistry import getExtractorRegistry
|
||||
registry = getExtractorRegistry()
|
||||
extractor = registry.resolve(detectedMime, attachName)
|
||||
|
||||
if extractor and not isinstance(extractor, EmailExtractor):
|
||||
if extractor:
|
||||
try:
|
||||
childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime})
|
||||
childParts = extractor.extract(attachData, {
|
||||
"fileName": attachName,
|
||||
"mimeType": detectedMime,
|
||||
"_cascadeDepth": depth + 1,
|
||||
})
|
||||
for part in childParts:
|
||||
part.parentId = parentId
|
||||
if not part.metadata:
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ class ExtractionService:
|
|||
self._interfaceDbComponent = getComponentInterface(
|
||||
context.user,
|
||||
mandateId=context.mandate_id,
|
||||
featureInstanceId=context.feature_instance_id,
|
||||
)
|
||||
self._extractorRegistry = getExtractorRegistry()
|
||||
if ExtractionService._sharedChunkerRegistry is None:
|
||||
|
|
|
|||
|
|
@ -122,21 +122,54 @@ def _onConnectionRevoked(
|
|||
)
|
||||
|
||||
|
||||
_SOURCE_TYPE_MAP = {
|
||||
"msft": {
|
||||
"sharepoint": ("sharepointFolder", "onedriveFolder"),
|
||||
"outlook": ("outlookFolder", "calendarFolder", "contactFolder"),
|
||||
},
|
||||
"google": {
|
||||
"drive": ("googleDriveFolder",),
|
||||
"gmail": ("gmailFolder",),
|
||||
},
|
||||
"clickup": {
|
||||
"clickup": ("clickupList", "clickup"),
|
||||
},
|
||||
"infomaniak": {
|
||||
"kdrive": ("kdriveFolder", "infomaniak"),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
    """Return the connection's DataSource rows that have ``ragIndexEnabled`` set.

    Args:
        connectionId: Connection whose DataSource rows are loaded.
        dataSourceIds: Optional id list (mini-bootstrap); when non-empty, only
            rows whose ``id`` is in it are kept.

    Returns:
        List of matching DataSource rows.
    """
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.datamodels.datamodelDataSource import DataSource

    rows = getRootInterface().db.getRecordset(
        DataSource, recordFilter={"connectionId": connectionId}
    )

    selected = []
    for row in rows:
        if dataSourceIds and row.get("id") not in dataSourceIds:
            continue
        if row.get("ragIndexEnabled"):
            selected.append(row)
    return selected
|
||||
|
||||
|
||||
async def _bootstrapJobHandler(
|
||||
job: Dict[str, Any],
|
||||
progressCb,
|
||||
) -> Dict[str, Any]:
|
||||
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
|
||||
"""Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources."""
|
||||
payload = job.get("payload") or {}
|
||||
connectionId = payload.get("connectionId")
|
||||
authority = (payload.get("authority") or "").lower()
|
||||
dataSourceIds = payload.get("dataSourceIds")
|
||||
if not connectionId:
|
||||
raise ValueError("connection.bootstrap requires payload.connectionId")
|
||||
|
||||
progressCb(5, f"resolving {authority} connection")
|
||||
|
||||
# Defensive consent check: if the connection has since disabled knowledge ingestion
|
||||
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
|
||||
# Defensive consent check
|
||||
try:
|
||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||
_root = getRootInterface()
|
||||
|
|
@ -156,6 +189,21 @@ async def _bootstrapJobHandler(
|
|||
except Exception as _guardErr:
|
||||
logger.debug("Could not load connection for consent guard: %s", _guardErr)
|
||||
|
||||
# Load only ragIndexEnabled DataSources for this connection
|
||||
dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds)
|
||||
if not dataSources:
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s",
|
||||
connectionId,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.skipped",
|
||||
"connectionId": connectionId,
|
||||
"authority": authority,
|
||||
"reason": "no_data_sources",
|
||||
},
|
||||
)
|
||||
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"}
|
||||
|
||||
def _normalize(res: Any, label: str) -> Dict[str, Any]:
|
||||
if isinstance(res, Exception):
|
||||
logger.error(
|
||||
|
|
@ -165,6 +213,10 @@ async def _bootstrapJobHandler(
|
|||
return {"error": str(res)}
|
||||
return res or {}
|
||||
|
||||
def _filterDs(walkerKey: str) -> list:
|
||||
sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ())
|
||||
return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes]
|
||||
|
||||
if authority == "msft":
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
|
||||
bootstrapSharepoint,
|
||||
|
|
@ -173,10 +225,15 @@ async def _bootstrapJobHandler(
|
|||
bootstrapOutlook,
|
||||
)
|
||||
|
||||
progressCb(10, "sharepoint + outlook")
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
spDs = _filterDs("sharepoint")
|
||||
olDs = _filterDs("outlook")
|
||||
async def _noopResult():
|
||||
return {"skipped": True, "reason": "no_datasources"}
|
||||
|
||||
spResult, olResult = await asyncio.gather(
|
||||
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
|
||||
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
|
||||
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(),
|
||||
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else _noopResult(),
|
||||
return_exceptions=True,
|
||||
)
|
||||
return {
|
||||
|
|
@ -194,10 +251,15 @@ async def _bootstrapJobHandler(
|
|||
bootstrapGmail,
|
||||
)
|
||||
|
||||
progressCb(10, "drive + gmail")
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
gdDs = _filterDs("drive")
|
||||
gmDs = _filterDs("gmail")
|
||||
async def _noopResult():
|
||||
return {"skipped": True, "reason": "no_datasources"}
|
||||
|
||||
gdResult, gmResult = await asyncio.gather(
|
||||
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
|
||||
bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
|
||||
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(),
|
||||
bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(),
|
||||
return_exceptions=True,
|
||||
)
|
||||
return {
|
||||
|
|
@ -212,14 +274,29 @@ async def _bootstrapJobHandler(
|
|||
bootstrapClickup,
|
||||
)
|
||||
|
||||
progressCb(10, "clickup tasks")
|
||||
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
cuDs = _filterDs("clickup")
|
||||
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
|
||||
return {
|
||||
"connectionId": connectionId,
|
||||
"authority": authority,
|
||||
"clickup": _normalize(cuResult, "clickup"),
|
||||
}
|
||||
|
||||
if authority == "infomaniak":
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncKdrive import (
|
||||
bootstrapKdrive,
|
||||
)
|
||||
|
||||
progressCb(0, "Synchronisierung läuft...")
|
||||
kdDs = _filterDs("kdrive")
|
||||
kdResult = await bootstrapKdrive(connectionId=connectionId, progressCb=progressCb, dataSources=kdDs) if kdDs else {"skipped": True, "reason": "no_datasources"}
|
||||
return {
|
||||
"connectionId": connectionId,
|
||||
"authority": authority,
|
||||
"kdrive": _normalize(kdResult, "kdrive"),
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s",
|
||||
authority, connectionId,
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ is None).
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description"
|
|||
|
||||
@dataclass
|
||||
class ConnectionIngestionPrefs:
|
||||
"""Parsed per-connection preferences for knowledge ingestion walkers."""
|
||||
"""Parsed per-connection preferences for knowledge ingestion walkers.
|
||||
|
||||
# PII
|
||||
neutralizeBeforeEmbed: bool = False
|
||||
Neutralization is now controlled per DataSource.neutralize (not here).
|
||||
Surface toggles are obsolete — walker iterates only over ragIndexEnabled DataSources.
|
||||
"""
|
||||
|
||||
# Mail (Outlook + Gmail)
|
||||
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
|
||||
|
|
@ -32,18 +33,11 @@ class ConnectionIngestionPrefs:
|
|||
|
||||
# Files (Drive / SharePoint / OneDrive)
|
||||
filesIndexBinaries: bool = True
|
||||
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
|
||||
|
||||
# ClickUp
|
||||
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
|
||||
clickupIndexAttachments: bool = False
|
||||
|
||||
# Per-authority surface toggles (default everything on)
|
||||
gmailEnabled: bool = True
|
||||
driveEnabled: bool = True
|
||||
sharepointEnabled: bool = True
|
||||
outlookEnabled: bool = True
|
||||
|
||||
# Time window
|
||||
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
|
||||
|
||||
|
|
@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
|
|||
v = raw.get(key)
|
||||
return int(v) if isinstance(v, int) else default
|
||||
|
||||
surface = raw.get("surfaceToggles") or {}
|
||||
google_surf = surface.get("google") or {}
|
||||
msft_surf = surface.get("msft") or {}
|
||||
|
||||
return ConnectionIngestionPrefs(
|
||||
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
|
||||
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
|
||||
mailIndexAttachments=_bool("mailIndexAttachments", False),
|
||||
filesIndexBinaries=_bool("filesIndexBinaries", True),
|
||||
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
|
||||
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
|
||||
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
|
||||
gmailEnabled=bool(google_surf.get("gmail", True)),
|
||||
driveEnabled=bool(google_surf.get("drive", True)),
|
||||
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
|
||||
outlookEnabled=bool(msft_surf.get("outlook", True)),
|
||||
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
|
||||
)
|
||||
except Exception as exc:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,13 @@ import logging
|
|||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
WalkerTimeout,
|
||||
ingestWithTimeout,
|
||||
logItemStart,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -150,8 +156,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
|
|||
"data": description,
|
||||
"contextRef": {"part": "description"},
|
||||
})
|
||||
# text_content is ClickUp's rendered-markdown version; include if it adds
|
||||
# something beyond the plain description (common for bullet lists, checklists).
|
||||
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
|
||||
if textContent and textContent != description:
|
||||
parts.append({
|
||||
|
|
@ -166,33 +170,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
|
|||
async def bootstrapClickup(
|
||||
connectionId: str,
|
||||
*,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||
progressCb: Optional[Any] = None,
|
||||
adapter: Any = None,
|
||||
connection: Any = None,
|
||||
knowledgeService: Any = None,
|
||||
limits: Optional[ClickupBootstrapLimits] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||
prefs = loadConnectionPrefs(connectionId)
|
||||
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc.
|
||||
|
||||
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||
Each DataSource defines the neutralize policy for its subtree.
|
||||
"""
|
||||
if not dataSources:
|
||||
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||
|
||||
if not limits:
|
||||
limits = ClickupBootstrapLimits(
|
||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
||||
neutralize=prefs.neutralizeBeforeEmbed,
|
||||
clickupScope=prefs.clickupScope,
|
||||
)
|
||||
limits = ClickupBootstrapLimits()
|
||||
|
||||
startMs = time.time()
|
||||
result = ClickupBootstrapResult(connectionId=connectionId)
|
||||
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.started part=clickup connectionId=%s",
|
||||
connectionId,
|
||||
"ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d",
|
||||
connectionId, len(dataSources),
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.started",
|
||||
"part": "clickup",
|
||||
"connectionId": connectionId,
|
||||
"dataSourceCount": len(dataSources),
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -215,30 +221,56 @@ async def bootstrapClickup(
|
|||
return _finalizeResult(connectionId, result, startMs)
|
||||
|
||||
teams = (teamsResp or {}).get("teams") or []
|
||||
for team in teams[: limits.maxWorkspaces]:
|
||||
|
||||
cancelled = False
|
||||
for ds in dataSources:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||
break
|
||||
teamId = str(team.get("id", "") or "")
|
||||
if not teamId:
|
||||
continue
|
||||
result.workspaces += 1
|
||||
try:
|
||||
await _walkTeam(
|
||||
svc=svc,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
team=team,
|
||||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
|
||||
result.errors.append(f"team({teamId}): {exc}")
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
cancelled = True
|
||||
break
|
||||
|
||||
return _finalizeResult(connectionId, result, startMs)
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
dsLimits = ClickupBootstrapLimits(
|
||||
maxTasks=limits.maxTasks,
|
||||
maxWorkspaces=limits.maxWorkspaces,
|
||||
maxListsPerWorkspace=limits.maxListsPerWorkspace,
|
||||
maxDescriptionChars=limits.maxDescriptionChars,
|
||||
maxAgeDays=limits.maxAgeDays,
|
||||
includeClosed=limits.includeClosed,
|
||||
neutralize=dsNeutralize,
|
||||
clickupScope=limits.clickupScope,
|
||||
)
|
||||
|
||||
for team in teams[:dsLimits.maxWorkspaces]:
|
||||
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
|
||||
break
|
||||
teamId = str(team.get("id", "") or "")
|
||||
if not teamId:
|
||||
continue
|
||||
result.workspaces += 1
|
||||
try:
|
||||
await _walkTeam(
|
||||
svc=svc,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
team=team,
|
||||
limits=dsLimits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dsId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
|
||||
result.errors.append(f"team({teamId}): {exc}")
|
||||
|
||||
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||
if cancelled:
|
||||
finalResult["cancelled"] = True
|
||||
return finalResult
|
||||
|
||||
|
||||
async def _resolveDependencies(connectionId: str):
|
||||
|
|
@ -280,8 +312,12 @@ async def _walkTeam(
|
|||
team: Dict[str, Any],
|
||||
limits: ClickupBootstrapLimits,
|
||||
result: ClickupBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
teamId = str(team.get("id", "") or "")
|
||||
spacesResp = await svc.getSpaces(teamId)
|
||||
spaces = (spacesResp or {}).get("spaces") or []
|
||||
|
|
@ -294,14 +330,12 @@ async def _walkTeam(
|
|||
if not spaceId:
|
||||
continue
|
||||
|
||||
# Folderless lists directly under the space
|
||||
folderless = await svc.getFolderlessLists(spaceId)
|
||||
for lst in (folderless or {}).get("lists") or []:
|
||||
if len(listsCollected) >= limits.maxListsPerWorkspace:
|
||||
break
|
||||
listsCollected.append({**lst, "_space": space})
|
||||
|
||||
# Lists inside folders
|
||||
foldersResp = await svc.getFolders(spaceId)
|
||||
for folder in (foldersResp or {}).get("folders") or []:
|
||||
if len(listsCollected) >= limits.maxListsPerWorkspace:
|
||||
|
|
@ -318,6 +352,8 @@ async def _walkTeam(
|
|||
for lst in listsCollected:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
result.lists += 1
|
||||
await _walkList(
|
||||
svc=svc,
|
||||
|
|
@ -330,6 +366,7 @@ async def _walkTeam(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -344,13 +381,16 @@ async def _walkList(
|
|||
lst: Dict[str, Any],
|
||||
limits: ClickupBootstrapLimits,
|
||||
result: ClickupBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
listId = str(lst.get("id", "") or "")
|
||||
if not listId:
|
||||
return
|
||||
page = 0
|
||||
while result.indexed + result.skippedDuplicate < limits.maxTasks:
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
resp = await svc.getTasksInList(
|
||||
listId,
|
||||
page=page,
|
||||
|
|
@ -371,7 +411,6 @@ async def _walkList(
|
|||
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
|
||||
result.skippedPolicy += 1
|
||||
continue
|
||||
# Inject the list/folder/space metadata we already loaded.
|
||||
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
|
||||
task["folder"] = task.get("folder") or lst.get("_folder") or {}
|
||||
task["space"] = task.get("space") or lst.get("_space") or {}
|
||||
|
|
@ -385,9 +424,10 @@ async def _walkList(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page
|
||||
if len(tasks) < 100:
|
||||
return
|
||||
page += 1
|
||||
|
||||
|
|
@ -402,7 +442,8 @@ async def _ingestTask(
|
|||
task: Dict[str, Any],
|
||||
limits: ClickupBootstrapLimits,
|
||||
result: ClickupBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
|
|
@ -414,35 +455,44 @@ async def _ingestTask(
|
|||
name = task.get("name") or f"Task {taskId}"
|
||||
syntheticId = _syntheticTaskId(connectionId, taskId)
|
||||
fileName = f"{name[:80].strip() or taskId}.task.json"
|
||||
logItemStart("clickup", f"{teamId}/{taskId}")
|
||||
|
||||
contentObjects = _buildContentObjects(task, limits)
|
||||
|
||||
try:
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="clickup_task",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="application/vnd.clickup.task+json",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision or None,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "clickup",
|
||||
"service": "clickup",
|
||||
"externalItemId": taskId,
|
||||
"teamId": teamId,
|
||||
"listId": ((task.get("list") or {}).get("id")),
|
||||
"spaceId": ((task.get("space") or {}).get("id")),
|
||||
"url": task.get("url"),
|
||||
"status": ((task.get("status") or {}).get("status")),
|
||||
"tier": limits.clickupScope,
|
||||
},
|
||||
)
|
||||
handle = await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="clickup_task",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="application/vnd.clickup.task+json",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision or None,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "clickup",
|
||||
"service": "clickup",
|
||||
"externalItemId": taskId,
|
||||
"teamId": teamId,
|
||||
"listId": ((task.get("list") or {}).get("id")),
|
||||
"spaceId": ((task.get("space") or {}).get("id")),
|
||||
"url": task.get("url"),
|
||||
"status": ((task.get("status") or {}).get("status")),
|
||||
"tier": limits.clickupScope,
|
||||
},
|
||||
)
|
||||
),
|
||||
label=taskId,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
|
||||
result.failed += 1
|
||||
|
|
@ -456,17 +506,17 @@ async def _ingestTask(
|
|||
else:
|
||||
result.failed += 1
|
||||
|
||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
try:
|
||||
progressCb(
|
||||
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
|
||||
f"clickup processed={processed}",
|
||||
)
|
||||
progressCb(0, f"{processed} Tasks verarbeitet, {result.indexed} indexiert")
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
|
||||
if processed % 50 == 0:
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.progress",
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import time
|
||||
|
|
@ -20,6 +21,13 @@ from datetime import datetime, timedelta, timezone
|
|||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
WalkerTimeout,
|
||||
downloadWithTimeout,
|
||||
extractWithTimeout,
|
||||
ingestWithTimeout,
|
||||
logItemStart,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -30,7 +38,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
|||
MAX_DEPTH_DEFAULT = 4
|
||||
MAX_AGE_DAYS_DEFAULT = 365
|
||||
|
||||
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
|
||||
FOLDER_MIME = "application/vnd.google-apps.folder"
|
||||
|
||||
|
||||
|
|
@ -41,12 +48,8 @@ class GdriveBootstrapLimits:
|
|||
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
|
||||
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
|
||||
maxDepth: int = MAX_DEPTH_DEFAULT
|
||||
# Only ingest files modified within the last N days. None disables filter.
|
||||
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
|
||||
# Pass-through to IngestionJob.neutralize
|
||||
neutralize: bool = False
|
||||
# Whether to skip binary/non-text files
|
||||
filesIndexBinaries: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -95,10 +98,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
|
|||
if not maxAgeDays:
|
||||
return True
|
||||
if not modifiedIso:
|
||||
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
|
||||
return True
|
||||
try:
|
||||
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
|
||||
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
|
||||
except Exception:
|
||||
return True
|
||||
|
|
@ -111,34 +112,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
|
|||
async def bootstrapGdrive(
|
||||
connectionId: str,
|
||||
*,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||
progressCb: Optional[Any] = None,
|
||||
adapter: Any = None,
|
||||
connection: Any = None,
|
||||
knowledgeService: Any = None,
|
||||
limits: Optional[GdriveBootstrapLimits] = None,
|
||||
runExtractionFn: Optional[Callable[..., Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Walk My Drive starting from the virtual root folder."""
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||
prefs = loadConnectionPrefs(connectionId)
|
||||
"""Walk My Drive starting from the virtual root folder.
|
||||
|
||||
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||
Each DataSource defines the root path + neutralize policy for its subtree.
|
||||
"""
|
||||
if not dataSources:
|
||||
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||
|
||||
if not limits:
|
||||
limits = GdriveBootstrapLimits(
|
||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
||||
neutralize=prefs.neutralizeBeforeEmbed,
|
||||
filesIndexBinaries=prefs.filesIndexBinaries,
|
||||
)
|
||||
limits = GdriveBootstrapLimits()
|
||||
|
||||
startMs = time.time()
|
||||
result = GdriveBootstrapResult(connectionId=connectionId)
|
||||
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
|
||||
connectionId,
|
||||
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d",
|
||||
connectionId, len(dataSources),
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.started",
|
||||
"part": "gdrive",
|
||||
"connectionId": connectionId,
|
||||
"dataSourceCount": len(dataSources),
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -158,25 +161,51 @@ async def bootstrapGdrive(
|
|||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||
|
||||
try:
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
runExtractionFn=runExtractionFn,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
|
||||
depth=0,
|
||||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
|
||||
result.errors.append(f"walk: {exc}")
|
||||
cancelled = False
|
||||
for ds in dataSources:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||
break
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
cancelled = True
|
||||
break
|
||||
|
||||
return _finalizeResult(connectionId, result, startMs)
|
||||
dsPath = ds.get("path", "/")
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
|
||||
dsLimits = GdriveBootstrapLimits(
|
||||
maxItems=limits.maxItems,
|
||||
maxBytes=limits.maxBytes,
|
||||
maxFileSize=limits.maxFileSize,
|
||||
skipMimePrefixes=limits.skipMimePrefixes,
|
||||
maxDepth=limits.maxDepth,
|
||||
maxAgeDays=dsMaxAgeDays,
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
||||
try:
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
runExtractionFn=runExtractionFn,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
folderPath=dsPath,
|
||||
depth=0,
|
||||
limits=dsLimits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dsId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
|
||||
result.errors.append(f"walk({dsPath}): {exc}")
|
||||
|
||||
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||
if cancelled:
|
||||
finalResult["cancelled"] = True
|
||||
return finalResult
|
||||
|
||||
|
||||
async def _resolveDependencies(connectionId: str):
|
||||
|
|
@ -220,10 +249,13 @@ async def _walkFolder(
|
|||
depth: int,
|
||||
limits: GdriveBootstrapLimits,
|
||||
result: GdriveBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
if depth > limits.maxDepth:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
try:
|
||||
entries = await adapter.browse(folderPath)
|
||||
except Exception as exc:
|
||||
|
|
@ -236,6 +268,8 @@ async def _walkFolder(
|
|||
return
|
||||
if result.bytesProcessed >= limits.maxBytes:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
entryPath = getattr(entry, "path", "") or ""
|
||||
metadata = getattr(entry, "metadata", {}) or {}
|
||||
|
|
@ -254,6 +288,7 @@ async def _walkFolder(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
continue
|
||||
|
||||
|
|
@ -288,6 +323,7 @@ async def _walkFolder(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -306,29 +342,35 @@ async def _ingestOne(
|
|||
revision: Optional[str],
|
||||
limits: GdriveBootstrapLimits,
|
||||
result: GdriveBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
|
||||
fileName = getattr(entry, "name", "") or externalItemId
|
||||
declaredSize = int(getattr(entry, "size", 0) or 0) or None
|
||||
logItemStart("gdrive", entryPath, sizeBytes=declaredSize, mime=mimeType)
|
||||
|
||||
try:
|
||||
downloaded = await adapter.download(entryPath)
|
||||
downloaded = await downloadWithTimeout(adapter.download(entryPath), label=entryPath)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.warning("gdrive download %s failed: %s", entryPath, exc)
|
||||
result.failed += 1
|
||||
result.errors.append(f"download({entryPath}): {exc}")
|
||||
return
|
||||
|
||||
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
|
||||
fileBytes: bytes
|
||||
if isinstance(downloaded, (bytes, bytearray)):
|
||||
fileBytes = bytes(downloaded)
|
||||
else:
|
||||
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
|
||||
if getattr(downloaded, "mimeType", None):
|
||||
mimeType = downloaded.mimeType # export may have changed the type
|
||||
mimeType = downloaded.mimeType
|
||||
if not fileBytes:
|
||||
result.failed += 1
|
||||
return
|
||||
|
|
@ -339,10 +381,16 @@ async def _ingestOne(
|
|||
result.bytesProcessed += len(fileBytes)
|
||||
|
||||
try:
|
||||
extracted = runExtractionFn(
|
||||
extracted = await extractWithTimeout(
|
||||
runExtractionFn,
|
||||
fileBytes, fileName, mimeType,
|
||||
ExtractionOptions(mergeStrategy=None),
|
||||
label=entryPath,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.warning("gdrive extraction %s failed: %s", entryPath, exc)
|
||||
result.failed += 1
|
||||
|
|
@ -354,28 +402,37 @@ async def _ingestOne(
|
|||
result.skippedPolicy += 1
|
||||
return
|
||||
|
||||
provenance: Dict[str, Any] = {
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "google",
|
||||
"service": "drive",
|
||||
"externalItemId": externalItemId,
|
||||
"entryPath": entryPath,
|
||||
"tier": "body",
|
||||
}
|
||||
try:
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gdrive_item",
|
||||
sourceId=syntheticFileId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "google",
|
||||
"service": "drive",
|
||||
"externalItemId": externalItemId,
|
||||
"entryPath": entryPath,
|
||||
"tier": "body",
|
||||
},
|
||||
)
|
||||
handle = await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gdrive_item",
|
||||
sourceId=syntheticFileId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision,
|
||||
neutralize=limits.neutralize,
|
||||
provenance=provenance,
|
||||
)
|
||||
),
|
||||
label=entryPath,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
|
||||
result.failed += 1
|
||||
|
|
@ -388,14 +445,13 @@ async def _ingestOne(
|
|||
result.indexed += 1
|
||||
else:
|
||||
result.failed += 1
|
||||
if handle.error:
|
||||
result.errors.append(f"ingest({entryPath}): {handle.error}")
|
||||
|
||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(
|
||||
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
|
||||
f"gdrive processed={processed}",
|
||||
)
|
||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
|
|
@ -411,6 +467,8 @@ async def _ingestOne(
|
|||
},
|
||||
)
|
||||
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||
durationMs = int((time.time() - startMs) * 1000)
|
||||
|
|
|
|||
|
|
@ -24,6 +24,11 @@ from datetime import datetime, timedelta, timezone
|
|||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
WalkerTimeout,
|
||||
ingestWithTimeout,
|
||||
logItemStart,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -175,35 +180,36 @@ def _buildContentObjects(
|
|||
async def bootstrapGmail(
|
||||
connectionId: str,
|
||||
*,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||
progressCb: Optional[Any] = None,
|
||||
adapter: Any = None,
|
||||
connection: Any = None,
|
||||
knowledgeService: Any = None,
|
||||
limits: Optional[GmailBootstrapLimits] = None,
|
||||
googleGetFn: Optional[Callable[..., Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||
prefs = loadConnectionPrefs(connectionId)
|
||||
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages.
|
||||
|
||||
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||
Each DataSource defines the neutralize policy for its scope.
|
||||
"""
|
||||
if not dataSources:
|
||||
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||
|
||||
if not limits:
|
||||
limits = GmailBootstrapLimits(
|
||||
includeAttachments=prefs.mailIndexAttachments,
|
||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
||||
mailContentDepth=prefs.mailContentDepth,
|
||||
neutralize=prefs.neutralizeBeforeEmbed,
|
||||
)
|
||||
limits = GmailBootstrapLimits()
|
||||
|
||||
startMs = time.time()
|
||||
result = GmailBootstrapResult(connectionId=connectionId)
|
||||
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.started part=gmail connectionId=%s",
|
||||
connectionId,
|
||||
"ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d",
|
||||
connectionId, len(dataSources),
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.started",
|
||||
"part": "gmail",
|
||||
"connectionId": connectionId,
|
||||
"dataSourceCount": len(dataSources),
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -221,26 +227,51 @@ async def bootstrapGmail(
|
|||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||
|
||||
for labelId in limits.labels:
|
||||
cancelled = False
|
||||
for ds in dataSources:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||
break
|
||||
try:
|
||||
await _ingestLabel(
|
||||
googleGetFn=googleGetFn,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
labelId=labelId,
|
||||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
|
||||
result.errors.append(f"label({labelId}): {exc}")
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
cancelled = True
|
||||
break
|
||||
|
||||
return _finalizeResult(connectionId, result, startMs)
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
dsLimits = GmailBootstrapLimits(
|
||||
maxMessages=limits.maxMessages,
|
||||
labels=limits.labels,
|
||||
maxBodyChars=limits.maxBodyChars,
|
||||
includeAttachments=limits.includeAttachments,
|
||||
maxAttachmentBytes=limits.maxAttachmentBytes,
|
||||
maxAgeDays=limits.maxAgeDays,
|
||||
mailContentDepth=limits.mailContentDepth,
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
||||
for labelId in dsLimits.labels:
|
||||
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
|
||||
break
|
||||
try:
|
||||
await _ingestLabel(
|
||||
googleGetFn=googleGetFn,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
labelId=labelId,
|
||||
limits=dsLimits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dsId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
|
||||
result.errors.append(f"label({labelId}): {exc}")
|
||||
|
||||
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||
if cancelled:
|
||||
finalResult["cancelled"] = True
|
||||
return finalResult
|
||||
|
||||
|
||||
async def _resolveDependencies(connectionId: str):
|
||||
|
|
@ -282,7 +313,8 @@ async def _ingestLabel(
|
|||
labelId: str,
|
||||
limits: GmailBootstrapLimits,
|
||||
result: GmailBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
||||
if remaining <= 0:
|
||||
|
|
@ -316,6 +348,8 @@ async def _ingestLabel(
|
|||
for stub in messageStubs:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||
break
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
msgId = stub.get("id")
|
||||
if not msgId:
|
||||
continue
|
||||
|
|
@ -337,6 +371,7 @@ async def _ingestLabel(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
nextPageToken = page.get("nextPageToken")
|
||||
|
|
@ -355,7 +390,8 @@ async def _ingestMessage(
|
|||
message: Dict[str, Any],
|
||||
limits: GmailBootstrapLimits,
|
||||
result: GmailBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
|
|
@ -368,33 +404,42 @@ async def _ingestMessage(
|
|||
subject = headers.get("subject") or "(no subject)"
|
||||
syntheticId = _syntheticMessageId(connectionId, messageId)
|
||||
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
|
||||
logItemStart("gmail", f"{labelId}/{messageId}", mime="message/rfc822")
|
||||
|
||||
contentObjects = _buildContentObjects(
|
||||
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
|
||||
)
|
||||
try:
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gmail_message",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="message/rfc822",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=str(revision) if revision else None,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "google",
|
||||
"service": "gmail",
|
||||
"externalItemId": messageId,
|
||||
"label": labelId,
|
||||
"threadId": message.get("threadId"),
|
||||
"tier": limits.mailContentDepth,
|
||||
},
|
||||
)
|
||||
handle = await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gmail_message",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="message/rfc822",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=str(revision) if revision else None,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "google",
|
||||
"service": "gmail",
|
||||
"externalItemId": messageId,
|
||||
"label": labelId,
|
||||
"threadId": message.get("threadId"),
|
||||
"tier": limits.mailContentDepth,
|
||||
},
|
||||
)
|
||||
),
|
||||
label=messageId,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True)
|
||||
result.failed += 1
|
||||
|
|
@ -420,23 +465,22 @@ async def _ingestMessage(
|
|||
parentSyntheticId=syntheticId,
|
||||
limits=limits,
|
||||
result=result,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("gmail attachments %s failed: %s", messageId, exc)
|
||||
result.errors.append(f"attachments({messageId}): {exc}")
|
||||
|
||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(
|
||||
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
|
||||
f"gmail processed={processed}",
|
||||
)
|
||||
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
if processed % 50 == 0:
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.progress",
|
||||
"part": "gmail",
|
||||
|
|
@ -461,6 +505,7 @@ async def _ingestAttachments(
|
|||
parentSyntheticId: str,
|
||||
limits: GmailBootstrapLimits,
|
||||
result: GmailBootstrapResult,
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
|
@ -512,13 +557,26 @@ async def _ingestAttachments(
|
|||
fileName = stub["filename"]
|
||||
mimeType = stub["mimeType"]
|
||||
syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"])
|
||||
attLabel = f"{messageId}/att:{stub['attachmentId']}/{fileName}"
|
||||
logItemStart("gmail-attachment", attLabel, sizeBytes=stub.get("size") or None, mime=mimeType)
|
||||
|
||||
try:
|
||||
extracted = runExtraction(
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
extractWithTimeout as _extractWithTimeout,
|
||||
)
|
||||
|
||||
def _runAttExtraction():
|
||||
return runExtraction(
|
||||
extractorRegistry, chunkerRegistry,
|
||||
rawBytes, fileName, mimeType,
|
||||
ExtractionOptions(mergeStrategy=None),
|
||||
)
|
||||
|
||||
try:
|
||||
extracted = await _extractWithTimeout(_runAttExtraction, label=attLabel)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
continue
|
||||
except Exception as exc:
|
||||
logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc)
|
||||
result.failed += 1
|
||||
|
|
@ -550,26 +608,33 @@ async def _ingestAttachments(
|
|||
continue
|
||||
|
||||
try:
|
||||
await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gmail_attachment",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "google",
|
||||
"service": "gmail",
|
||||
"parentId": parentSyntheticId,
|
||||
"externalItemId": stub["attachmentId"],
|
||||
"parentMessageId": messageId,
|
||||
},
|
||||
)
|
||||
await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="gmail_attachment",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "google",
|
||||
"service": "gmail",
|
||||
"parentId": parentSyntheticId,
|
||||
"externalItemId": stub["attachmentId"],
|
||||
"parentMessageId": messageId,
|
||||
},
|
||||
)
|
||||
),
|
||||
label=attLabel,
|
||||
)
|
||||
result.attachmentsIndexed += 1
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
except Exception as exc:
|
||||
logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc)
|
||||
result.failed += 1
|
||||
|
|
|
|||
|
|
@ -0,0 +1,439 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""kDrive bootstrap for the unified knowledge ingestion lane.
|
||||
|
||||
Walks every ragIndexEnabled kDrive DataSource, downloads file items and
|
||||
hands them to KnowledgeService.requestIngestion. Idempotency is provided
|
||||
by the ingestion facade (content-hash dedup).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
WalkerTimeout,
|
||||
downloadWithTimeout,
|
||||
extractWithTimeout,
|
||||
ingestWithTimeout,
|
||||
logItemStart,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)

# Default budgets for one bootstrap run. They seed the field defaults of
# KdriveBootstrapLimits and can be overridden per call through that class.
MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 ** 2  # 200 MiB
MAX_FILE_SIZE_DEFAULT = 25 * 1024 ** 2  # 25 MiB
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
|
||||
|
||||
|
||||
@dataclass
class KdriveBootstrapLimits:
    """Budgets and policy switches applied while walking kDrive.

    Every field falls back to the matching module-level ``*_DEFAULT``
    constant, except ``neutralize`` which defaults to ``False``.
    """

    # The walk stops once indexed + duplicate items reach this count.
    maxItems: int = MAX_ITEMS_DEFAULT
    # The walk stops once this many downloaded bytes have been accounted for.
    maxBytes: int = MAX_BYTES_DEFAULT
    # Files whose declared size exceeds this value are skipped.
    maxFileSize: int = MAX_FILE_SIZE_DEFAULT
    # Files whose MIME type starts with one of these prefixes are skipped.
    skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
    # Folders nested deeper than this are not browsed.
    maxDepth: int = MAX_DEPTH_DEFAULT
    # Forwarded unchanged as the ``neutralize`` flag of each ingestion job.
    neutralize: bool = False
|
||||
|
||||
|
||||
@dataclass
class KdriveBootstrapResult:
    """Mutable counters accumulated over one kDrive bootstrap run."""

    # Connection the run belongs to.
    connectionId: str
    # Items the ingestion facade reported as "indexed".
    indexed: int = 0
    # Items the ingestion facade reported as "duplicate".
    skippedDuplicate: int = 0
    # Items dropped by policy (MIME prefix, size, no extractable content).
    skippedPolicy: int = 0
    # Items that failed during download, extraction or ingestion.
    failed: int = 0
    # Sum of the sizes of all downloaded payloads.
    bytesProcessed: int = 0
    # Human-readable error messages; a fresh list per instance.
    errors: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||
token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
|
||||
return f"kd:{connectionId[:8]}:{token}"
|
||||
|
||||
|
||||
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
|
||||
parts = getattr(extracted, "parts", None) or []
|
||||
out: List[Dict[str, Any]] = []
|
||||
for part in parts:
|
||||
data = getattr(part, "data", None) or ""
|
||||
if not data or not str(data).strip():
|
||||
continue
|
||||
typeGroup = getattr(part, "typeGroup", "text") or "text"
|
||||
contentType = "text"
|
||||
if typeGroup == "image":
|
||||
contentType = "image"
|
||||
elif typeGroup in ("binary", "container"):
|
||||
contentType = "other"
|
||||
out.append({
|
||||
"contentObjectId": getattr(part, "id", ""),
|
||||
"contentType": contentType,
|
||||
"data": data,
|
||||
"contextRef": {
|
||||
"containerPath": fileName,
|
||||
"location": getattr(part, "label", None) or "file",
|
||||
**(getattr(part, "metadata", None) or {}),
|
||||
},
|
||||
})
|
||||
return out
|
||||
|
||||
|
||||
async def bootstrapKdrive(
    connectionId: str,
    *,
    dataSources: Optional[List[Dict[str, Any]]] = None,
    progressCb: Optional[Any] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[KdriveBootstrapLimits] = None,
    runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Enumerate kDrive folders and ingest files via the facade.

    Each entry of ``dataSources`` is walked recursively starting at its
    ``path``; its ``id`` is recorded in the provenance of every ingested
    item and its ``neutralize`` flag overrides ``limits.neutralize`` for
    that data source. Item and byte budgets are shared across all data
    sources.

    Args:
        connectionId: Id of the user connection being bootstrapped.
        dataSources: Dicts read for the keys ``path``, ``id`` and
            ``neutralize``. When empty or ``None`` nothing is walked.
        progressCb: Optional callable invoked with ``(int, str)`` progress
            updates. If it exposes ``isCancelled()``, that is polled to
            stop the run early.
        adapter: kDrive adapter providing ``browse`` and ``download``.
        connection: Connection record; ``mandateId`` and ``userId`` are
            read from it.
        knowledgeService: Service providing ``requestIngestion``.
        limits: Budgets for the run; defaults to ``KdriveBootstrapLimits()``.
        runExtractionFn: Callable ``(bytes, name, mime, options)``; defaults
            to the project's ``runExtraction`` with fresh registries.

    Returns:
        ``{"connectionId", "skipped": True, "reason": "no_datasources"}``
        when there is nothing to walk, otherwise the summary produced by
        ``_finalizeResult`` plus ``"cancelled": True`` if cancellation was
        observed.
    """
    if not dataSources:
        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}

    if not limits:
        limits = KdriveBootstrapLimits()

    startMs = time.time()
    result = KdriveBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=kdrive connectionId=%s dataSources=%d",
        connectionId, len(dataSources),
        extra={"event": "ingestion.connection.bootstrap.started", "part": "kdrive",
               "connectionId": connectionId, "dataSourceCount": len(dataSources)},
    )

    # If any dependency is missing, all three are resolved from the connection id.
    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
    if runExtractionFn is None:
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.serviceCenter.services.serviceExtraction.subRegistry import (
            ExtractorRegistry, ChunkerRegistry,
        )
        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()

        def runExtractionFn(bytesData, name, mime, options):
            return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    def _isCancelled() -> bool:
        return bool(
            progressCb
            and hasattr(progressCb, "isCancelled")
            and progressCb.isCancelled()
        )

    cancelled = False
    for ds in dataSources:
        if result.indexed + result.skippedDuplicate >= limits.maxItems:
            break
        # The walker stops at the first entry once the byte budget is spent,
        # so skip the pointless browse call for the remaining data sources.
        if result.bytesProcessed >= limits.maxBytes:
            break
        if _isCancelled():
            cancelled = True
            break

        dsPath = ds.get("path", "")
        dsId = ds.get("id", "")
        dsNeutralize = ds.get("neutralize", False)
        dsLimits = KdriveBootstrapLimits(
            maxItems=limits.maxItems,
            maxBytes=limits.maxBytes,
            maxFileSize=limits.maxFileSize,
            skipMimePrefixes=limits.skipMimePrefixes,
            maxDepth=limits.maxDepth,
            neutralize=dsNeutralize,
        )

        try:
            await _walkFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                runExtractionFn=runExtractionFn,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderPath=dsPath,
                depth=0,
                limits=dsLimits,
                result=result,
                progressCb=progressCb,
                dataSourceId=dsId,
            )
        except Exception as exc:
            logger.error("kdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
            result.errors.append(f"walk({dsPath}): {exc}")

    # The walker returns silently when it sees a cancellation, so a cancel
    # during the last (or only) data source would otherwise go unreported.
    if not cancelled and _isCancelled():
        cancelled = True

    finalResult = _finalizeResult(connectionId, result, startMs)
    if cancelled:
        finalResult["cancelled"] = True
    return finalResult
|
||||
|
||||
|
||||
async def _resolveDependencies(connectionId: str):
    """Build the adapter, connection record and knowledge service for a run.

    Looks up the user connection, fetches a fresh access token, creates the
    Infomaniak connector's ``kdrive`` service adapter and obtains the
    ``knowledge`` service in a root-user context scoped to the connection's
    mandate.

    Returns:
        Tuple ``(adapter, connection, knowledgeService)``.

    Raises:
        ValueError: If the connection does not exist or no usable token is
            available for it.
    """
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerInfomaniak.connectorInfomaniak import InfomaniakConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    connection = getRootInterface().getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    freshToken = TokenManager().getFreshToken(connectionId)
    if not (freshToken and freshToken.tokenAccess):
        raise ValueError(f"No valid token for connection {connectionId}")

    connector = InfomaniakConnector(connection, freshToken.tokenAccess)
    kdriveAdapter = connector.getServiceAdapter("kdrive")

    serviceContext = ServiceCenterContext(
        user=getRootUser(),
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    return kdriveAdapter, connection, getService("knowledge", serviceContext)
|
||||
|
||||
|
||||
async def _walkFolder(
|
||||
*,
|
||||
adapter,
|
||||
knowledgeService,
|
||||
runExtractionFn,
|
||||
connectionId: str,
|
||||
mandateId: str,
|
||||
userId: str,
|
||||
folderPath: str,
|
||||
depth: int,
|
||||
limits: KdriveBootstrapLimits,
|
||||
result: KdriveBootstrapResult,
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
if depth > limits.maxDepth:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
try:
|
||||
entries = await adapter.browse(folderPath)
|
||||
except Exception as exc:
|
||||
logger.warning("kdrive browse %s failed: %s", folderPath, exc)
|
||||
result.errors.append(f"browse({folderPath}): {exc}")
|
||||
return
|
||||
|
||||
for entry in entries:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||
return
|
||||
if result.bytesProcessed >= limits.maxBytes:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
entryPath = getattr(entry, "path", "") or ""
|
||||
if getattr(entry, "isFolder", False):
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
runExtractionFn=runExtractionFn,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
folderPath=entryPath,
|
||||
depth=depth + 1,
|
||||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
continue
|
||||
|
||||
mimeType = getattr(entry, "mimeType", None) or "application/octet-stream"
|
||||
if any(mimeType.startswith(prefix) for prefix in limits.skipMimePrefixes):
|
||||
result.skippedPolicy += 1
|
||||
continue
|
||||
size = int(getattr(entry, "size", 0) or 0)
|
||||
if size and size > limits.maxFileSize:
|
||||
result.skippedPolicy += 1
|
||||
continue
|
||||
|
||||
metadata = getattr(entry, "metadata", {}) or {}
|
||||
externalItemId = metadata.get("id") or entryPath
|
||||
revision = metadata.get("revision") or metadata.get("lastModified")
|
||||
|
||||
await _ingestOne(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
runExtractionFn=runExtractionFn,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
entry=entry,
|
||||
entryPath=entryPath,
|
||||
mimeType=mimeType,
|
||||
externalItemId=externalItemId,
|
||||
revision=revision,
|
||||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
|
||||
async def _ingestOne(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    entry,
    entryPath: str,
    mimeType: str,
    externalItemId: str,
    revision: Optional[str],
    limits: KdriveBootstrapLimits,
    result: KdriveBootstrapResult,
    progressCb: Optional[Any],
    dataSourceId: str = "",
) -> None:
    """Download, extract and ingest a single kDrive file.

    The three stages (download, extraction, ingestion) each run through the
    matching ``*WithTimeout`` helper. A ``WalkerTimeout`` or any other
    exception in a stage is recorded in ``result.failed`` / ``result.errors``
    and ends processing of this item without raising.

    Outcome accounting on ``result``:
        * ``indexed`` / ``skippedDuplicate`` follow the ingestion handle's
          ``status``; any other status counts as ``failed``.
        * ``skippedPolicy`` is incremented when extraction yields no content
          objects, or when the downloaded payload is larger than
          ``limits.maxFileSize`` (the walker can only filter on the declared
          size, which may be unknown).
        * ``bytesProcessed`` grows by the size of every non-empty download.

    ``progressCb`` is called best-effort whenever the processed count is a
    multiple of 5; errors raised by it are logged at debug level and ignored.
    """
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    syntheticFileId = _syntheticFileId(connectionId, externalItemId)
    fileName = getattr(entry, "name", "") or externalItemId
    declaredSize = int(getattr(entry, "size", 0) or 0) or None
    logItemStart("kdrive", entryPath, sizeBytes=declaredSize, mime=mimeType)

    try:
        downloadResult = await downloadWithTimeout(adapter.download(entryPath), label=entryPath)
        fileBytes = getattr(downloadResult, "data", None)
        dlFileName = getattr(downloadResult, "fileName", None)
        dlMimeType = getattr(downloadResult, "mimeType", None)
        # Name and MIME type reported by the download take precedence over
        # what the folder listing said.
        if dlFileName:
            fileName = dlFileName
        if dlMimeType:
            mimeType = dlMimeType
    except WalkerTimeout as exc:
        result.failed += 1
        result.errors.append(str(exc))
        return
    except Exception as exc:
        logger.warning("kdrive download %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"download({entryPath}): {exc}")
        return
    if not fileBytes:
        # Record why the item failed instead of only bumping the counter.
        result.failed += 1
        result.errors.append(f"download({entryPath}): empty payload")
        return

    actualSize = len(fileBytes)
    result.bytesProcessed += actualSize

    # Enforce the per-file limit on the real payload as well: entries with an
    # unknown (0) or understated declared size pass the walker's pre-filter.
    if actualSize > limits.maxFileSize:
        logger.info(
            "kdrive skip %s: downloaded size %d exceeds maxFileSize %d",
            entryPath, actualSize, limits.maxFileSize,
        )
        result.skippedPolicy += 1
        return

    try:
        extracted = await extractWithTimeout(
            runExtractionFn,
            fileBytes, fileName, mimeType,
            ExtractionOptions(mergeStrategy=None),
            label=entryPath,
        )
    except WalkerTimeout as exc:
        result.failed += 1
        result.errors.append(str(exc))
        return
    except Exception as exc:
        logger.warning("kdrive extraction %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"extract({entryPath}): {exc}")
        return

    contentObjects = _toContentObjects(extracted, fileName)
    if not contentObjects:
        result.skippedPolicy += 1
        return

    provenance: Dict[str, Any] = {
        "connectionId": connectionId,
        "dataSourceId": dataSourceId,
        "authority": "infomaniak",
        "service": "kdrive",
        "externalItemId": externalItemId,
        "externalPath": entryPath,
        "revision": revision,
    }
    try:
        handle = await ingestWithTimeout(
            knowledgeService.requestIngestion(
                IngestionJob(
                    sourceKind="kdrive_item",
                    sourceId=syntheticFileId,
                    fileName=fileName,
                    mimeType=mimeType,
                    userId=userId,
                    mandateId=mandateId,
                    contentObjects=contentObjects,
                    contentVersion=revision,
                    neutralize=limits.neutralize,
                    provenance=provenance,
                )
            ),
            label=entryPath,
        )
    except WalkerTimeout as exc:
        result.failed += 1
        result.errors.append(str(exc))
        return
    except Exception as exc:
        logger.error("kdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({entryPath}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1
        if handle.error:
            result.errors.append(f"ingest({entryPath}): {handle.error}")

    processed = result.indexed + result.skippedDuplicate
    if progressCb is not None and processed % 5 == 0:
        try:
            progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
        except Exception as exc:
            # Progress reporting is best-effort and must never abort the walk.
            logger.debug("kdrive progress callback failed: %s", exc)

    # Yield to the event loop between items so other tasks can run.
    await asyncio.sleep(0)
|
||||
|
||||
|
||||
def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
    """Log the completion event and return the run summary as a plain dict.

    Args:
        connectionId: Used in the log record.
        result: Counters accumulated during the run.
        startMs: ``time.time()`` value taken at the start of the run. Despite
            the name it is in seconds; the duration is converted to
            milliseconds here.

    Returns:
        Dict with the counters, ``durationMs`` and at most the first 20
        entries of ``result.errors``.
    """
    durationMs = int((time.time() - startMs) * 1000)

    logExtra = {
        "event": "ingestion.connection.bootstrap.done",
        "part": "kdrive",
        "connectionId": connectionId,
        "indexed": result.indexed,
        "skippedDup": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "durationMs": durationMs,
    }
    logger.info(
        "ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
        connectionId,
        result.indexed,
        result.skippedDuplicate,
        result.skippedPolicy,
        result.failed,
        durationMs,
        extra=logExtra,
    )

    summary: Dict[str, Any] = {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "bytesProcessed": result.bytesProcessed,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
    return summary
|
||||
|
|
@ -18,9 +18,15 @@ import hashlib
|
|||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
WalkerTimeout,
|
||||
extractWithTimeout,
|
||||
ingestWithTimeout,
|
||||
logItemStart,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -139,34 +145,35 @@ def _buildContentObjects(
|
|||
async def bootstrapOutlook(
|
||||
connectionId: str,
|
||||
*,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||
progressCb: Optional[Any] = None,
|
||||
adapter: Any = None,
|
||||
connection: Any = None,
|
||||
knowledgeService: Any = None,
|
||||
limits: Optional[OutlookBootstrapLimits] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||
prefs = loadConnectionPrefs(connectionId)
|
||||
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages.
|
||||
|
||||
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||
Each DataSource defines the neutralize policy for its messages.
|
||||
"""
|
||||
if not dataSources:
|
||||
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||
|
||||
if not limits:
|
||||
limits = OutlookBootstrapLimits(
|
||||
includeAttachments=prefs.mailIndexAttachments,
|
||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
||||
mailContentDepth=prefs.mailContentDepth,
|
||||
neutralize=prefs.neutralizeBeforeEmbed,
|
||||
)
|
||||
limits = OutlookBootstrapLimits()
|
||||
|
||||
startMs = time.time()
|
||||
result = OutlookBootstrapResult(connectionId=connectionId)
|
||||
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
|
||||
connectionId,
|
||||
"ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d",
|
||||
connectionId, len(dataSources),
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.started",
|
||||
"part": "outlook",
|
||||
"connectionId": connectionId,
|
||||
"dataSourceCount": len(dataSources),
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -176,27 +183,52 @@ async def bootstrapOutlook(
|
|||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||
|
||||
folderIds = await _selectFolderIds(adapter, limits)
|
||||
for folderId in folderIds:
|
||||
cancelled = False
|
||||
for ds in dataSources:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||
break
|
||||
try:
|
||||
await _ingestFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
folderId=folderId,
|
||||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
|
||||
result.errors.append(f"folder({folderId}): {exc}")
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
cancelled = True
|
||||
break
|
||||
|
||||
return _finalizeResult(connectionId, result, startMs)
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
dsLimits = OutlookBootstrapLimits(
|
||||
maxMessages=limits.maxMessages,
|
||||
maxFolders=limits.maxFolders,
|
||||
maxBodyChars=limits.maxBodyChars,
|
||||
includeAttachments=limits.includeAttachments,
|
||||
maxAttachmentBytes=limits.maxAttachmentBytes,
|
||||
maxAgeDays=limits.maxAgeDays,
|
||||
mailContentDepth=limits.mailContentDepth,
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
||||
folderIds = await _selectFolderIds(adapter, dsLimits)
|
||||
for folderId in folderIds:
|
||||
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
|
||||
break
|
||||
try:
|
||||
await _ingestFolder(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
folderId=folderId,
|
||||
limits=dsLimits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dsId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
|
||||
result.errors.append(f"folder({folderId}): {exc}")
|
||||
|
||||
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||
if cancelled:
|
||||
finalResult["cancelled"] = True
|
||||
return finalResult
|
||||
|
||||
|
||||
async def _resolveDependencies(connectionId: str):
|
||||
|
|
@ -266,8 +298,12 @@ async def _ingestFolder(
|
|||
folderId: str,
|
||||
limits: OutlookBootstrapLimits,
|
||||
result: OutlookBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
||||
if remaining <= 0:
|
||||
return
|
||||
|
|
@ -307,6 +343,8 @@ async def _ingestFolder(
|
|||
for message in page.get("value", []) or []:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||
break
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
await _ingestMessage(
|
||||
adapter=adapter,
|
||||
knowledgeService=knowledgeService,
|
||||
|
|
@ -317,6 +355,7 @@ async def _ingestFolder(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
nextLink = page.get("@odata.nextLink")
|
||||
|
|
@ -338,7 +377,8 @@ async def _ingestMessage(
|
|||
message: Dict[str, Any],
|
||||
limits: OutlookBootstrapLimits,
|
||||
result: OutlookBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
|
|
@ -350,33 +390,42 @@ async def _ingestMessage(
|
|||
subject = message.get("subject") or "(no subject)"
|
||||
syntheticId = _syntheticMessageId(connectionId, messageId)
|
||||
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
|
||||
logItemStart("outlook", messageId, mime="message/rfc822")
|
||||
|
||||
contentObjects = _buildContentObjects(
|
||||
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
|
||||
)
|
||||
# Always at least the header is emitted, so `contentObjects` is non-empty.
|
||||
try:
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="outlook_message",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="message/rfc822",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "msft",
|
||||
"service": "outlook",
|
||||
"externalItemId": messageId,
|
||||
"internetMessageId": message.get("internetMessageId"),
|
||||
"tier": limits.mailContentDepth,
|
||||
},
|
||||
)
|
||||
handle = await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="outlook_message",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType="message/rfc822",
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "msft",
|
||||
"service": "outlook",
|
||||
"externalItemId": messageId,
|
||||
"internetMessageId": message.get("internetMessageId"),
|
||||
"tier": limits.mailContentDepth,
|
||||
},
|
||||
)
|
||||
),
|
||||
label=messageId,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True)
|
||||
result.failed += 1
|
||||
|
|
@ -402,23 +451,22 @@ async def _ingestMessage(
|
|||
parentSyntheticId=syntheticId,
|
||||
limits=limits,
|
||||
result=result,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("outlook attachments %s failed: %s", messageId, exc)
|
||||
result.errors.append(f"attachments({messageId}): {exc}")
|
||||
|
||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(
|
||||
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
|
||||
f"outlook processed={processed}",
|
||||
)
|
||||
progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert")
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
if processed % 50 == 0:
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.progress",
|
||||
"part": "outlook",
|
||||
|
|
@ -443,6 +491,7 @@ async def _ingestAttachments(
|
|||
parentSyntheticId: str,
|
||||
limits: OutlookBootstrapLimits,
|
||||
result: OutlookBootstrapResult,
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
|
@ -481,13 +530,22 @@ async def _ingestAttachments(
|
|||
mimeType = attachment.get("contentType") or "application/octet-stream"
|
||||
attachmentId = attachment.get("id") or fileName
|
||||
syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId)
|
||||
attLabel = f"{messageId}/att:{attachmentId}/{fileName}"
|
||||
logItemStart("outlook-attachment", attLabel, sizeBytes=size or None, mime=mimeType)
|
||||
|
||||
try:
|
||||
extracted = runExtraction(
|
||||
def _runAttExtraction():
|
||||
return runExtraction(
|
||||
extractorRegistry, chunkerRegistry,
|
||||
rawBytes, fileName, mimeType,
|
||||
ExtractionOptions(mergeStrategy=None),
|
||||
)
|
||||
|
||||
try:
|
||||
extracted = await extractWithTimeout(_runAttExtraction, label=attLabel)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
continue
|
||||
except Exception as exc:
|
||||
logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc)
|
||||
result.failed += 1
|
||||
|
|
@ -519,27 +577,34 @@ async def _ingestAttachments(
|
|||
continue
|
||||
|
||||
try:
|
||||
await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="outlook_attachment",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"authority": "msft",
|
||||
"service": "outlook",
|
||||
"parentId": parentSyntheticId,
|
||||
"externalItemId": attachmentId,
|
||||
"parentMessageId": messageId,
|
||||
},
|
||||
)
|
||||
await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="outlook_attachment",
|
||||
sourceId=syntheticId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
neutralize=limits.neutralize,
|
||||
provenance={
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "msft",
|
||||
"service": "outlook",
|
||||
"parentId": parentSyntheticId,
|
||||
"externalItemId": attachmentId,
|
||||
"parentMessageId": messageId,
|
||||
},
|
||||
)
|
||||
),
|
||||
label=attLabel,
|
||||
)
|
||||
result.attachmentsIndexed += 1
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
except Exception as exc:
|
||||
logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc)
|
||||
result.failed += 1
|
||||
|
|
|
|||
|
|
@ -20,6 +20,13 @@ from dataclasses import dataclass, field
|
|||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||
from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import (
|
||||
WalkerTimeout,
|
||||
downloadWithTimeout,
|
||||
extractWithTimeout,
|
||||
ingestWithTimeout,
|
||||
logItemStart,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -94,35 +101,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
|
|||
async def bootstrapSharepoint(
|
||||
connectionId: str,
|
||||
*,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||
progressCb: Optional[Any] = None,
|
||||
adapter: Any = None,
|
||||
connection: Any = None,
|
||||
knowledgeService: Any = None,
|
||||
limits: Optional[SharepointBootstrapLimits] = None,
|
||||
runExtractionFn: Optional[Callable[..., Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
|
||||
"""Enumerate SharePoint drives and ingest files via the facade.
|
||||
|
||||
Parameters allow injection for tests; production callers pass only
|
||||
`connectionId` (and optionally a progressCb) and everything else is
|
||||
resolved against the registered services.
|
||||
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||
Each DataSource defines the root path + neutralize policy for its subtree.
|
||||
"""
|
||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||
prefs = loadConnectionPrefs(connectionId)
|
||||
if not dataSources:
|
||||
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||
|
||||
if not limits:
|
||||
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
|
||||
limits = SharepointBootstrapLimits()
|
||||
|
||||
startMs = time.time()
|
||||
result = SharepointBootstrapResult(connectionId=connectionId)
|
||||
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
|
||||
connectionId,
|
||||
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d",
|
||||
connectionId, len(dataSources),
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.started",
|
||||
"part": "sharepoint",
|
||||
"connectionId": connectionId,
|
||||
"dataSourceCount": len(dataSources),
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -142,17 +150,27 @@ async def bootstrapSharepoint(
|
|||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||
|
||||
try:
|
||||
sites = await adapter.browse("/", limit=limits.maxSites)
|
||||
except Exception as exc:
|
||||
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
|
||||
result.errors.append(f"site_discovery: {exc}")
|
||||
return _finalizeResult(connectionId, result, startMs)
|
||||
|
||||
for site in sites[: limits.maxSites]:
|
||||
cancelled = False
|
||||
for ds in dataSources:
|
||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||
break
|
||||
sitePath = getattr(site, "path", "") or ""
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
cancelled = True
|
||||
break
|
||||
|
||||
dsPath = ds.get("path", "")
|
||||
dsId = ds.get("id", "")
|
||||
dsNeutralize = ds.get("neutralize", False)
|
||||
dsLimits = SharepointBootstrapLimits(
|
||||
maxItems=limits.maxItems,
|
||||
maxBytes=limits.maxBytes,
|
||||
maxFileSize=limits.maxFileSize,
|
||||
skipMimePrefixes=limits.skipMimePrefixes,
|
||||
maxDepth=limits.maxDepth,
|
||||
maxSites=limits.maxSites,
|
||||
neutralize=dsNeutralize,
|
||||
)
|
||||
|
||||
try:
|
||||
await _walkFolder(
|
||||
adapter=adapter,
|
||||
|
|
@ -161,17 +179,21 @@ async def bootstrapSharepoint(
|
|||
connectionId=connectionId,
|
||||
mandateId=mandateId,
|
||||
userId=userId,
|
||||
folderPath=sitePath,
|
||||
folderPath=dsPath,
|
||||
depth=0,
|
||||
limits=limits,
|
||||
limits=dsLimits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dsId,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
|
||||
result.errors.append(f"walk({sitePath}): {exc}")
|
||||
logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
|
||||
result.errors.append(f"walk({dsPath}): {exc}")
|
||||
|
||||
return _finalizeResult(connectionId, result, startMs)
|
||||
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||
if cancelled:
|
||||
finalResult["cancelled"] = True
|
||||
return finalResult
|
||||
|
||||
|
||||
async def _resolveDependencies(connectionId: str):
|
||||
|
|
@ -221,10 +243,13 @@ async def _walkFolder(
|
|||
depth: int,
|
||||
limits: SharepointBootstrapLimits,
|
||||
result: SharepointBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
if depth > limits.maxDepth:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||
return
|
||||
try:
|
||||
entries = await adapter.browse(folderPath)
|
||||
except Exception as exc:
|
||||
|
|
@ -237,6 +262,8 @@ async def _walkFolder(
|
|||
return
|
||||
if result.bytesProcessed >= limits.maxBytes:
|
||||
return
|
||||
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||
return
|
||||
|
||||
entryPath = getattr(entry, "path", "") or ""
|
||||
if getattr(entry, "isFolder", False):
|
||||
|
|
@ -252,6 +279,7 @@ async def _walkFolder(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
continue
|
||||
|
||||
|
|
@ -283,6 +311,7 @@ async def _walkFolder(
|
|||
limits=limits,
|
||||
result=result,
|
||||
progressCb=progressCb,
|
||||
dataSourceId=dataSourceId,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -301,15 +330,22 @@ async def _ingestOne(
|
|||
revision: Optional[str],
|
||||
limits: SharepointBootstrapLimits,
|
||||
result: SharepointBootstrapResult,
|
||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||
progressCb: Optional[Any],
|
||||
dataSourceId: str = "",
|
||||
) -> None:
|
||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||
|
||||
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
|
||||
fileName = getattr(entry, "name", "") or externalItemId
|
||||
declaredSize = int(getattr(entry, "size", 0) or 0) or None
|
||||
logItemStart("sharepoint", entryPath, sizeBytes=declaredSize, mime=mimeType)
|
||||
|
||||
try:
|
||||
fileBytes = await adapter.download(entryPath)
|
||||
fileBytes = await downloadWithTimeout(adapter.download(entryPath), label=entryPath)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.warning("sharepoint download %s failed: %s", entryPath, exc)
|
||||
result.failed += 1
|
||||
|
|
@ -322,10 +358,16 @@ async def _ingestOne(
|
|||
result.bytesProcessed += len(fileBytes)
|
||||
|
||||
try:
|
||||
extracted = runExtractionFn(
|
||||
extracted = await extractWithTimeout(
|
||||
runExtractionFn,
|
||||
fileBytes, fileName, mimeType,
|
||||
ExtractionOptions(mergeStrategy=None),
|
||||
label=entryPath,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.warning("sharepoint extraction %s failed: %s", entryPath, exc)
|
||||
result.failed += 1
|
||||
|
|
@ -339,6 +381,7 @@ async def _ingestOne(
|
|||
|
||||
provenance: Dict[str, Any] = {
|
||||
"connectionId": connectionId,
|
||||
"dataSourceId": dataSourceId,
|
||||
"authority": "msft",
|
||||
"service": "sharepoint",
|
||||
"externalItemId": externalItemId,
|
||||
|
|
@ -346,20 +389,27 @@ async def _ingestOne(
|
|||
"revision": revision,
|
||||
}
|
||||
try:
|
||||
handle = await knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="sharepoint_item",
|
||||
sourceId=syntheticFileId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision,
|
||||
neutralize=limits.neutralize,
|
||||
provenance=provenance,
|
||||
)
|
||||
handle = await ingestWithTimeout(
|
||||
knowledgeService.requestIngestion(
|
||||
IngestionJob(
|
||||
sourceKind="sharepoint_item",
|
||||
sourceId=syntheticFileId,
|
||||
fileName=fileName,
|
||||
mimeType=mimeType,
|
||||
userId=userId,
|
||||
mandateId=mandateId,
|
||||
contentObjects=contentObjects,
|
||||
contentVersion=revision,
|
||||
neutralize=limits.neutralize,
|
||||
provenance=provenance,
|
||||
)
|
||||
),
|
||||
label=entryPath,
|
||||
)
|
||||
except WalkerTimeout as exc:
|
||||
result.failed += 1
|
||||
result.errors.append(str(exc))
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True)
|
||||
result.failed += 1
|
||||
|
|
@ -375,27 +425,17 @@ async def _ingestOne(
|
|||
if handle.error:
|
||||
result.errors.append(f"ingest({entryPath}): {handle.error}")
|
||||
|
||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
processed = result.indexed + result.skippedDuplicate
|
||||
if progressCb is not None and processed % 5 == 0:
|
||||
try:
|
||||
progressCb(
|
||||
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
|
||||
f"sharepoint processed={processed}",
|
||||
)
|
||||
progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert")
|
||||
except Exception:
|
||||
pass
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d",
|
||||
processed, result.skippedDuplicate, result.failed,
|
||||
extra={
|
||||
"event": "ingestion.connection.bootstrap.progress",
|
||||
"part": "sharepoint",
|
||||
"connectionId": connectionId,
|
||||
"processed": processed,
|
||||
"skippedDup": result.skippedDuplicate,
|
||||
"failed": result.failed,
|
||||
},
|
||||
)
|
||||
if processed % 50 == 0:
|
||||
logger.info(
|
||||
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d indexed=%d failed=%d",
|
||||
processed, result.indexed, result.failed,
|
||||
)
|
||||
|
||||
# Yield so the event loop can interleave other tasks (download/extract are
|
||||
# CPU-ish and extraction uses sync libs; cooperative scheduling prevents
|
||||
|
|
|
|||
|
|
@ -0,0 +1,78 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
|
||||
|
||||
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
|
||||
If no ancestor has a value, the default (False) is used.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def resolveEffectiveNeutralize(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Return the neutralize policy that applies to ``ds``.

    An explicit value on ``ds`` itself always wins: ``False`` disables
    neutralization, and any other non-``None`` value enables it. Only when
    the key is missing or ``None`` is the decision delegated to the nearest
    ancestor DataSource (e.g. ``/sites/HR/Documents`` inherits from
    ``/sites/HR``).

    Args:
        ds: The DataSource record whose effective policy is wanted.
        allDataSources: All known DataSource records, searched for ancestors.

    Returns:
        ``True`` if content under ``ds`` should be neutralized, else ``False``.
    """
    explicit = ds.get("neutralize")
    if explicit is None:
        # Nothing set on this node: inherit from the path hierarchy.
        return _findAncestorPolicy(ds, allDataSources, "neutralize")
    # Identity test on purpose: only the literal False opts out.
    return explicit is not False
|
||||
|
||||
|
||||
def resolveEffectiveRagIndexEnabled(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Return the ragIndexEnabled policy that applies to ``ds``.

    A literal ``True`` or ``False`` stored on ``ds`` is returned as-is. Any
    other value (including a missing key) defers to the nearest ancestor
    DataSource in the path tree.

    Args:
        ds: The DataSource record whose effective policy is wanted.
        allDataSources: All known DataSource records, searched for ancestors.

    Returns:
        ``True`` if ``ds`` should be RAG-indexed, else ``False``.
    """
    explicit = ds.get("ragIndexEnabled")
    # Only genuine booleans count as an explicit override.
    if explicit is True or explicit is False:
        return explicit
    return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
|
||||
|
||||
|
||||
def _findAncestorPolicy(
|
||||
ds: Dict[str, Any],
|
||||
allDataSources: List[Dict[str, Any]],
|
||||
field: str,
|
||||
) -> bool:
|
||||
"""Walk ancestors (longest-prefix match) to find an inherited policy value."""
|
||||
dsPath = ds.get("path", "")
|
||||
connectionId = ds.get("connectionId", "")
|
||||
if not dsPath:
|
||||
return False
|
||||
|
||||
ancestors = []
|
||||
for candidate in allDataSources:
|
||||
if candidate.get("id") == ds.get("id"):
|
||||
continue
|
||||
if candidate.get("connectionId") != connectionId:
|
||||
continue
|
||||
candidatePath = candidate.get("path", "")
|
||||
if not candidatePath:
|
||||
continue
|
||||
if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath):
|
||||
ancestors.append(candidate)
|
||||
|
||||
ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
|
||||
|
||||
for ancestor in ancestors:
|
||||
val = ancestor.get(field)
|
||||
if val is True:
|
||||
return True
|
||||
if val is False:
|
||||
return False
|
||||
return False
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Shared helpers for ingestion walkers (timeouts, per-item logging).
|
||||
|
||||
Walkers (sharepoint, gdrive, gmail, outlook, clickup, kdrive) all face the
|
||||
same risks:
|
||||
|
||||
- A single `adapter.download()` call can hang on the network for hours.
|
||||
- A single `runExtraction()` call can hang on a corrupt PDF/Office doc inside
|
||||
a sync extractor library, blocking the asyncio loop.
|
||||
- A single `requestIngestion()` call can stall on the embedding API.
|
||||
|
||||
Without timeouts, one bad item freezes the whole bootstrap job and we end
|
||||
up with "Job stuck at 10% for 10h" zombies.
|
||||
|
||||
These helpers wrap each phase in `asyncio.wait_for`. Sync extraction runs
|
||||
on a worker thread so the loop stays responsive. Every wrapped call also
|
||||
emits a short start/done log line, so when something hangs we know the
|
||||
exact item that caused it (path, size, mime).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any, Awaitable, Callable, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DOWNLOAD_TIMEOUT_S = 60
|
||||
EXTRACTION_TIMEOUT_S = 90
|
||||
INGEST_TIMEOUT_S = 60
|
||||
|
||||
|
||||
class WalkerTimeout(Exception):
    """Signals that a single walker phase ran past its allotted time budget."""
|
||||
|
||||
|
||||
async def downloadWithTimeout(
    awaitable: Awaitable[Any],
    *,
    label: str,
    timeoutSeconds: int = DOWNLOAD_TIMEOUT_S,
) -> Any:
    """Await a download, giving up after ``timeoutSeconds``.

    Args:
        awaitable: The pending download to await.
        label: Short identifier for the item (typically its external path),
            included in every log line so a slow or stuck item can be found.
        timeoutSeconds: Upper bound for the wait, in seconds.

    Returns:
        Whatever ``awaitable`` resolves to.

    Raises:
        WalkerTimeout: If the download does not finish in time.
    """
    logger.info("walker.download.start %s timeout=%ds", label, timeoutSeconds)
    try:
        payload = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
    except asyncio.TimeoutError as timeoutErr:
        logger.warning("walker.download.timeout %s after %ds", label, timeoutSeconds)
        raise WalkerTimeout(f"download timeout after {timeoutSeconds}s: {label}") from timeoutErr
    logger.debug("walker.download.done %s", label)
    return payload
|
||||
|
||||
|
||||
async def extractWithTimeout(
    syncFn: Callable[..., Any],
    *args: Any,
    label: str,
    timeoutSeconds: int = EXTRACTION_TIMEOUT_S,
) -> Any:
    """Run a blocking extraction function in a worker thread, with a timeout.

    The call is dispatched through ``asyncio.to_thread`` so the event loop
    is not blocked while it runs. The timeout only releases the awaiting
    caller: a timed-out worker thread is not stopped and may keep running
    in the background, but the walker can move on to the next item.

    Args:
        syncFn: The synchronous extraction callable.
        *args: Positional arguments forwarded to ``syncFn``.
        label: Short identifier for the item, used in log lines.
        timeoutSeconds: Upper bound for the wait, in seconds.

    Returns:
        The return value of ``syncFn(*args)``.

    Raises:
        WalkerTimeout: If extraction does not finish in time.
    """
    logger.info("walker.extract.start %s timeout=%ds", label, timeoutSeconds)
    try:
        threadedCall = asyncio.to_thread(syncFn, *args)
        extracted = await asyncio.wait_for(threadedCall, timeout=timeoutSeconds)
    except asyncio.TimeoutError as timeoutErr:
        logger.warning("walker.extract.timeout %s after %ds", label, timeoutSeconds)
        raise WalkerTimeout(f"extract timeout after {timeoutSeconds}s: {label}") from timeoutErr
    logger.debug("walker.extract.done %s", label)
    return extracted
|
||||
|
||||
|
||||
async def ingestWithTimeout(
    awaitable: Awaitable[Any],
    *,
    label: str,
    timeoutSeconds: int = INGEST_TIMEOUT_S,
) -> Any:
    """Await an ingestion request, giving up after ``timeoutSeconds``.

    Args:
        awaitable: The pending ingestion request to await.
        label: Short identifier for the item, used in log lines.
        timeoutSeconds: Upper bound for the wait, in seconds.

    Returns:
        Whatever ``awaitable`` resolves to.

    Raises:
        WalkerTimeout: If the request does not finish in time.
    """
    logger.debug("walker.ingest.start %s timeout=%ds", label, timeoutSeconds)
    try:
        outcome = await asyncio.wait_for(awaitable, timeout=timeoutSeconds)
    except asyncio.TimeoutError as timeoutErr:
        logger.warning("walker.ingest.timeout %s after %ds", label, timeoutSeconds)
        raise WalkerTimeout(f"ingest timeout after {timeoutSeconds}s: {label}") from timeoutErr
    logger.debug("walker.ingest.done %s", label)
    return outcome
|
||||
|
||||
|
||||
def logItemStart(service: str, label: str, *, sizeBytes: Optional[int] = None, mime: Optional[str] = None) -> None:
    """Emit a single ``walker.item.start`` log line for one item.

    Intended for stuck-job triage: if a worker hangs, the last such line in
    the log identifies the item that was being processed.

    Args:
        service: Name of the walker/service handling the item.
        label: Identifier of the item, logged as ``path=``.
        sizeBytes: Item size; appended as ``size=`` unless ``None``.
        mime: MIME type; appended as ``mime=`` when non-empty.
    """
    message = f"walker.item.start service={service} path={label}"
    if sizeBytes is not None:
        message += f" size={sizeBytes}"
    if mime:
        message += f" mime={mime}"
    logger.info(message)
|
||||
|
|
@ -98,7 +98,8 @@ class WebService:
|
|||
searchUrls = []
|
||||
searchResultsWithContent = []
|
||||
if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
|
||||
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
|
||||
if operationId:
|
||||
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
|
||||
|
||||
try:
|
||||
searchUrls, searchResultsWithContent = await self._performWebSearch(
|
||||
|
|
@ -113,16 +114,14 @@ class WebService:
|
|||
searchUrls = []
|
||||
searchResultsWithContent = []
|
||||
|
||||
# Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant)
|
||||
if searchUrls:
|
||||
# Prepend Tavily URLs to the list (they're more relevant)
|
||||
allUrls = searchUrls + allUrls
|
||||
logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
|
||||
else:
|
||||
# If Tavily search failed, use AI-extracted URLs
|
||||
logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
|
||||
|
||||
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
|
||||
if operationId:
|
||||
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
|
||||
|
||||
# If we have search results (even without content), use them directly instead of crawling
|
||||
# Tavily search results are more relevant than generic AI-extracted URLs
|
||||
|
|
|
|||
|
|
@ -85,6 +85,11 @@ class AiAuditLogger:
|
|||
try:
|
||||
from modules.datamodels.datamodelAiAudit import AiAuditLogEntry
|
||||
|
||||
if contentInput:
|
||||
contentInput = contentInput.replace("\x00", "")
|
||||
if contentOutput:
|
||||
contentOutput = contentOutput.replace("\x00", "")
|
||||
|
||||
inputPreview = (contentInput or "")[:_PREVIEW_LENGTH] or None
|
||||
outputPreview = (contentOutput or "")[:_PREVIEW_LENGTH] or None
|
||||
inputHash = hashlib.sha256(contentInput.encode("utf-8")).hexdigest() if contentInput else None
|
||||
|
|
|
|||
|
|
@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [
|
|||
"path": "/automations",
|
||||
"order": 30,
|
||||
},
|
||||
{
|
||||
"id": "rag-inventory",
|
||||
"objectKey": "ui.system.ragInventory",
|
||||
"label": t("RAG-Inventar"),
|
||||
"icon": "FaDatabase",
|
||||
"path": "/rag-inventory",
|
||||
"order": 35,
|
||||
},
|
||||
{
|
||||
"id": "store",
|
||||
"objectKey": "ui.system.store",
|
||||
|
|
@ -322,6 +330,16 @@ NAVIGATION_SECTIONS = [
|
|||
"adminOnly": True,
|
||||
"sysAdminOnly": True,
|
||||
},
|
||||
{
|
||||
"id": "admin-stt-benchmark",
|
||||
"objectKey": "ui.admin.sttBenchmark",
|
||||
"label": t("STT Benchmark"),
|
||||
"icon": "FaMicrophone",
|
||||
"path": "/admin/stt-benchmark",
|
||||
"order": 92,
|
||||
"adminOnly": True,
|
||||
"sysAdminOnly": True,
|
||||
},
|
||||
{
|
||||
"id": "admin-languages",
|
||||
"objectKey": "ui.admin.languages",
|
||||
|
|
|
|||
|
|
@ -47,7 +47,9 @@ backports-tarfile==1.2.0
|
|||
bcrypt==4.0.1
|
||||
# via -r requirements.txt
|
||||
beautifulsoup4==4.12.2
|
||||
# via -r requirements.txt
|
||||
# via
|
||||
# -r requirements.txt
|
||||
# extract-msg
|
||||
bleach==6.3.0
|
||||
# via -r requirements.txt
|
||||
bokeh==3.3.4
|
||||
|
|
@ -81,6 +83,10 @@ click-plugins==1.1.1.2
|
|||
# via fiona
|
||||
cligj==0.7.2
|
||||
# via fiona
|
||||
colorclass==2.2.2
|
||||
# via oletools
|
||||
compressed-rtf==1.0.7
|
||||
# via extract-msg
|
||||
contourpy==1.3.3
|
||||
# via
|
||||
# bokeh
|
||||
|
|
@ -89,6 +95,7 @@ cryptography==43.0.3
|
|||
# via
|
||||
# -r requirements.txt
|
||||
# msal
|
||||
# msoffcrypto-tool
|
||||
# pyjwt
|
||||
# python-jose
|
||||
# secretstorage
|
||||
|
|
@ -102,6 +109,10 @@ dnspython==2.8.0
|
|||
# via email-validator
|
||||
docutils==0.22.4
|
||||
# via -r requirements.txt
|
||||
easygui==0.98.3
|
||||
# via oletools
|
||||
ebcdic==1.1.1
|
||||
# via extract-msg
|
||||
ecdsa==0.19.1
|
||||
# via python-jose
|
||||
email-validator==2.0.0
|
||||
|
|
@ -110,6 +121,8 @@ et-xmlfile==2.0.0
|
|||
# via openpyxl
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
extract-msg==0.55.0
|
||||
# via -r requirements.txt
|
||||
fastapi==0.115.0
|
||||
# via -r requirements.txt
|
||||
fiona==1.10.1
|
||||
|
|
@ -251,6 +264,8 @@ langgraph-sdk==0.3.3
|
|||
# via langgraph
|
||||
langsmith==0.6.8
|
||||
# via langchain-core
|
||||
lark==1.3.1
|
||||
# via rtfde
|
||||
limits==5.6.0
|
||||
# via slowapi
|
||||
linkify-it-py==2.0.3
|
||||
|
|
@ -285,6 +300,8 @@ msal==1.24.1
|
|||
# via
|
||||
# -r requirements.txt
|
||||
# office365-rest-python-client
|
||||
msoffcrypto-tool==6.0.0
|
||||
# via oletools
|
||||
multidict==6.7.1
|
||||
# via
|
||||
# aiohttp
|
||||
|
|
@ -310,6 +327,15 @@ oauthlib==3.3.1
|
|||
# via requests-oauthlib
|
||||
office365-rest-python-client==2.6.2
|
||||
# via -r requirements.txt
|
||||
olefile==0.47
|
||||
# via
|
||||
# extract-msg
|
||||
# msoffcrypto-tool
|
||||
# oletools
|
||||
oletools==0.60.2
|
||||
# via
|
||||
# pcodedmp
|
||||
# rtfde
|
||||
openpyxl==3.1.5
|
||||
# via -r requirements.txt
|
||||
orjson==3.11.7
|
||||
|
|
@ -345,6 +371,8 @@ parso==0.8.5
|
|||
# via jedi
|
||||
passlib==1.7.4
|
||||
# via -r requirements.txt
|
||||
pcodedmp==1.2.6
|
||||
# via oletools
|
||||
pillow==12.1.0
|
||||
# via
|
||||
# -r requirements.txt
|
||||
|
|
@ -413,6 +441,7 @@ pyparsing==3.3.2
|
|||
# via
|
||||
# httplib2
|
||||
# matplotlib
|
||||
# oletools
|
||||
pypdf2==3.0.1
|
||||
# via -r requirements.txt
|
||||
pyproj==3.7.2
|
||||
|
|
@ -453,6 +482,8 @@ pyyaml==6.0.3
|
|||
# via
|
||||
# bokeh
|
||||
# langchain-core
|
||||
red-black-tree-mod==1.22
|
||||
# via extract-msg
|
||||
referencing==0.37.0
|
||||
# via
|
||||
# jsonschema
|
||||
|
|
@ -489,6 +520,8 @@ rsa==4.9.1
|
|||
# via
|
||||
# google-auth
|
||||
# python-jose
|
||||
rtfde==0.1.2.2
|
||||
# via extract-msg
|
||||
seaborn==0.13.0
|
||||
# via -r requirements.txt
|
||||
secretstorage==3.5.0
|
||||
|
|
@ -573,7 +606,9 @@ typing-inspection==0.4.2
|
|||
tzdata==2025.3
|
||||
# via pandas
|
||||
tzlocal==5.3.1
|
||||
# via apscheduler
|
||||
# via
|
||||
# apscheduler
|
||||
# extract-msg
|
||||
uc-micro-py==1.0.3
|
||||
# via linkify-it-py
|
||||
uritemplate==4.2.0
|
||||
|
|
|
|||
|
|
@ -110,6 +110,9 @@ asyncpg==0.30.0
|
|||
## Stripe payments
|
||||
stripe>=11.0.0
|
||||
|
||||
## Outlook MSG file extraction
|
||||
extract-msg>=0.55.0
|
||||
|
||||
## Geospatial libraries for STAC connector
|
||||
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
|
||||
shapely>=2.0.0 # For geometric operations (intersections, area calculations)
|
||||
|
|
|
|||
88
scripts/script_db_migrate_datasource_rag.py
Normal file
88
scripts/script_db_migrate_datasource_rag.py
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.
|
||||
|
||||
This is a one-off migration for the RAG consent & control unification.
|
||||
Safe to run multiple times (checks column existence before acting).
|
||||
|
||||
Usage:
|
||||
python script_db_migrate_datasource_rag.py [--dry-run]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
# Locate the gateway root (the directory that contains scripts/) from this
# file's own location, independent of the caller's working directory.
scriptPath = Path(__file__).resolve()
gatewayPath = scriptPath.parent.parent

# Run from the gateway root and make it importable; this must happen before
# the project imports further down in the file.
os.chdir(str(gatewayPath))
sys.path.insert(0, str(gatewayPath))

# force=True replaces any logging handlers that were configured earlier.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    force=True,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
import psycopg2
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
def _getConnection():
    """Open a new psycopg2 connection from the ``DB_*`` entries of APP_CONFIG.

    Host, port and database name fall back to ``localhost``, ``5432`` and
    ``poweron_app``; user and password have no fallback.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    connectionArgs = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    return psycopg2.connect(**connectionArgs)
|
||||
|
||||
|
||||
def _columnExists(cur, table: str, column: str) -> bool:
|
||||
cur.execute(
|
||||
"""SELECT 1 FROM information_schema.columns
|
||||
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
|
||||
(table, column),
|
||||
)
|
||||
return cur.fetchone() is not None
|
||||
|
||||
|
||||
def migrate(dryRun: bool = False):
    """Rename the legacy DataSource sync columns to their RAG-index names.

    For each (table, old, new) triple the rename is issued only when the old
    column exists and the new one does not, so running the script repeatedly
    is harmless.

    Args:
        dryRun: When True the SQL is logged but never executed, and the
            transaction is rolled back.

    Raises:
        Whatever the database driver raises. The cursor and connection are
        closed in every case; closing without a commit leaves no partial
        rename behind.
    """
    renames = [
        ("DataSource", "autoSync", "ragIndexEnabled"),
        ("DataSource", "lastSynced", "lastIndexed"),
    ]

    conn = _getConnection()
    try:
        # Both renames must land (or fail) together.
        conn.autocommit = False
        cur = conn.cursor()
        try:
            executed = []
            for table, oldCol, newCol in renames:
                # Query each column once instead of re-checking per branch.
                hasOld = _columnExists(cur, table, oldCol)
                hasNew = _columnExists(cur, table, newCol)

                if hasOld and not hasNew:
                    sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";'
                    logger.info("EXEC: %s", sql)
                    if not dryRun:
                        cur.execute(sql)
                    # Recorded in dry-run mode too, so the summary can report
                    # what would have been executed.
                    executed.append(sql)
                elif hasNew:
                    logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol)
                else:
                    logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol)

            if executed and not dryRun:
                conn.commit()
                logger.info("Migration committed (%d statements)", len(executed))
            elif executed:
                conn.rollback()
                logger.info("DRY RUN — would execute %d statements", len(executed))
            else:
                logger.info("Nothing to do — schema already up to date")
        finally:
            cur.close()
    finally:
        # Always release the connection, also when a statement failed.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: the module docstring doubles as the --help description.
    cliParser = argparse.ArgumentParser(description=__doc__)
    cliParser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print SQL without executing",
    )
    cliArgs = cliParser.parse_args()
    migrate(dryRun=cliArgs.dry_run)
|
||||
3
tests/eval/__init__.py
Normal file
3
tests/eval/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Eval harness for the Feature Data Sub-Agent (Phase 1.5)."""
|
||||
246
tests/eval/fakeFeatureDataProvider.py
Normal file
246
tests/eval/fakeFeatureDataProvider.py
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""In-memory drop-in for FeatureDataProvider used by the eval harness.
|
||||
|
||||
Implements the same three public methods (browseTable / queryTable /
|
||||
aggregateTable) plus the small surface the Sub-Agent reads (getActualColumns),
|
||||
but runs all filters/aggregations in Python over the BenchmarkFixture rows.
|
||||
|
||||
This keeps the eval hermetic: no DB connection, no fixtures to insert/clean,
|
||||
no flakiness from shared test schemas. Only the LLM call is real.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# Aggregate functions accepted by FakeFeatureDataProvider.aggregateTable.
_ALLOWED_AGGREGATES = {"SUM", "COUNT", "AVG", "MIN", "MAX"}


class FakeFeatureDataProvider:
    """In-memory provider compatible with :class:`FeatureDataProvider`.

    Rows are held per table name in plain lists of dicts. Every public
    browse/query/aggregate call is recorded in ``callLog`` so a test can
    inspect which calls were made.
    """

    def __init__(
        self,
        rowsByTable: Dict[str, List[Dict[str, Any]]],
        availableTables: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
        # Copy the outer containers so later changes to the caller's lists do
        # not leak in (the row dicts themselves are shared, not copied).
        self._rowsByTable = {}
        for tableName, tableRows in rowsByTable.items():
            self._rowsByTable[tableName] = list(tableRows)
        self._availableTables = list(availableTables) if availableTables else []
        self.callLog: List[Dict[str, Any]] = []

    def getAvailableTables(self, featureCode: str) -> List[Dict[str, Any]]:  # noqa: ARG002
        """Return a copy of the configured table descriptors (featureCode is ignored)."""
        return self._availableTables.copy()

    def getTableSchema(self, featureCode: str, tableName: str) -> Optional[Dict[str, Any]]:  # noqa: ARG002
        """Return the first descriptor whose ``meta.table`` equals ``tableName``, else None."""
        return next(
            (
                descriptor
                for descriptor in self._availableTables
                if descriptor.get("meta", {}).get("table") == tableName
            ),
            None,
        )

    def getActualColumns(self, tableName: str) -> List[str]:
        """Return every key seen in the table's rows, in first-seen order."""
        tableRows = self._rowsByTable.get(tableName, [])
        # dict.fromkeys keeps insertion order and drops repeats.
        return list(dict.fromkeys(key for row in tableRows for key in row.keys()))

    @staticmethod
    def _pageAndProject(
        rows: List[Dict[str, Any]],
        fields: Optional[List[str]],
        limit: int,
        offset: int,
    ) -> Dict[str, Any]:
        """Slice ``rows`` to one page, optionally keep only ``fields``, and wrap the result."""
        window = rows[offset : offset + limit]
        if fields:
            window = [{k: v for k, v in row.items() if k in fields} for row in window]
        # "total" counts the rows before paging.
        return {"rows": window, "total": len(rows), "limit": limit, "offset": offset}

    def browseTable(
        self,
        tableName: str,
        featureInstanceId: str,
        mandateId: str,
        fields: List[str] = None,
        limit: int = 50,
        offset: int = 0,
        extraFilters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Return one page of the scoped rows, filtered by ``extraFilters`` only."""
        self.callLog.append({"method": "browseTable", "table": tableName, "fields": fields, "limit": limit})
        scoped = self._scopeRows(tableName, featureInstanceId, mandateId)
        matching = _applyFilters(scoped, extraFilters)
        return self._pageAndProject(matching, fields, limit, offset)

    def queryTable(
        self,
        tableName: str,
        featureInstanceId: str,
        mandateId: str,
        filters: List[Dict[str, Any]] = None,
        fields: List[str] = None,
        orderBy: str = None,
        limit: int = 50,
        offset: int = 0,
        extraFilters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Return one page of the scoped rows after filtering and optional ordering.

        ``filters`` and ``extraFilters`` are applied together. With
        ``orderBy`` set, rows sort ascending on that field with None values
        last; if the values are not mutually comparable, their string forms
        are compared instead.
        """
        self.callLog.append({
            "method": "queryTable", "table": tableName, "filters": filters,
            "fields": fields, "orderBy": orderBy, "limit": limit,
        })
        scoped = self._scopeRows(tableName, featureInstanceId, mandateId)
        allFilters = list(filters or []) + list(extraFilters or [])
        matching = _applyFilters(scoped, allFilters)
        if orderBy:
            try:
                matching = sorted(matching, key=lambda r: (r.get(orderBy) is None, r.get(orderBy)))
            except TypeError:
                matching = sorted(matching, key=lambda r: str(r.get(orderBy)))
        return self._pageAndProject(matching, fields, limit, offset)

    def aggregateTable(
        self,
        tableName: str,
        featureInstanceId: str,
        mandateId: str,
        aggregate: str,
        field: str,
        groupBy: str = None,
        extraFilters: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Aggregate ``field`` over the scoped, filtered rows.

        Without ``groupBy`` the result holds a single ``{"result": ...}``
        row. With ``groupBy`` it holds one ``{"groupValue", "result"}`` row
        per distinct value, largest result first and None results last. An
        aggregate outside ``_ALLOWED_AGGREGATES`` yields an ``error`` payload
        instead of raising.
        """
        # The call is logged with the aggregate exactly as the caller wrote it.
        self.callLog.append({
            "method": "aggregateTable", "table": tableName,
            "aggregate": aggregate, "field": field, "groupBy": groupBy,
        })
        aggregate = aggregate.upper()
        if aggregate not in _ALLOWED_AGGREGATES:
            return {"rows": [], "error": f"Unsupported aggregate: {aggregate}"}

        scoped = self._scopeRows(tableName, featureInstanceId, mandateId)
        matching = _applyFilters(scoped, extraFilters)

        if not groupBy:
            outRows = [{"result": _aggregate(aggregate, [r.get(field) for r in matching])}]
        else:
            buckets: Dict[Any, List[Dict[str, Any]]] = {}
            for row in matching:
                buckets.setdefault(row.get(groupBy), []).append(row)
            outRows = [
                {"groupValue": groupValue, "result": _aggregate(aggregate, [r.get(field) for r in members])}
                for groupValue, members in buckets.items()
            ]
            outRows.sort(key=lambda r: (r["result"] is None, -(r["result"] or 0)))

        return {
            "rows": outRows,
            "aggregate": aggregate,
            "field": field,
            "groupBy": groupBy,
        }

    def _scopeRows(self, tableName: str, featureInstanceId: str, mandateId: str) -> List[Dict[str, Any]]:
        """Return the table's rows visible to the given feature instance and mandate.

        A row without ``featureInstanceId`` / ``mandateId`` (or with None) is
        visible to every caller.
        """
        instanceChoices = (None, featureInstanceId)
        mandateChoices = (None, mandateId)
        return [
            row
            for row in self._rowsByTable.get(tableName, [])
            if row.get("featureInstanceId") in instanceChoices
            and row.get("mandateId") in mandateChoices
        ]
|
||||
|
||||
|
||||
def _applyFilters(rows: List[Dict[str, Any]], filters: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
|
||||
if not filters:
|
||||
return rows
|
||||
out = rows
|
||||
for f in filters:
|
||||
field = f.get("field")
|
||||
op = (f.get("op") or "=").upper()
|
||||
value = f.get("value")
|
||||
out = [r for r in out if _matchesFilter(r.get(field), op, value)]
|
||||
return out
|
||||
|
||||
|
||||
def _matchesFilter(rowValue: Any, op: str, filterValue: Any) -> bool:
|
||||
if op in ("IS NULL",):
|
||||
return rowValue is None
|
||||
if op in ("IS NOT NULL",):
|
||||
return rowValue is not None
|
||||
if rowValue is None:
|
||||
return False
|
||||
if op == "=":
|
||||
return _coerceEqual(rowValue, filterValue)
|
||||
if op == "!=":
|
||||
return not _coerceEqual(rowValue, filterValue)
|
||||
if op == ">":
|
||||
return _coerceFloat(rowValue) > _coerceFloat(filterValue)
|
||||
if op == "<":
|
||||
return _coerceFloat(rowValue) < _coerceFloat(filterValue)
|
||||
if op == ">=":
|
||||
return _coerceFloat(rowValue) >= _coerceFloat(filterValue)
|
||||
if op == "<=":
|
||||
return _coerceFloat(rowValue) <= _coerceFloat(filterValue)
|
||||
if op in ("LIKE", "ILIKE"):
|
||||
pattern = str(filterValue or "")
|
||||
target = str(rowValue)
|
||||
if op == "ILIKE":
|
||||
pattern = pattern.lower()
|
||||
target = target.lower()
|
||||
return _sqlLike(target, pattern)
|
||||
if op == "IN":
|
||||
if isinstance(filterValue, (list, tuple, set)):
|
||||
return any(_coerceEqual(rowValue, v) for v in filterValue)
|
||||
return _coerceEqual(rowValue, filterValue)
|
||||
return False
|
||||
|
||||
|
||||
def _coerceEqual(a: Any, b: Any) -> bool:
|
||||
if a == b:
|
||||
return True
|
||||
try:
|
||||
return str(a) == str(b)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _coerceFloat(value: Any) -> float:
|
||||
if value is None:
|
||||
return 0.0
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def _sqlLike(value: str, pattern: str) -> bool:
|
||||
"""Approximate SQL LIKE -- only % and _ wildcards."""
|
||||
import re
|
||||
regex = ""
|
||||
i = 0
|
||||
while i < len(pattern):
|
||||
ch = pattern[i]
|
||||
if ch == "%":
|
||||
regex += ".*"
|
||||
elif ch == "_":
|
||||
regex += "."
|
||||
else:
|
||||
regex += re.escape(ch)
|
||||
i += 1
|
||||
return re.fullmatch(regex, value or "") is not None
|
||||
|
||||
|
||||
def _aggregate(op: str, values: List[Any]) -> Any:
|
||||
if op == "COUNT":
|
||||
return sum(1 for v in values if v is not None)
|
||||
nums = [_coerceFloat(v) for v in values if v is not None]
|
||||
if not nums:
|
||||
return 0 if op == "SUM" else None
|
||||
if op == "SUM":
|
||||
return round(sum(nums), 4)
|
||||
if op == "AVG":
|
||||
return round(sum(nums) / len(nums), 4)
|
||||
if op == "MIN":
|
||||
return min(nums)
|
||||
if op == "MAX":
|
||||
return max(nums)
|
||||
return None
|
||||
735
tests/eval/runTrusteeBenchmark.py
Normal file
735
tests/eval/runTrusteeBenchmark.py
Normal file
|
|
@ -0,0 +1,735 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Trustee Sub-Agent Eval Harness (Phase 1.5).
|
||||
|
||||
Standalone runner that fires real AI calls against the Feature Data
|
||||
Sub-Agent in three configurations:
|
||||
|
||||
* ``baseline`` -- production code without the pre-execute validator
|
||||
(Repair-Loop disabled, Trustee domain hints active).
|
||||
* ``phase1`` -- pre-execute validator on (Repair-Loop active),
|
||||
domain hints active, no ontology yet.
|
||||
* ``phase2`` -- validator on, ontology-driven schema context +
|
||||
constraints (replaces hand-written domain hints).
|
||||
|
||||
For each mode we run all 19 gold-standard questions against an
|
||||
in-memory :class:`FakeFeatureDataProvider`, capture the agent's tool
|
||||
calls and final answer, score them against the gold standard, and
|
||||
write a Markdown report to ``local/notes/`` for analysis.
|
||||
|
||||
Usage::
|
||||
|
||||
cd gateway
|
||||
python -m tests.eval.runTrusteeBenchmark # all 3 modes
|
||||
python -m tests.eval.runTrusteeBenchmark phase1 # one mode only
|
||||
python -m tests.eval.runTrusteeBenchmark baseline phase1
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Path setup so `python -m tests.eval.runTrusteeBenchmark` works from gateway/
|
||||
# ---------------------------------------------------------------------------
|
||||
_GATEWAY_DIR = Path(__file__).resolve().parents[2]
|
||||
if str(_GATEWAY_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(_GATEWAY_DIR))
|
||||
|
||||
import yaml # noqa: E402
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( # noqa: E402
|
||||
AgentConfig,
|
||||
AgentEventTypeEnum,
|
||||
)
|
||||
from modules.datamodels.datamodelAi import ( # noqa: E402
|
||||
AiCallRequest,
|
||||
AiCallResponse,
|
||||
OperationTypeEnum,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.agentLoop import runAgentLoop # noqa: E402
|
||||
from modules.serviceCenter.services.serviceAgent.featureDataAgent import ( # noqa: E402
|
||||
_buildSubAgentTools,
|
||||
_buildSchemaContext,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( # noqa: E402
|
||||
QueryValidationError,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.queryValidator import ( # noqa: E402
|
||||
QueryValidator,
|
||||
)
|
||||
|
||||
from tests.eval.fakeFeatureDataProvider import ( # noqa: E402
|
||||
FakeFeatureDataProvider,
|
||||
)
|
||||
from tests.fixtures.trusteeBenchmark.loadTrusteeBenchmarkFixture import ( # noqa: E402
|
||||
buildTrusteeBenchmarkFixture,
|
||||
BenchmarkFixture,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger("trusteeBenchmark")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# NoOpValidator -- baseline mode (Repair-Loop OFF)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _NoOpValidator(QueryValidator):
    """Validator that never rejects anything (used for baseline measurement).

    Each validation hook is overridden to return None for every input.
    """

    def validateBrowseQuery(self, tableName, args):  # noqa: ARG002
        """Accept any browse call."""

    def validateQueryTable(self, tableName, args):  # noqa: ARG002
        """Accept any query call."""

    def validateAggregateQuery(self, tableName, args):  # noqa: ARG002
        """Accept any aggregate call."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mode-specific tool/prompt building
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ModeConfig:
|
||||
name: str
|
||||
label: str
|
||||
useValidator: bool
|
||||
useOntology: bool
|
||||
|
||||
|
||||
_MODES: Dict[str, _ModeConfig] = {
|
||||
"baseline": _ModeConfig(name="baseline", label="Baseline (no validator)", useValidator=False, useOntology=False),
|
||||
"phase1": _ModeConfig(name="phase1", label="Phase 1 (validator on)", useValidator=True, useOntology=False),
|
||||
"phase2": _ModeConfig(name="phase2", label="Phase 2 (validator + ontology)", useValidator=True, useOntology=True),
|
||||
}
|
||||
|
||||
|
||||
def _buildValidator(mode: _ModeConfig) -> QueryValidator:
    """Return the validator that belongs to ``mode``.

    * ``useValidator`` off: a ``_NoOpValidator`` that accepts every call.
    * ``useValidator`` on, ``useOntology`` off: a default ``QueryValidator``.
    * both on: a ``QueryValidator`` built with the trustee ontology. If the
      ontology cannot be imported or constructed, a warning is logged and
      the default ``QueryValidator`` is returned instead.
    """
    if not mode.useValidator:
        return _NoOpValidator()

    if not mode.useOntology:
        return QueryValidator()

    try:
        # Imported lazily: only this mode needs the trustee feature module.
        from modules.features.trustee.trusteeOntology import getTrusteeOntology
        return QueryValidator(ontology=getTrusteeOntology())
    except Exception as e:
        logger.warning("Could not load trustee ontology, falling back: %s", e)
    return QueryValidator()
|
||||
|
||||
|
||||
def _applyEnvForMode(mode: _ModeConfig) -> None:
|
||||
"""Set the ontology toggle for the production prompt builder.
|
||||
|
||||
The Phase 2 path uses ``featureDataAgent._buildSchemaContext`` to pull
|
||||
the prompt block from ``getAgentOntology()`` automatically. For
|
||||
baseline/phase1 we set ``POWERON_DISABLE_FEATURE_ONTOLOGY=1`` so the
|
||||
builder falls back to the legacy ``getAgentDomainHints()`` block --
|
||||
measuring exactly the production prompt that ships today.
|
||||
"""
|
||||
if mode.useOntology:
|
||||
os.environ.pop("POWERON_DISABLE_FEATURE_ONTOLOGY", None)
|
||||
else:
|
||||
os.environ["POWERON_DISABLE_FEATURE_ONTOLOGY"] = "1"
|
||||
|
||||
|
||||
def _buildSystemPrompt(featureCode: str, instanceLabel: str, selectedTables: List[Dict[str, Any]]) -> str:
    """Produce the sub-agent system prompt for the given feature and tables.

    Delegates to ``_buildSchemaContext`` with the request language fixed to
    German (``"de"``). The call is the same for every benchmark mode; per
    the harness design, mode differences come from the
    ``POWERON_DISABLE_FEATURE_ONTOLOGY`` environment flag that
    :func:`_applyEnvForMode` manages.
    """
    promptText = _buildSchemaContext(featureCode, instanceLabel, selectedTables, requestLang="de")
    return promptText
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Question loading + per-question evaluation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Question:
|
||||
id: str
|
||||
question: str
|
||||
intent: str
|
||||
expectedTools: List[str]
|
||||
expectedTable: Optional[str]
|
||||
expectedAggregate: Optional[str]
|
||||
expectedAggregateField: Optional[str]
|
||||
requiredFilters: Dict[str, Any]
|
||||
forbiddenTools: List[str]
|
||||
expectedNumbers: List[float]
|
||||
expectedAnswerContains: List[str]
|
||||
numericTolerance: float
|
||||
|
||||
|
||||
def _loadQuestions(yamlPath: Path) -> List[_Question]:
    """Load the benchmark questions from a YAML file.

    The file is expected to hold a list of mappings. ``id`` and ``question``
    are mandatory; every other key is optional and falls back to an empty
    or None value.

    Args:
        yamlPath: Path of the YAML file, read as UTF-8.

    Returns:
        One ``_Question`` per entry, in file order. An empty file yields an
        empty list.

    Raises:
        KeyError: If an entry lacks ``id`` or ``question``.
    """
    with open(yamlPath, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document.
        rawList = yaml.safe_load(f) or []

    questions: List[_Question] = []
    for raw in rawList:
        tolerance = raw.get("numericTolerance")
        questions.append(_Question(
            id=raw["id"],
            question=raw["question"],
            intent=raw.get("intent", ""),
            expectedTools=list(raw.get("expectedTools") or []),
            expectedTable=raw.get("expectedTable"),
            expectedAggregate=raw.get("expectedAggregate"),
            expectedAggregateField=raw.get("expectedAggregateField"),
            requiredFilters=dict(raw.get("requiredFilters") or {}),
            forbiddenTools=list(raw.get("forbiddenTools") or []),
            expectedNumbers=[float(x) for x in (raw.get("expectedNumbers") or [])],
            expectedAnswerContains=[str(x) for x in (raw.get("expectedAnswerContains") or [])],
            # Only a missing or blank value takes the default; an explicit 0
            # is honoured as "no relative tolerance".
            numericTolerance=0.005 if tolerance in (None, "") else float(tolerance),
        ))
    return questions
|
||||
|
||||
|
||||
@dataclass
|
||||
class _RunResult:
|
||||
questionId: str
|
||||
finalText: str
|
||||
toolCalls: List[Dict[str, Any]] = field(default_factory=list)
|
||||
toolResults: List[Dict[str, Any]] = field(default_factory=list)
|
||||
summary: Dict[str, Any] = field(default_factory=dict)
|
||||
durationS: float = 0.0
|
||||
error: Optional[str] = None
|
||||
|
||||
@property
|
||||
def costCHF(self) -> float:
|
||||
return float(self.summary.get("costCHF") or 0.0)
|
||||
|
||||
@property
|
||||
def rounds(self) -> int:
|
||||
return int(self.summary.get("rounds") or 0)
|
||||
|
||||
@property
|
||||
def validationFailures(self) -> int:
|
||||
return int(self.summary.get("validationFailures") or 0)
|
||||
|
||||
@property
|
||||
def repairAttempts(self) -> int:
|
||||
return int(self.summary.get("repairAttempts") or 0)
|
||||
|
||||
@property
|
||||
def successAfterRepair(self) -> int:
|
||||
return int(self.summary.get("successAfterRepair") or 0)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Score:
|
||||
patternOk: bool = False
|
||||
forbidOk: bool = False
|
||||
numericOk: bool = False
|
||||
accuracyOk: bool = False
|
||||
notes: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def _scoreRun(question: _Question, run: _RunResult) -> _Score:
    """Score one run against its question.

    A run that ended with an error gets an all-False score carrying the
    error as a note. Otherwise the pattern, forbid and numeric checks are
    evaluated and ``accuracyOk`` is set only when all three pass.
    """
    verdict = _Score()
    if not run.error:
        verdict.patternOk = _checkPattern(question, run)
        verdict.forbidOk = _checkForbid(question, run)
        verdict.numericOk = _checkNumeric(question, run)
        verdict.accuracyOk = verdict.patternOk and verdict.forbidOk and verdict.numericOk
    else:
        verdict.notes.append(f"Sub-agent error: {run.error}")
    return verdict
|
||||
|
||||
|
||||
def _checkPattern(question: _Question, run: _RunResult) -> bool:
|
||||
"""Did the agent call one of the expected tools on the expected table with required filters?"""
|
||||
if not question.expectedTools:
|
||||
return True
|
||||
matchingCalls = [
|
||||
c for c in run.toolCalls
|
||||
if c.get("toolName") in question.expectedTools
|
||||
and (not question.expectedTable or c.get("args", {}).get("tableName") == question.expectedTable)
|
||||
]
|
||||
if not matchingCalls:
|
||||
return False
|
||||
|
||||
if question.expectedAggregate:
|
||||
wantAgg = question.expectedAggregate.upper()
|
||||
wantField = question.expectedAggregateField
|
||||
for c in matchingCalls:
|
||||
args = c.get("args", {})
|
||||
if c.get("toolName") != "aggregateTable":
|
||||
continue
|
||||
if (args.get("aggregate") or "").upper() != wantAgg:
|
||||
continue
|
||||
if wantField and args.get("field") != wantField:
|
||||
continue
|
||||
if not _filtersSatisfied(question.requiredFilters, args.get("extraFilters") or args.get("filters") or []):
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
if question.requiredFilters:
|
||||
for c in matchingCalls:
|
||||
args = c.get("args", {})
|
||||
filters = args.get("filters") or args.get("extraFilters") or []
|
||||
if _filtersSatisfied(question.requiredFilters, filters):
|
||||
return True
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _filtersSatisfied(required: Dict[str, Any], actualFilters: List[Dict[str, Any]]) -> bool:
|
||||
if not required:
|
||||
return True
|
||||
for reqField, reqValue in required.items():
|
||||
if reqField.endswith("Like"):
|
||||
field = reqField[:-4]
|
||||
wanted = str(reqValue)
|
||||
ok = any(
|
||||
(f.get("field") == field) and (f.get("op", "").upper() in ("LIKE", "ILIKE"))
|
||||
and str(f.get("value")) == wanted
|
||||
for f in actualFilters
|
||||
)
|
||||
if not ok:
|
||||
return False
|
||||
else:
|
||||
ok = any(
|
||||
f.get("field") == reqField and _filterValueEqual(f.get("value"), reqValue)
|
||||
for f in actualFilters
|
||||
)
|
||||
if not ok:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _filterValueEqual(a: Any, b: Any) -> bool:
|
||||
if a == b:
|
||||
return True
|
||||
try:
|
||||
return str(a).strip() == str(b).strip()
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _checkForbid(question: _Question, run: _RunResult) -> bool:
|
||||
"""Did the agent AVOID forbidden tool/op combinations?
|
||||
|
||||
Forbidden hits only count if the call actually went through to the
|
||||
provider (success=True). Validator-rejected calls don't count -- the
|
||||
Repair-Loop is doing its job and steering the agent away.
|
||||
"""
|
||||
if not question.forbiddenTools:
|
||||
return True
|
||||
forbiddenSet = set(question.forbiddenTools)
|
||||
for r in run.toolResults:
|
||||
if not r.get("success"):
|
||||
continue
|
||||
if r.get("toolName") in forbiddenSet:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _checkNumeric(question: _Question, run: _RunResult) -> bool:
|
||||
text = (run.finalText or "")
|
||||
if question.expectedNumbers:
|
||||
textNumbers = _extractNumbers(text)
|
||||
for expected in question.expectedNumbers:
|
||||
tol = max(abs(expected) * question.numericTolerance, 0.5)
|
||||
if not any(abs(n - expected) <= tol for n in textNumbers):
|
||||
return False
|
||||
|
||||
if question.expectedAnswerContains:
|
||||
lowered = text.lower()
|
||||
for needle in question.expectedAnswerContains:
|
||||
if needle.lower() not in lowered:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _extractNumbers(text: str) -> List[float]:
|
||||
"""Pick out all numbers from a free-text answer.
|
||||
|
||||
Handles Swiss thousand separators (apostrophe and U+2019), German
|
||||
decimals (comma), plain integers/floats, and JSON numbers. Trailing
|
||||
punctuation (``,``, ``;``, ``.`` from end-of-sentence) is stripped
|
||||
before parsing so ``"180500.0,"`` parses cleanly to 180500.0.
|
||||
"""
|
||||
cleaned = text.replace("\u2019", "'")
|
||||
tokens = re.findall(r"-?\d[\d'.,]*", cleaned)
|
||||
out: List[float] = []
|
||||
for tok in tokens:
|
||||
tok = tok.rstrip(",;")
|
||||
if tok.endswith(".") and tok.count(".") == 1:
|
||||
tok = tok[:-1]
|
||||
norm = tok.replace("'", "")
|
||||
if norm.count(",") == 1 and norm.count(".") == 0:
|
||||
norm = norm.replace(",", ".")
|
||||
elif norm.count(",") >= 1 and norm.count(".") >= 1:
|
||||
if norm.rfind(",") > norm.rfind("."):
|
||||
norm = norm.replace(".", "").replace(",", ".")
|
||||
else:
|
||||
norm = norm.replace(",", "")
|
||||
else:
|
||||
norm = norm.replace(",", "")
|
||||
try:
|
||||
out.append(float(norm))
|
||||
except ValueError:
|
||||
continue
|
||||
return out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AI call wiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _bootstrapServices() -> Tuple[Any, str, str]:
    """Create a service hub for the root interface's user and the initial mandate.

    Returns:
        ``(services, userId, mandateId)``: the service hub, the id of the
        root interface's current user, and the id of the initial mandate.

    Raises:
        RuntimeError: If the root interface reports no initial mandate.
    """
    # Project imports are kept local to this function.
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.datamodels.datamodelUam import Mandate
    from modules.serviceHub import getInterface as getServices

    rootInterface = getRootInterface()
    rootUser = rootInterface.currentUser
    initialMandateId = rootInterface.getInitialId(Mandate)
    if not initialMandateId:
        raise RuntimeError("No initial mandate available -- run bootstrap loader first.")

    hub = getServices(rootUser, workflow=None, mandateId=initialMandateId, featureInstanceId=None)
    return hub, rootUser.id, initialMandateId
|
||||
|
||||
|
||||
async def _runOneQuestion(
    *,
    services: Any,
    userId: str,
    mandateId: str,
    fixture: BenchmarkFixture,
    question: _Question,
    mode: _ModeConfig,
) -> _RunResult:
    """Execute a single sub-agent run for one question under one mode.

    The agent works against a fresh ``FakeFeatureDataProvider`` built from
    the fixture; only the AI calls go through ``services.ai.callAi``. Events
    from the agent loop are folded into the returned ``_RunResult``.

    Args:
        services: Service hub; only ``services.ai.callAi`` is used here.
        userId: User id passed to the agent loop.
        mandateId: Mandate id passed to the agent loop. The tool registry is
            built with ``fixture.mandateId`` instead.
        fixture: Benchmark rows, table descriptors and ids.
        question: The question to ask.
        mode: Benchmark mode; selects the validator.

    Returns:
        The populated ``_RunResult``. An exception raised by the agent loop
        is logged and stored in ``error`` rather than propagated.
    """
    provider = FakeFeatureDataProvider(
        rowsByTable=fixture.rowsByTable,
        availableTables=fixture.selectedTables,
    )
    validator = _buildValidator(mode)
    registry = _buildSubAgentTools(
        provider=provider,
        featureInstanceId=fixture.featureInstanceId,
        mandateId=fixture.mandateId,
        tableFilters={},
        validator=validator,
    )

    systemPrompt = _buildSystemPrompt(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=fixture.selectedTables,
    )

    # Running total of AI cost for this run, updated by _aiCallFn and
    # reported to the agent loop through _getCost.
    cost = 0.0

    async def _aiCallFn(req: AiCallRequest) -> AiCallResponse:
        nonlocal cost
        resp = await services.ai.callAi(req)
        cost += float(getattr(resp, "priceCHF", 0.0) or 0.0)
        return resp

    async def _getCost() -> float:
        return cost

    config = AgentConfig(
        maxRounds=6,
        maxCostCHF=0.50,
        operationType=OperationTypeEnum.DATA_QUERY,
    )

    run = _RunResult(questionId=question.id, finalText="")
    # perf_counter is monotonic; wall-clock time can jump during a long run.
    t0 = time.perf_counter()
    try:
        async for event in runAgentLoop(
            prompt=question.question,
            toolRegistry=registry,
            config=config,
            aiCallFn=_aiCallFn,
            getWorkflowCostFn=_getCost,
            # Random suffix keeps workflow ids unique across repeated runs.
            workflowId=f"eval-{mode.name}-{question.id}-{uuid.uuid4().hex[:6]}",
            userId=userId,
            featureInstanceId=fixture.featureInstanceId,
            mandateId=mandateId,
            systemPromptOverride=systemPrompt,
        ):
            if event.type == AgentEventTypeEnum.FINAL:
                # A non-empty FINAL payload replaces any text gathered so far.
                run.finalText = event.content or run.finalText
            elif event.type == AgentEventTypeEnum.MESSAGE and event.content:
                run.finalText += event.content
            elif event.type == AgentEventTypeEnum.TOOL_CALL:
                run.toolCalls.append(dict(event.data or {}))
            elif event.type == AgentEventTypeEnum.TOOL_RESULT:
                run.toolResults.append(dict(event.data or {}))
            elif event.type == AgentEventTypeEnum.AGENT_SUMMARY:
                run.summary = dict(event.data or {})
            elif event.type == AgentEventTypeEnum.ERROR:
                run.error = (run.error or "") + (event.content or "")
    except Exception as e:
        # One failing question must not abort the whole benchmark.
        run.error = f"{type(e).__name__}: {e}"
        logger.exception("Sub-agent run failed for %s/%s", mode.name, question.id)
    run.durationS = time.perf_counter() - t0
    return run
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class _ModeReport:
    """Collected results of one benchmark mode plus derived summary metrics.

    ``perQuestion`` holds one ``(question, run, score)`` triple per executed
    question; every property below is computed from that list on access.
    """

    mode: _ModeConfig
    perQuestion: List[Tuple[_Question, _RunResult, _Score]] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Number of recorded question runs."""
        return len(self.perQuestion)

    def _count(self, attr: str) -> int:
        """Count the scores whose attribute ``attr`` is truthy."""
        flagged = [score for _question, _run, score in self.perQuestion if getattr(score, attr)]
        return len(flagged)

    def _sumOverRuns(self, attr: str):
        """Sum the run attribute ``attr`` across all recorded runs."""
        return sum(getattr(run, attr) for _question, run, _score in self.perQuestion)

    @property
    def accuracy(self) -> float:
        """Share of runs with ``accuracyOk`` set; 0.0 when nothing was run."""
        denominator = self.total or 1  # avoid dividing by zero on an empty report
        return self._count("accuracyOk") / denominator

    @property
    def patternCompliance(self) -> float:
        """Share of runs with ``patternOk`` set; 0.0 when nothing was run."""
        denominator = self.total or 1
        return self._count("patternOk") / denominator

    @property
    def repairConversionRate(self) -> float:
        """Ratio of summed ``successAfterRepair`` to summed ``repairAttempts``.

        Returns 0.0 when no repair was attempted at all.
        """
        attempts = self._sumOverRuns("repairAttempts")
        succeeded = self._sumOverRuns("successAfterRepair")
        return succeeded / attempts if attempts else 0.0

    @property
    def totalCostCHF(self) -> float:
        """Sum of ``costCHF`` over all runs."""
        return self._sumOverRuns("costCHF")

    @property
    def totalRounds(self) -> int:
        """Sum of ``rounds`` over all runs."""
        return self._sumOverRuns("rounds")

    @property
    def totalValidationFailures(self) -> int:
        """Sum of ``validationFailures`` over all runs."""
        return self._sumOverRuns("validationFailures")
|
||||
|
||||
|
||||
def _writeReport(reports: List[_ModeReport], outputPath: Path) -> None:
    """Render the benchmark results as Markdown and write them to ``outputPath``.

    The document contains a summary table (one row per mode) followed by, for
    each mode, a per-question table and a "Notes & failures" list covering
    every question whose score is not ``accuracyOk``.

    Args:
        reports: One report per executed mode, rendered in the given order.
        outputPath: Target file; missing parent directories are created.
    """
    lines: List[str] = []
    lines.append("# Trustee Sub-Agent Benchmark Report")
    lines.append("")
    lines.append(f"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("")
    lines.append("## Summary")
    lines.append("")
    lines.append("| Mode | Questions | Accuracy | Pattern compliance | Repair conversion | Validator rejects | Rounds | Cost (CHF) |")
    lines.append("|---|---|---|---|---|---|---|---|")
    for rep in reports:
        lines.append(
            f"| {rep.mode.label} | {rep.total} | {rep.accuracy:.1%} | {rep.patternCompliance:.1%} | "
            f"{rep.repairConversionRate:.1%} | {rep.totalValidationFailures} | {rep.totalRounds} | "
            f"{rep.totalCostCHF:.4f} |"
        )
    lines.append("")
    lines.append("## Per-question detail")
    for rep in reports:
        lines.append("")
        lines.append(f"### {rep.mode.label}")
        lines.append("")
        lines.append("| id | acc | pattern | forbid | numeric | rounds | val-fail | repairs | cost CHF | duration | tools |")
        lines.append("|---|---|---|---|---|---|---|---|---|---|---|")
        for q, r, s in rep.perQuestion:
            # `args` may be present but None in a recorded tool call; treat
            # that like a missing key instead of failing on `.get`.
            toolList = ",".join(
                f"{c.get('toolName')}({(c.get('args') or {}).get('tableName', '?')})"
                for c in r.toolCalls
            )
            lines.append(
                f"| {q.id} | {_yn(s.accuracyOk)} | {_yn(s.patternOk)} | {_yn(s.forbidOk)} | {_yn(s.numericOk)} | "
                f"{r.rounds} | {r.validationFailures} | {r.repairAttempts}/{r.successAfterRepair} | "
                f"{r.costCHF:.4f} | {r.durationS:.1f}s | {toolList} |"
            )
        lines.append("")
        lines.append("#### Notes & failures")
        for q, r, s in rep.perQuestion:
            if s.accuracyOk:
                continue
            lines.append(f"- **{q.id}** ({q.intent}): pattern={s.patternOk} forbid={s.forbidOk} numeric={s.numericOk}")
            if r.error:
                lines.append(f" - error: `{r.error}`")
            # A failed run frequently has no final text at all. splitlines()
            # on an empty string yields [], so indexing [0] unconditionally
            # would raise IndexError and abort the whole report.
            answerLines = (r.finalText or "").strip().replace("|", "/").splitlines()
            answerPreview = answerLines[0][:240] if answerLines else ""
            lines.append(f" - answer: `{answerPreview}`")
            for note in s.notes:
                lines.append(f" - note: {note}")
    outputPath.parent.mkdir(parents=True, exist_ok=True)
    outputPath.write_text("\n".join(lines), encoding="utf-8")
|
||||
|
||||
|
||||
def _yn(b: bool) -> str:
|
||||
return "OK" if b else "FAIL"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _runMain(modesToRun: List[str], onlyQuestionId: Optional[str] = None) -> None:
    """Run every loaded question in every requested mode and persist the results.

    Args:
        modesToRun: Keys into ``_MODES``; modes are executed in the given order.
        onlyQuestionId: When truthy, only the question with this id is run. If
            no question matches, a message is printed and the function returns
            without running anything.

    Output is a Markdown report (via ``_writeReport``) and a raw JSON dump,
    both written to ``_GATEWAY_DIR.parent / "local" / "notes"`` and sharing
    the same timestamp in their file names.
    """
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s -- %(message)s",
        level=logging.WARNING,
    )
    logger.setLevel(logging.INFO)

    fixture = buildTrusteeBenchmarkFixture()
    fixtureDir = _GATEWAY_DIR / "tests" / "fixtures" / "trusteeBenchmark"
    allQuestions = _loadQuestions(fixtureDir / "questions.yaml")
    if onlyQuestionId:
        allQuestions = [item for item in allQuestions if item.id == onlyQuestionId]
        if not allQuestions:
            print(f"No question matches id={onlyQuestionId!r}")
            return

    questionCount = len(allQuestions)
    modeCount = len(modesToRun)
    print(f"Loaded {questionCount} questions, {modeCount} modes -> {questionCount * modeCount} sub-agent runs.")

    services, userId, mandateId = _bootstrapServices()
    print(f"Bootstrap OK: user={userId}, mandate={mandateId}")

    reports: List[_ModeReport] = []
    for modeName in modesToRun:
        mode = _MODES[modeName]
        _applyEnvForMode(mode)
        modeReport = _ModeReport(mode=mode)
        print(f"\n=== Mode: {mode.label} ===")
        for position, question in enumerate(allQuestions, start=1):
            print(f" [{position:>2}/{questionCount}] {question.id}: {question.question[:80]} ...", flush=True)
            run = await _runOneQuestion(
                services=services,
                userId=userId,
                mandateId=mandateId,
                fixture=fixture,
                question=question,
                mode=mode,
            )
            score = _scoreRun(question, run)
            modeReport.perQuestion.append((question, run, score))
            progress = (
                f" -> acc={_yn(score.accuracyOk)} "
                f"pattern={_yn(score.patternOk)} forbid={_yn(score.forbidOk)} "
                f"numeric={_yn(score.numericOk)} rounds={run.rounds} cost={run.costCHF:.4f} "
                f"val-fail={run.validationFailures} repairs={run.repairAttempts}/{run.successAfterRepair}"
            )
            print(progress, flush=True)
        reports.append(modeReport)

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    outDir = _GATEWAY_DIR.parent / "local" / "notes"
    reportPath = outDir / f"trustee-benchmark-{timestamp}.md"
    _writeReport(reports, reportPath)

    # Raw dump: one object per mode with its metrics and a per-question list.
    payload = []
    for modeReport in reports:
        items = []
        for q, r, s in modeReport.perQuestion:
            items.append({
                "questionId": q.id,
                "intent": q.intent,
                "accuracyOk": s.accuracyOk,
                "patternOk": s.patternOk,
                "forbidOk": s.forbidOk,
                "numericOk": s.numericOk,
                "rounds": r.rounds,
                "validationFailures": r.validationFailures,
                "repairAttempts": r.repairAttempts,
                "successAfterRepair": r.successAfterRepair,
                "costCHF": r.costCHF,
                "durationS": r.durationS,
                "finalText": (r.finalText or "")[:600],
                "toolCalls": r.toolCalls,
                "error": r.error,
            })
        payload.append({
            "mode": modeReport.mode.name,
            "accuracy": modeReport.accuracy,
            "patternCompliance": modeReport.patternCompliance,
            "repairConversionRate": modeReport.repairConversionRate,
            "totalCostCHF": modeReport.totalCostCHF,
            "totalRounds": modeReport.totalRounds,
            "totalValidationFailures": modeReport.totalValidationFailures,
            "items": items,
        })

    rawJsonPath = outDir / f"trustee-benchmark-{timestamp}.json"
    rawJsonPath.write_text(
        json.dumps(payload, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )

    print(f"\nReport written: {reportPath}")
    print(f"Raw JSON: {rawJsonPath}")
    for modeReport in reports:
        print(f" {modeReport.mode.label}: acc={modeReport.accuracy:.1%} pattern={modeReport.patternCompliance:.1%} cost={modeReport.totalCostCHF:.4f}")
|
||||
|
||||
|
||||
def _parseArgs(argv: List[str]) -> Tuple[List[str], Optional[str]]:
|
||||
modes: List[str] = []
|
||||
only: Optional[str] = None
|
||||
for arg in argv:
|
||||
if arg.startswith("--only="):
|
||||
only = arg.split("=", 1)[1]
|
||||
elif arg in _MODES:
|
||||
modes.append(arg)
|
||||
else:
|
||||
print(f"Unknown argument: {arg!r}. Allowed modes: {list(_MODES)}")
|
||||
sys.exit(2)
|
||||
if not modes:
|
||||
modes = ["baseline", "phase1", "phase2"]
|
||||
return modes, only
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: parse ``sys.argv`` and run the benchmark."""
    selectedModes, questionFilter = _parseArgs(sys.argv[1:])
    benchmark = _runMain(selectedModes, onlyQuestionId=questionFilter)
    asyncio.run(benchmark)


if __name__ == "__main__":
    main()
|
||||
16
tests/fixtures/trusteeBenchmark/__init__.py
vendored
Normal file
16
tests/fixtures/trusteeBenchmark/__init__.py
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Trustee benchmark fixture: synthetic but realistic Swiss KMU accounting data.
|
||||
|
||||
Used by the Feature Data Sub-Agent eval harness (Phase 1.5) to measure
|
||||
hallucination rates against a fixed gold standard. Data is built in-memory
|
||||
via Pydantic models -- no SQL, no DB connection -- so the harness stays
|
||||
hermetic and reproducible.
|
||||
"""
|
||||
|
||||
from tests.fixtures.trusteeBenchmark.loadTrusteeBenchmarkFixture import (
|
||||
buildTrusteeBenchmarkFixture,
|
||||
BenchmarkFixture,
|
||||
)
|
||||
|
||||
__all__ = ["buildTrusteeBenchmarkFixture", "BenchmarkFixture"]
|
||||
275
tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py
vendored
Normal file
275
tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py
vendored
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Synthetic Trustee benchmark fixture for the Feature Data Sub-Agent eval.
|
||||
|
||||
Builds an in-memory snapshot of one fictional Swiss KMU mandate
|
||||
("Demo AG") with:
|
||||
|
||||
* 3 fiscal years (2023, 2024, 2025) of `TrusteeDataAccountBalance` rows
|
||||
-- both annual totals (periodMonth=0) and monthly snapshots.
|
||||
* 8 representative accounts spanning all major chart-of-accounts blocks
|
||||
(cash, banks, receivables, payables, revenue, materials,
|
||||
operating expenses).
|
||||
* Two `TrusteeDataJournalLine` rows (one debit, one credit) per 2025 journal
|
||||
entry, so debit/credit/COUNT aggregations have meaningful answers.
|
||||
|
||||
The data is deterministic (no RNG) so a question's gold-standard answer
|
||||
is stable across runs.
|
||||
|
||||
This module deliberately stays decoupled from the production DB pipeline
|
||||
-- the harness uses :class:`FakeFeatureDataProvider` (see
|
||||
``gateway/tests/eval/fakeFeatureDataProvider.py``) to serve queries
|
||||
against this in-memory snapshot, mirroring the public methods of
|
||||
``FeatureDataProvider`` (browseTable / queryTable / aggregateTable).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
_MANDATE_ID = "m-demo-ag"
|
||||
_FEATURE_INSTANCE_ID = "fi-demo-ag-trustee"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Account master data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_ACCOUNT_MASTER: List[Dict[str, Any]] = [
|
||||
{"accountNumber": "1000", "label": "Hauptkasse", "accountType": "asset", "currency": "CHF"},
|
||||
{"accountNumber": "1020", "label": "ZKB Geschaeftskonto", "accountType": "asset", "currency": "CHF"},
|
||||
{"accountNumber": "1021", "label": "PostFinance", "accountType": "asset", "currency": "CHF"},
|
||||
{"accountNumber": "1100", "label": "Forderungen aus Lieferungen und Leistungen", "accountType": "asset", "currency": "CHF"},
|
||||
{"accountNumber": "2000", "label": "Verbindlichkeiten aus Lieferungen", "accountType": "liability", "currency": "CHF"},
|
||||
{"accountNumber": "3000", "label": "Ertrag aus Beratung", "accountType": "revenue", "currency": "CHF"},
|
||||
{"accountNumber": "5400", "label": "Materialaufwand", "accountType": "expense", "currency": "CHF"},
|
||||
{"accountNumber": "6000", "label": "Mietaufwand", "accountType": "expense", "currency": "CHF"},
|
||||
]
|
||||
|
||||
|
||||
# Annual closing balances per (year, accountNumber) -- the canonical reference.
|
||||
# Asset/expense balances are positive, liability/revenue balances are stored
|
||||
# as positive numbers (sign by accountType, like most accounting systems).
|
||||
_ANNUAL_CLOSING: Dict[int, Dict[str, float]] = {
|
||||
2023: {
|
||||
"1000": 4_800.00,
|
||||
"1020": 132_500.00,
|
||||
"1021": 22_400.00,
|
||||
"1100": 58_200.00,
|
||||
"2000": 41_300.00,
|
||||
"3000": 410_000.00,
|
||||
"5400": 92_000.00,
|
||||
"6000": 36_000.00,
|
||||
},
|
||||
2024: {
|
||||
"1000": 5_200.00,
|
||||
"1020": 148_900.00,
|
||||
"1021": 26_750.00,
|
||||
"1100": 61_400.00,
|
||||
"2000": 44_100.00,
|
||||
"3000": 462_500.00,
|
||||
"5400": 104_300.00,
|
||||
"6000": 39_000.00,
|
||||
},
|
||||
2025: {
|
||||
"1000": 5_900.00,
|
||||
"1020": 152_400.00,
|
||||
"1021": 28_100.00,
|
||||
"1100": 66_800.00,
|
||||
"2000": 47_900.00,
|
||||
"3000": 488_700.00,
|
||||
"5400": 112_100.00,
|
||||
"6000": 42_000.00,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _openingFromPriorYear(year: int, accountNumber: str) -> float:
    """Return the closing balance of ``year - 1`` as the opening balance of ``year``.

    Falls back to 0.0 when ``_ANNUAL_CLOSING`` has no entry for the prior year
    or for the account within it.
    """
    priorClosings = _ANNUAL_CLOSING.get(year - 1)
    if priorClosings is None:
        return 0.0
    return float(priorClosings.get(accountNumber, 0.0))
|
||||
|
||||
|
||||
def _monthlyProgression(opening: float, closing: float, month: int) -> float:
|
||||
"""Linear interpolation between opening and closing for monthly snapshots.
|
||||
|
||||
Not realistic in detail but deterministic and monotonic per account, so
|
||||
questions about "Stand per Ende März" produce stable answers.
|
||||
"""
|
||||
if month <= 0:
|
||||
return float(closing)
|
||||
frac = month / 12.0
|
||||
return round(float(opening) + (float(closing) - float(opening)) * frac, 2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Journal entries / lines -- minimal but realistic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_JOURNAL_ENTRIES_2025: List[Dict[str, Any]] = [
|
||||
{"month": 3, "day": 15, "reference": "RG-2025-0042", "description": "Beratung Kunde ACME AG", "amount": 18_500.00, "debit": "1100", "credit": "3000"},
|
||||
{"month": 3, "day": 22, "reference": "EK-2025-0017", "description": "Materialeinkauf Buehler AG", "amount": 9_200.00, "debit": "5400", "credit": "2000"},
|
||||
{"month": 3, "day": 28, "reference": "MIETE-2025-03", "description": "Mietzins Buero Maerz", "amount": 3_000.00, "debit": "6000", "credit": "1020"},
|
||||
{"month": 4, "day": 5, "reference": "RG-2025-0051", "description": "Beratung Kunde Bell AG", "amount": 24_300.00, "debit": "1100", "credit": "3000"},
|
||||
{"month": 4, "day": 18, "reference": "EK-2025-0024", "description": "Materialeinkauf Industriebedarf", "amount": 7_800.00, "debit": "5400", "credit": "2000"},
|
||||
{"month": 6, "day": 12, "reference": "RG-2025-0079", "description": "Beratung Kunde Bell AG", "amount": 32_100.00, "debit": "1100", "credit": "3000"},
|
||||
{"month": 6, "day": 30, "reference": "MIETE-2025-Q2", "description": "Mietzins Buero Q2-Abrechnung", "amount": 3_500.00, "debit": "6000", "credit": "1020"},
|
||||
{"month": 9, "day": 4, "reference": "RG-2025-0114", "description": "Beratung Kunde Migros", "amount": 41_500.00, "debit": "1100", "credit": "3000"},
|
||||
{"month": 9, "day": 25, "reference": "EK-2025-0061", "description": "Materialeinkauf Buehler AG", "amount": 12_400.00, "debit": "5400", "credit": "2000"},
|
||||
{"month": 11, "day": 14, "reference": "RG-2025-0188", "description": "Beratung Kunde ACME AG", "amount": 28_700.00, "debit": "1100", "credit": "3000"},
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot containers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class BenchmarkFixture:
    """Container for the in-memory rows that stand in for feature DB tables.

    ``rowsByTable`` maps a table name to its rows, each row being a plain
    column dict intended to be compatible with the Pydantic feature data
    models (TrusteeDataAccountBalance, etc.).
    """

    # Identifiers the generated rows are scoped to.
    mandateId: str
    featureInstanceId: str
    # Table name -> list of row dicts; a fresh empty dict per instance.
    rowsByTable: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    # Table descriptors handed to the sub-agent; a fresh empty list per instance.
    selectedTables: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
def _buildSelectedTables() -> List[Dict[str, Any]]:
|
||||
"""Return the DATA_OBJECT-shaped descriptors the sub-agent expects.
|
||||
|
||||
Mirrors what the catalog would return for the trustee feature; the
|
||||
real `getDataObjects("trustee")` call would yield the same shape but
|
||||
we hard-code the three tables we actually populate.
|
||||
"""
|
||||
return [
|
||||
{
|
||||
"objectKey": "data.feature.trustee.TrusteeDataAccount",
|
||||
"label": {"de": "Kontenplan", "en": "Chart of accounts"},
|
||||
"meta": {
|
||||
"table": "TrusteeDataAccount",
|
||||
"fields": ["id", "accountNumber", "label", "accountType", "currency", "isActive"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"objectKey": "data.feature.trustee.TrusteeDataAccountBalance",
|
||||
"label": {"de": "Kontosalden", "en": "Account balances"},
|
||||
"meta": {
|
||||
"table": "TrusteeDataAccountBalance",
|
||||
"fields": [
|
||||
"id", "accountNumber", "periodYear", "periodMonth",
|
||||
"openingBalance", "debitTotal", "creditTotal",
|
||||
"closingBalance", "currency",
|
||||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
"objectKey": "data.feature.trustee.TrusteeDataJournalLine",
|
||||
"label": {"de": "Buchungszeilen", "en": "Journal lines"},
|
||||
"meta": {
|
||||
"table": "TrusteeDataJournalLine",
|
||||
"fields": [
|
||||
"id", "journalEntryId", "accountNumber",
|
||||
"debitAmount", "creditAmount", "currency", "description",
|
||||
],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def buildTrusteeBenchmarkFixture() -> BenchmarkFixture:
    """Assemble the full in-memory benchmark snapshot from the module seed data.

    Produces three tables:

    * ``TrusteeDataAccount``: one row per ``_ACCOUNT_MASTER`` entry.
    * ``TrusteeDataAccountBalance``: per year and account, one annual row
      (``periodMonth`` 0) followed by twelve monthly rows.
    * ``TrusteeDataJournalLine``: a debit row and a credit row per entry in
      ``_JOURNAL_ENTRIES_2025``.

    Every row ends with ``mandateId`` and ``featureInstanceId`` columns so a
    provider can scope rows by them.
    """
    # Scoping columns appended last to every generated row.
    scope: Dict[str, Any] = {
        "mandateId": _MANDATE_ID,
        "featureInstanceId": _FEATURE_INSTANCE_ID,
    }

    accountRows: List[Dict[str, Any]] = [
        {
            "id": f"acc-{position:03d}",
            "accountNumber": master["accountNumber"],
            "label": master["label"],
            "accountType": master["accountType"],
            "currency": master["currency"],
            "isActive": True,
            **scope,
        }
        for position, master in enumerate(_ACCOUNT_MASTER)
    ]

    balanceRows: List[Dict[str, Any]] = []

    def _addBalance(accountNumber, year, month, opening, debit, credit, closingValue) -> None:
        # The running id is the number of rows emitted so far.
        balanceRows.append({
            "id": f"bal-{len(balanceRows):04d}",
            "accountNumber": accountNumber,
            "periodYear": year,
            "periodMonth": month,
            "openingBalance": opening,
            "debitTotal": debit,
            "creditTotal": credit,
            "closingBalance": closingValue,
            "currency": "CHF",
            **scope,
        })

    for year, closings in _ANNUAL_CLOSING.items():
        for accountNumber, closing in closings.items():
            opening = _openingFromPriorYear(year, accountNumber)

            # Annual row: totals are derived from the year's net increase only.
            yearGain = max(closing - opening, 0.0)
            _addBalance(
                accountNumber, year, 0, opening,
                round(yearGain * 1.2, 2), round(yearGain * 0.2, 2), float(closing),
            )

            # Monthly rows keep the year's opening balance as their opening.
            for month in range(1, 13):
                monthly = _monthlyProgression(opening, closing, month)
                if monthly > opening:
                    gain = monthly - opening
                    debit, credit = round(gain * 1.2, 2), round(gain * 0.2, 2)
                else:
                    debit = credit = 0.0
                _addBalance(accountNumber, year, month, opening, debit, credit, monthly)

    lineRows: List[Dict[str, Any]] = []
    for entryIdx, entry in enumerate(_JOURNAL_ENTRIES_2025):
        amount = float(entry["amount"])
        # Debit line gets the even id, credit line the following odd id.
        for offset, side in enumerate(("debit", "credit")):
            isDebit = side == "debit"
            lineRows.append({
                "id": f"jl-{entryIdx * 2 + offset:04d}",
                "journalEntryId": f"je-2025-{entryIdx:03d}",
                "accountNumber": entry[side],
                "debitAmount": amount if isDebit else 0.0,
                "creditAmount": 0.0 if isDebit else amount,
                "currency": "CHF",
                "description": entry["description"],
                **scope,
            })

    return BenchmarkFixture(
        mandateId=_MANDATE_ID,
        featureInstanceId=_FEATURE_INSTANCE_ID,
        rowsByTable={
            "TrusteeDataAccount": accountRows,
            "TrusteeDataAccountBalance": balanceRows,
            "TrusteeDataJournalLine": lineRows,
        },
        selectedTables=_buildSelectedTables(),
    )
|
||||
226
tests/fixtures/trusteeBenchmark/questions.yaml
vendored
Normal file
226
tests/fixtures/trusteeBenchmark/questions.yaml
vendored
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
# Trustee Sub-Agent Benchmark -- 19 questions analog Hein 2025
|
||||
#
|
||||
# Each question covers ONE expected hallucination class so we can attribute
|
||||
# accuracy gains to specific phases (validator / ontology).
|
||||
#
|
||||
# Scoring per question (all binary unless noted):
|
||||
# patternOk -- did the agent call the right tool(s) with the right filters?
|
||||
# forbidOk -- did it AVOID the forbidden tool/op (e.g. SUM closingBalance)?
|
||||
# numericOk -- does the final answer contain the expected number(s)?
|
||||
# accuracyOk -- patternOk AND forbidOk AND numericOk
|
||||
#
|
||||
# numericTolerance: relative tolerance for numeric comparison (default 0.005 = 0.5 %).
|
||||
|
||||
- id: q01
|
||||
question: "Was ist der Banksaldo per 31.12.2025 fuer das ZKB-Konto 1020?"
|
||||
intent: BANK_BALANCE_AT_DATE
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "1020"
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [152400.0]
|
||||
|
||||
- id: q02
|
||||
question: "Wie hoch ist die Hauptkasse (Konto 1000) per Ende 2024?"
|
||||
intent: CASH_BALANCE_AT_DATE
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "1000"
|
||||
periodYear: 2024
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [5200.0]
|
||||
|
||||
- id: q03
|
||||
question: "Summiere alle Bankkonten (102x) per 31.12.2025."
|
||||
intent: BANK_GROUP_TOTAL_AT_DATE
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
accountNumberLike: "102%"
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [180500.0]
|
||||
numericTolerance: 0.01
|
||||
|
||||
- id: q04
|
||||
question: "Wie hat sich der Schlusssaldo des ZKB-Kontos 1020 ueber die Jahre 2023 bis 2025 entwickelt?"
|
||||
intent: BALANCE_HISTORY_PER_YEAR
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "1020"
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [132500.0, 148900.0, 152400.0]
|
||||
|
||||
- id: q05
|
||||
question: "Welches Konto hatte 2025 den hoechsten Schlusssaldo bei den Aktiven (1xxx)?"
|
||||
intent: TOP_ASSET_AT_DATE
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
accountNumberLike: "1%"
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedAnswerContains: ["1020"]
|
||||
expectedNumbers: [152400.0]
|
||||
|
||||
- id: q06
|
||||
question: "Welche Konten gehoeren zu den Bankkonten (102x)?"
|
||||
intent: ACCOUNT_LIST_FILTER
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccount
|
||||
requiredFilters:
|
||||
accountNumberLike: "102%"
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedAnswerContains: ["1020", "1021"]
|
||||
|
||||
- id: q07
|
||||
question: "Wie hoch war der Materialaufwand (Konto 5400) im Jahr 2025?"
|
||||
intent: EXPENSE_AT_YEAR
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "5400"
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [112100.0]
|
||||
|
||||
- id: q08
|
||||
question: "Wie viele Buchungszeilen gibt es insgesamt im System?"
|
||||
intent: COUNT_ROWS
|
||||
expectedTools: [aggregateTable]
|
||||
expectedTable: TrusteeDataJournalLine
|
||||
expectedAggregate: COUNT
|
||||
forbiddenTools: []
|
||||
expectedNumbers: [20]
|
||||
|
||||
- id: q09
|
||||
question: "Wie hoch ist der gesamte Beratungsertrag (Konto 3000) im Jahr 2025?"
|
||||
intent: REVENUE_AT_YEAR
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "3000"
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [488700.0]
|
||||
|
||||
- id: q10
|
||||
question: "Wie viel wurde 2025 auf das Materialaufwand-Konto 5400 gebucht (Soll-Summe ueber Buchungszeilen)?"
|
||||
intent: JOURNAL_SUM_AT_ACCOUNT
|
||||
expectedTools: [aggregateTable]
|
||||
expectedTable: TrusteeDataJournalLine
|
||||
expectedAggregate: SUM
|
||||
expectedAggregateField: debitAmount
|
||||
requiredFilters:
|
||||
accountNumber: "5400"
|
||||
forbiddenTools: []
|
||||
expectedNumbers: [29400.0]
|
||||
numericTolerance: 0.01
|
||||
|
||||
- id: q11
|
||||
question: "Welche Buchungen im 1. Quartal 2025 (Januar bis Maerz) wurden auf Konto 3000 gebucht?"
|
||||
intent: JOURNAL_LINES_BY_ACCOUNT
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataJournalLine
|
||||
requiredFilters:
|
||||
accountNumber: "3000"
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedAnswerContains: ["18500", "ACME"]
|
||||
|
||||
- id: q12
|
||||
question: "Wie hoch war die Hauptkasse (Konto 1000) jeweils per Ende Maerz 2025 und per Ende Juni 2025?"
|
||||
intent: MULTI_MONTH_SNAPSHOT
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "1000"
|
||||
periodYear: 2025
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [5375.0, 5550.0]
|
||||
numericTolerance: 0.01
|
||||
|
||||
- id: q13
|
||||
question: "Wie hoch ist die Summe aller Aufwandskonten (5xxx und 6xxx) per Ende 2025?"
|
||||
intent: EXPENSE_GROUP_TOTAL
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [154100.0]
|
||||
numericTolerance: 0.01
|
||||
|
||||
- id: q14
|
||||
question: "Welches Konto hat den hoechsten openingBalance fuer 2025?"
|
||||
intent: TOP_OPENING_BALANCE
|
||||
# Both routes are legitimate: queryTable+orderBy+limit=1, or
|
||||
# aggregateTable(MAX) followed by queryTable lookup. We only insist that
|
||||
# the final answer names the right account and (optionally) the value.
|
||||
expectedTools: [queryTable, aggregateTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
forbiddenTools: []
|
||||
expectedAnswerContains: ["3000"]
|
||||
expectedNumbers: [462500.0]
|
||||
|
||||
- id: q15
|
||||
question: "Liste alle Konten vom Typ asset auf."
|
||||
intent: ACCOUNTS_BY_TYPE
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccount
|
||||
requiredFilters:
|
||||
accountType: "asset"
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedAnswerContains: ["1000", "1020", "1021", "1100"]
|
||||
|
||||
- id: q16
|
||||
question: "Wie hoch ist der Schlusssaldo der Forderungen aus Lieferungen und Leistungen (Konto 1100) per Ende 2025?"
|
||||
intent: BALANCE_BY_NAME_LOOKUP
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "1100"
|
||||
periodYear: 2025
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [66800.0]
|
||||
|
||||
- id: q17
|
||||
question: "Wie hoch waren die Verbindlichkeiten (Konto 2000) jeweils per Ende 2023, 2024 und 2025?"
|
||||
intent: LIABILITY_HISTORY
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataAccountBalance
|
||||
requiredFilters:
|
||||
accountNumber: "2000"
|
||||
periodMonth: 0
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedNumbers: [41300.0, 44100.0, 47900.0]
|
||||
|
||||
- id: q18
|
||||
question: "Wie viele Bankkonten gibt es im Kontenplan (102x)?"
|
||||
intent: ACCOUNT_COUNT_BY_PREFIX
|
||||
expectedTools: [queryTable, aggregateTable]
|
||||
expectedTable: TrusteeDataAccount
|
||||
requiredFilters:
|
||||
accountNumberLike: "102%"
|
||||
forbiddenTools: []
|
||||
expectedNumbers: [2]
|
||||
|
||||
- id: q19
|
||||
question: "Gib mir alle Buchungszeilen mit einem Sollbetrag groesser als 20'000 CHF."
|
||||
intent: JOURNAL_LINES_BY_AMOUNT
|
||||
expectedTools: [queryTable]
|
||||
expectedTable: TrusteeDataJournalLine
|
||||
forbiddenTools: [aggregateTable]
|
||||
expectedAnswerContains: ["24300", "32100", "41500", "28700"]
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
"""Unit tests for Google STT helper config (no API calls)."""
|
||||
|
||||
from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields
|
||||
|
||||
|
||||
def test_buildPrimaryStt_lightweight_stripsHeavyFeatures():
    """Lightweight mode keeps the model and punctuation but disables the rest."""
    fields = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
    assert fields["model"] == "latest_short"
    assert fields["max_alternatives"] == 1
    for flag in ("enable_word_time_offsets", "enable_word_confidence", "use_enhanced"):
        assert fields[flag] is False, flag
    assert fields["enable_automatic_punctuation"] is True
|
||||
|
||||
|
||||
def test_buildPrimaryStt_full_matchesLegacyDefaults():
    """Full mode enables word offsets, confidence, enhanced and 3 alternatives."""
    fields = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
    assert fields["model"] == "latest_long"
    assert fields["max_alternatives"] == 3
    for flag in ("enable_word_time_offsets", "enable_word_confidence", "use_enhanced"):
        assert fields[flag] is True, flag
|
||||
112
tests/unit/serviceAgent/test_agentTrace_repairCounters.py
Normal file
112
tests/unit/serviceAgent/test_agentTrace_repairCounters.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Unit tests for the repair-loop telemetry aggregation in agentLoop.
|
||||
|
||||
These counters (`validationFailures`, `repairAttempts`, `successAfterRepair`)
|
||||
land on `AgentTrace` and are surfaced via the `AGENT_SUMMARY` event. The
|
||||
Eval-Harness (Phase 1.5) reads them to compute the repair conversion rate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from modules.serviceCenter.services.serviceAgent.agentLoop import _computeRepairCounters
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
|
||||
AgentRoundLog, ToolCallLog,
|
||||
)
|
||||
|
||||
|
||||
def _round(*toolCalls: ToolCallLog) -> AgentRoundLog:
    """Wrap the given tool calls in an ``AgentRoundLog`` with round number 1."""
    calls = [*toolCalls]
    return AgentRoundLog(roundNumber=1, toolCalls=calls)
|
||||
|
||||
|
||||
def _failed(toolName: str, code: str) -> ToolCallLog:
    """Build an unsuccessful `ToolCallLog` carrying `code` as its validation failure code."""
    errorText = f"{code}: ..."
    return ToolCallLog(
        toolName=toolName,
        validationFailureCode=code,
        error=errorText,
        success=False,
    )
|
||||
|
||||
|
||||
def _ok(toolName: str) -> ToolCallLog:
    """Build a successful `ToolCallLog` for the named tool."""
    return ToolCallLog(success=True, toolName=toolName)
|
||||
|
||||
|
||||
def test_computeRepairCounters_emptyTrace():
    """An empty list of rounds yields all-zero counters."""
    counters = _computeRepairCounters([])
    assert tuple(counters) == (0, 0, 0)
|
||||
|
||||
|
||||
def test_computeRepairCounters_allCleanRunsHaveZeroCounters():
    """Rounds that contain only successful calls leave every counter at zero."""
    firstRound = _round(_ok("queryTable"), _ok("browseTable"))
    secondRound = _round(_ok("aggregateTable"))

    counters = _computeRepairCounters([firstRound, secondRound])

    assert tuple(counters) == (0, 0, 0)
|
||||
|
||||
|
||||
def test_computeRepairCounters_singleFailureCountsButNoRepairYet():
    """A lone failure without any follow-up call counts as one validation
    failure, zero repair attempts and zero successful repairs."""
    onlyRound = _round(_failed("queryTable", "FIELD_NOT_FOUND"))

    counters = _computeRepairCounters([onlyRound])

    assert tuple(counters) == (1, 0, 0)
|
||||
|
||||
|
||||
def test_computeRepairCounters_repairThatSucceeds():
    """A failed call followed by a successful call of the same tool in the
    next round counts as one failure, one attempt and one successful repair."""
    failingRound = _round(_failed("queryTable", "FIELD_NOT_FOUND"))
    retryRound = _round(_ok("queryTable"))

    counters = _computeRepairCounters([failingRound, retryRound])

    assert tuple(counters) == (1, 1, 1)
|
||||
|
||||
|
||||
def test_computeRepairCounters_repairThatFailsAgain():
    """A retry of the same tool that is rejected again adds a second failure
    and one attempt, but no successful repair."""
    failingRound = _round(_failed("queryTable", "FIELD_NOT_FOUND"))
    retryRound = _round(_failed("queryTable", "FIELD_NOT_FOUND"))

    counters = _computeRepairCounters([failingRound, retryRound])

    assert tuple(counters) == (2, 1, 0)
|
||||
|
||||
|
||||
def test_computeRepairCounters_siblingCallsInSameRoundAreNotRepairs():
    """Two failing queryTable calls inside the SAME round are two failures
    but no repair attempt: the second call could not have seen the first
    one's rejection yet (parallel dispatch within a round)."""
    siblings = (
        _failed("queryTable", "FIELD_NOT_FOUND"),
        _failed("queryTable", "FIELD_NOT_FOUND"),
    )

    counters = _computeRepairCounters([_round(*siblings)])

    assert tuple(counters) == (2, 0, 0)
|
||||
|
||||
|
||||
def test_computeRepairCounters_differentToolNamesAreIndependent():
    """A later browseTable call is not treated as a repair of an earlier
    queryTable failure."""
    failingRound = _round(_failed("queryTable", "FIELD_NOT_FOUND"))
    otherToolRound = _round(_ok("browseTable"))

    counters = _computeRepairCounters([failingRound, otherToolRound])

    assert tuple(counters) == (1, 0, 0)
|
||||
|
||||
|
||||
def test_computeRepairCounters_multiToolMix():
    """Trustee-like sequence: an aggregateTable call is rejected, the LLM
    switches to queryTable with a typo (rejected), then fixes the typo
    (success)."""
    sequence = [
        _round(_failed("aggregateTable", "INVALID_AGGREGATE_TARGET")),
        _round(_failed("queryTable", "FIELD_NOT_FOUND")),
        _round(_ok("queryTable")),
    ]

    counters = _computeRepairCounters(sequence)

    # Two validation failures overall. Only queryTable was retried after a
    # rejection (and that retry succeeded); aggregateTable was never called
    # again, so it contributes no repair attempt.
    assert tuple(counters) == (2, 1, 1)
|
||||
|
|
@ -100,6 +100,9 @@ def _adapter(svc):
|
|||
return SimpleNamespace(_svc=svc)
|
||||
|
||||
|
||||
# Single data source with neutralization off; passed as `dataSources` to
# bootstrapClickup by the tests in this module.
_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}]
|
||||
|
||||
|
||||
def test_bootstrap_walks_team_space_lists_and_tasks():
|
||||
svc = _FakeClickupService(taskCount=2)
|
||||
knowledge = _FakeKnowledgeService()
|
||||
|
|
@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
|
|||
async def _run():
|
||||
return await bootstrapClickup(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=_adapter(svc),
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
|
|||
assert job.mimeType == "application/vnd.clickup.task+json"
|
||||
assert job.mandateId == "m1"
|
||||
assert job.provenance["connectionId"] == "c1"
|
||||
assert job.provenance["dataSourceId"] == "ds-1"
|
||||
assert job.provenance["authority"] == "clickup"
|
||||
assert job.provenance["teamId"] == "team-1"
|
||||
assert job.contentVersion # numeric millisecond string
|
||||
# At least the header content-object is present.
|
||||
ids = [co["contentObjectId"] for co in job.contentObjects]
|
||||
assert "header" in ids
|
||||
|
||||
|
|
@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
|
|||
async def _run():
|
||||
return await bootstrapClickup(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=_adapter(svc),
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays():
|
|||
async def _run():
|
||||
return await bootstrapClickup(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=_adapter(svc),
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion():
|
|||
async def _run():
|
||||
return await bootstrapClickup(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=_adapter(svc),
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion():
|
|||
assert result["indexed"] == 3
|
||||
|
||||
|
||||
def test_bootstrap_skips_when_no_datasources():
    """Calling bootstrapClickup with only a connection id reports a skip
    with reason "no_datasources"."""

    async def _invoke():
        outcome = await bootstrapClickup(connectionId="c1")
        return outcome

    summary = asyncio.run(_invoke())

    assert summary["skipped"] is True
    assert summary["reason"] == "no_datasources"
|
||||
|
||||
|
||||
def test_bootstrap_honours_datasource_neutralize():
    """Every ingestion job handed to the knowledge service carries the data
    source's `neutralize` flag and its id in the provenance."""
    clickupService = _FakeClickupService(taskCount=1)
    knowledgeSink = _FakeKnowledgeService()
    fakeConnection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _invoke():
        bootstrapKwargs = {
            "connectionId": "c1",
            "dataSources": [{"id": "ds-n", "neutralize": True}],
            "adapter": _adapter(clickupService),
            "connection": fakeConnection,
            "knowledgeService": knowledgeSink,
            "limits": ClickupBootstrapLimits(maxAgeDays=None),
        }
        return await bootstrapClickup(**bootstrapKwargs)

    asyncio.run(_invoke())

    for recordedJob in knowledgeSink.calls:
        assert recordedJob.neutralize is True
        assert recordedJob.provenance["dataSourceId"] == "ds-n"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct-execution entry point: run every test in this module in order.
    for _testFn in (
        test_bootstrap_walks_team_space_lists_and_tasks,
        test_bootstrap_reports_duplicates_on_second_run,
        test_bootstrap_skips_tasks_older_than_maxAgeDays,
        test_bootstrap_maxTasks_caps_ingestion,
        test_bootstrap_skips_when_no_datasources,
        test_bootstrap_honours_datasource_neutralize,
    ):
        _testFn()
    print("OK — bootstrapClickup tests passed")
|
||||
|
|
|
|||
|
|
@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options):
|
|||
)
|
||||
|
||||
|
||||
# Single data source at path "/" with neutralization off; passed as
# `dataSources` to bootstrapGdrive by the tests in this module.
_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}]
|
||||
|
||||
|
||||
def test_bootstrap_walks_drive_and_subfolders():
|
||||
adapter = _FakeDriveAdapter()
|
||||
knowledge = _FakeKnowledgeService()
|
||||
|
|
@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders():
|
|||
async def _run():
|
||||
return await bootstrapGdrive(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
|
|||
async def _run():
|
||||
return await bootstrapGdrive(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays():
|
|||
async def _run():
|
||||
return await bootstrapGdrive(
|
||||
connectionId="c1",
|
||||
dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}],
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
runExtractionFn=_fakeRunExtraction,
|
||||
limits=GdriveBootstrapLimits(maxAgeDays=180),
|
||||
)
|
||||
|
||||
result = asyncio.run(_run())
|
||||
|
|
@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance():
|
|||
async def _run():
|
||||
return await bootstrapGdrive(
|
||||
connectionId="c1",
|
||||
dataSources=_DEFAULT_DS,
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance():
|
|||
assert job.sourceKind == "gdrive_item"
|
||||
assert job.mandateId == "m1"
|
||||
assert job.provenance["connectionId"] == "c1"
|
||||
assert job.provenance["dataSourceId"] == "ds1"
|
||||
assert job.provenance["authority"] == "google"
|
||||
assert job.provenance["service"] == "drive"
|
||||
assert job.contentVersion # modifiedTime ISO string
|
||||
|
||||
|
||||
def test_bootstrap_skips_when_no_datasources():
    """Calling bootstrapGdrive with only a connection id reports a skip
    with reason "no_datasources"."""

    async def _invoke():
        outcome = await bootstrapGdrive(connectionId="c1")
        return outcome

    summary = asyncio.run(_invoke())

    assert summary["skipped"] is True
    assert summary["reason"] == "no_datasources"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct-execution entry point: run every test in this module in order.
    for _testFn in (
        test_bootstrap_walks_drive_and_subfolders,
        test_bootstrap_reports_duplicates_on_second_run,
        test_bootstrap_skips_files_older_than_maxAgeDays,
        test_bootstrap_passes_connection_provenance,
        test_bootstrap_skips_when_no_datasources,
    ):
        _testFn()
    print("OK — bootstrapGdrive tests passed")
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
|
|||
async def _run():
|
||||
return await bootstrapOutlook(
|
||||
connectionId="c1",
|
||||
dataSources=[{"id": "ds1", "neutralize": False}],
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
|
|||
assert job.sourceKind == "outlook_message"
|
||||
assert job.mimeType == "message/rfc822"
|
||||
assert job.provenance["connectionId"] == "c1"
|
||||
assert job.provenance["dataSourceId"] == "ds1"
|
||||
assert job.provenance["service"] == "outlook"
|
||||
assert job.contentVersion == "ck1"
|
||||
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
|
||||
|
|
@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination():
|
|||
async def _run():
|
||||
return await bootstrapOutlook(
|
||||
connectionId="c1",
|
||||
dataSources=[{"id": "ds1", "neutralize": False}],
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates():
|
|||
async def _run():
|
||||
return await bootstrapOutlook(
|
||||
connectionId="c1",
|
||||
dataSources=[{"id": "ds1", "neutralize": False}],
|
||||
adapter=adapter,
|
||||
connection=connection,
|
||||
knowledgeService=knowledge,
|
||||
|
|
|
|||
|
|
@ -19,11 +19,18 @@ asked for the closing balance per period).
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from modules.shared import fkRegistry
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelAgent import (
|
||||
ToolCallRequest, ToolResult,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.featureDataAgent import (
|
||||
_buildSchemaContext,
|
||||
_buildSubAgentTools,
|
||||
_buildTableSchemaBlock,
|
||||
_formatFieldLine,
|
||||
_summarizePythonType,
|
||||
|
|
@ -152,10 +159,29 @@ def test_buildSchemaContext_forbidsSummingAggregateFields():
|
|||
assert "closingBalance" in prompt
|
||||
|
||||
|
||||
def test_buildSchemaContext_appendsTrusteeDomainHints():
|
||||
"""When the feature module exposes getAgentDomainHints(), the schema prompt
|
||||
must include those hints so the sub-agent knows e.g. that 102x are bank
|
||||
accounts and periodMonth=0 is the annual total."""
|
||||
def test_buildSchemaContext_appendsTrusteeOntologyBlock(monkeypatch):
    """With the ontology kill-switch env var unset, the trustee schema
    prompt must contain the compiled ontology block (Phase 2 path)."""
    monkeypatch.delenv("POWERON_DISABLE_FEATURE_ONTOLOGY", raising=False)
    tables = [_trusteeAccountBalanceObj()]

    prompt = _buildSchemaContext(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=tables,
        requestLang="de",
    )

    assert "DOMAIN ONTOLOGY (trustee):" in prompt
    assert "BankAccount" in prompt
    # Accept the rule name in either spelling by normalising the lowercase
    # phrase before searching.
    normalisedPrompt = prompt.replace("never aggregate", "NEVER_AGGREGATE")
    assert "NEVER_AGGREGATE on TrusteeDataAccountBalance.closingBalance" in normalisedPrompt
    assert "BANK_BALANCE_AT_DATE" in prompt
|
||||
|
||||
|
||||
def test_buildSchemaContext_fallsBackToLegacyHints_whenOntologyDisabled(monkeypatch):
|
||||
"""With POWERON_DISABLE_FEATURE_ONTOLOGY=1 the builder must fall back to
|
||||
the legacy `getAgentDomainHints()` block. This is the path used by the
|
||||
eval harness to measure `baseline` and `phase1` accuracy without the
|
||||
ontology-driven prompt."""
|
||||
monkeypatch.setenv("POWERON_DISABLE_FEATURE_ONTOLOGY", "1")
|
||||
selected = [_trusteeAccountBalanceObj()]
|
||||
prompt = _buildSchemaContext(
|
||||
featureCode="trustee",
|
||||
|
|
@ -164,16 +190,14 @@ def test_buildSchemaContext_appendsTrusteeDomainHints():
|
|||
requestLang="de",
|
||||
)
|
||||
assert "TRUSTEE DOMAIN HINTS" in prompt
|
||||
assert "DOMAIN ONTOLOGY" not in prompt
|
||||
assert "102x Bank / Post" in prompt
|
||||
assert "periodMonth = 0" in prompt
|
||||
assert "ANTI-PATTERNS" in prompt
|
||||
assert 'LIKE \'102%\'' in prompt or "LIKE '102%'" in prompt
|
||||
|
||||
|
||||
def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook():
|
||||
"""Features that don't export getAgentDomainHints() should produce a prompt
|
||||
without the trailing hints block. Verified by using a feature code that
|
||||
cannot resolve to a main module (registry returns None)."""
|
||||
def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook(monkeypatch):
|
||||
"""Features that don't export getAgentDomainHints()/getAgentOntology()
|
||||
should produce a prompt without any trailing hints block."""
|
||||
monkeypatch.delenv("POWERON_DISABLE_FEATURE_ONTOLOGY", raising=False)
|
||||
selected = [_trusteeAccountBalanceObj()]
|
||||
prompt = _buildSchemaContext(
|
||||
featureCode="nosuchfeature",
|
||||
|
|
@ -182,4 +206,90 @@ def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook():
|
|||
requestLang="de",
|
||||
)
|
||||
assert "TRUSTEE DOMAIN HINTS" not in prompt
|
||||
assert "DOMAIN ONTOLOGY" not in prompt
|
||||
assert "Keep your answer SHORT" in prompt
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Validator integration (Phase 1: Repair-Loop)
|
||||
#
|
||||
# These tests guard that pre-execute validation fires BEFORE the provider
|
||||
# is touched, and that the structured error payload reaches the LLM via
|
||||
# `ToolResult.errorDetails` -- the contract the LLM relies on for repair.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def _buildRegistryWithMockProvider():
    """Create a sub-agent ToolRegistry backed by a MagicMock provider.

    The mock returns empty result payloads and records every call, so tests
    can assert whether the provider was reached at all.

    Returns:
        Tuple ``(registry, provider)``.
    """
    mockProvider = MagicMock()
    # Each method gets its own fresh dict so the payloads never alias.
    for pagedMethod in (mockProvider.browseTable, mockProvider.queryTable):
        pagedMethod.return_value = {"rows": [], "total": 0, "limit": 50, "offset": 0}
    mockProvider.aggregateTable.return_value = {"rows": [], "aggregate": "SUM", "field": "x"}

    toolRegistry = _buildSubAgentTools(
        provider=mockProvider,
        featureInstanceId="fi-test",
        mandateId="m-test",
        tableFilters=None,
        validator=None,
    )
    return toolRegistry, mockProvider
|
||||
|
||||
|
||||
def _dispatchSync(registry, toolName, args):
    """Synchronously dispatch a tool call through the registry.

    Args:
        registry: Object exposing an awaitable ``dispatch(call, context=...)``.
        toolName: Name of the tool to invoke.
        args: Argument mapping forwarded to the tool.

    Returns:
        Whatever ``registry.dispatch`` resolves to.
    """

    async def _dispatch():
        call = ToolCallRequest(name=toolName, args=args)
        return await registry.dispatch(call, context={})

    # asyncio.run creates a fresh loop, and on exit also cancels leftover
    # tasks and shuts down async generators before closing it -- cleanup
    # the previous new_event_loop/run_until_complete/close sequence skipped.
    return asyncio.run(_dispatch())
|
||||
|
||||
|
||||
def test_subAgentTools_invalidFieldShortCircuitsBeforeProvider():
    """A queryTable call filtering on an unknown field is rejected with a
    structured FIELD_NOT_FOUND error and never reaches the provider."""
    registry, provider = _buildRegistryWithMockProvider()
    misspelledFilter = {"field": "klosingBalance", "op": "=", "value": 1}
    callArgs = {
        "tableName": "TrusteeDataAccountBalance",
        "filters": [misspelledFilter],
    }

    outcome = _dispatchSync(registry, "queryTable", callArgs)

    assert isinstance(outcome, ToolResult)
    assert outcome.success is False
    assert outcome.errorDetails is not None
    assert outcome.errorDetails["code"] == "FIELD_NOT_FOUND"
    assert outcome.errorDetails["suggestion"] == "closingBalance"
    assert outcome.error and outcome.error.startswith("FIELD_NOT_FOUND:")
    provider.queryTable.assert_not_called()
|
||||
|
||||
|
||||
def test_subAgentTools_sumClosingBalanceShortCircuits():
    """SUM(closingBalance) -- the flagship hallucination -- is rejected with
    INVALID_AGGREGATE_TARGET and the provider is never called."""
    registry, provider = _buildRegistryWithMockProvider()
    callArgs = {
        "tableName": "TrusteeDataAccountBalance",
        "aggregate": "SUM",
        "field": "closingBalance",
    }

    outcome = _dispatchSync(registry, "aggregateTable", callArgs)

    assert outcome.success is False
    assert outcome.errorDetails["code"] == "INVALID_AGGREGATE_TARGET"
    assert outcome.errorDetails["field"] == "closingBalance"
    provider.aggregateTable.assert_not_called()
|
||||
|
||||
|
||||
def test_subAgentTools_validCallReachesProvider():
    """Sanity check: a well-formed queryTable call succeeds and hits the
    provider exactly once."""
    registry, provider = _buildRegistryWithMockProvider()
    periodFilters = [
        {"field": "periodYear", "op": "=", "value": 2025},
        {"field": "periodMonth", "op": "=", "value": 0},
    ]
    callArgs = {
        "tableName": "TrusteeDataAccountBalance",
        "filters": periodFilters,
        "fields": ["accountNumber", "closingBalance"],
    }

    outcome = _dispatchSync(registry, "queryTable", callArgs)

    assert outcome.success is True
    assert outcome.errorDetails is None
    provider.queryTable.assert_called_once()
|
||||
|
|
|
|||
295
tests/unit/services/test_queryValidator.py
Normal file
295
tests/unit/services/test_queryValidator.py
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Unit tests for the Feature Data Sub-Agent QueryValidator.
|
||||
|
||||
Each constraint is exercised with both a Happy and a Sad path so a future
|
||||
refactor that silently drops a check is caught immediately.
|
||||
|
||||
Test fixture is the real ``TrusteeDataAccountBalance`` / ``TrusteeDataJournalLine``
|
||||
Pydantic models -- both are perfectly suited because they cover all four
|
||||
constraint classes in production-realistic shape (string fields, numeric
|
||||
fields, fields named ``closingBalance`` / ``debitTotal``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from modules.shared import fkRegistry
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
||||
Constraint,
|
||||
ConstraintRule,
|
||||
OntologyDescriptor,
|
||||
ValidationErrorCode,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
def _ensureModels():
    """Module-scoped, auto-used fixture that triggers fkRegistry's model loading."""
    fkRegistry._ensureModelsLoaded()
|
||||
|
||||
|
||||
@pytest.fixture()
def validator() -> QueryValidator:
    """Provide a fresh `QueryValidator` built with default arguments."""
    freshValidator = QueryValidator()
    return freshValidator
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# FieldExists -- browseTable / queryTable / aggregateTable
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_browseQuery_happyPath_returnsNone(validator):
    """Browsing with two existing fields produces no validation error."""
    browseArgs = {"fields": ["accountNumber", "closingBalance"]}
    verdict = validator.validateBrowseQuery("TrusteeDataAccountBalance", browseArgs)
    assert verdict is None
|
||||
|
||||
|
||||
def test_browseQuery_invalidField_returnsFieldNotFound(validator):
    """A misspelled browse field yields FIELD_NOT_FOUND with a spelling suggestion."""
    misspelled = "closingBlance"  # typo
    verdict = validator.validateBrowseQuery(
        "TrusteeDataAccountBalance",
        {"fields": [misspelled]},
    )
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.FIELD_NOT_FOUND
    assert verdict.field == "closingBlance"
    assert verdict.suggestion == "closingBalance"
|
||||
|
||||
|
||||
def test_queryTable_filterOnInvalidField_returnsFieldNotFound(validator):
    """Filtering on an unknown field yields FIELD_NOT_FOUND plus a suggestion."""
    badFilter = {"field": "klosingBalance", "op": "=", "value": 100}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [badFilter]},
    )
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.FIELD_NOT_FOUND
    assert verdict.suggestion == "closingBalance"
|
||||
|
||||
|
||||
def test_queryTable_unknownTable_isLenient(validator):
    """Tables missing from MODEL_REGISTRY are not validated; schema errors
    are left to the SQL layer. This avoids false positives for pure UDB
    tables that are not exposed via Pydantic."""
    anyFilter = {"field": "anything", "op": "=", "value": 1}
    verdict = validator.validateQueryTable("NoSuchTable123", {"filters": [anyFilter]})
    assert verdict is None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# OperatorCompatible
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_queryTable_likeOnStringField_isOk(validator):
    """LIKE on the string field `accountNumber` passes validation."""
    likeFilter = {"field": "accountNumber", "op": "LIKE", "value": "102%"}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [likeFilter]},
    )
    assert verdict is None
|
||||
|
||||
|
||||
def test_queryTable_likeOnNumericField_isOperatorIncompatible(validator):
    """LIKE on the numeric field `closingBalance` is rejected as OPERATOR_INCOMPATIBLE."""
    likeFilter = {"field": "closingBalance", "op": "LIKE", "value": "100%"}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [likeFilter]},
    )
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.OPERATOR_INCOMPATIBLE
    assert verdict.field == "closingBalance"
|
||||
|
||||
|
||||
def test_queryTable_gteOnNumericField_isOk(validator):
    """`>=` on the numeric field `closingBalance` passes validation."""
    rangeFilter = {"field": "closingBalance", "op": ">=", "value": 100}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [rangeFilter]},
    )
    assert verdict is None
|
||||
|
||||
|
||||
def test_queryTable_gteOnStringField_isOperatorIncompatible(validator):
    """`>=` on the string field `currency` is rejected as OPERATOR_INCOMPATIBLE."""
    rangeFilter = {"field": "currency", "op": ">=", "value": "CHF"}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [rangeFilter]},
    )
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.OPERATOR_INCOMPATIBLE
|
||||
|
||||
|
||||
def test_queryTable_equalsOnAnyField_isOk(validator):
    """`=` and `!=` work on any field type; exercised here with `=` on `currency`."""
    equalityFilter = {"field": "currency", "op": "=", "value": "CHF"}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [equalityFilter]},
    )
    assert verdict is None
|
||||
|
||||
|
||||
def test_queryTable_isNullOnAnyField_isOk(validator):
    """An IS NULL filter on `mandateId` passes validation."""
    nullFilter = {"field": "mandateId", "op": "IS NULL", "value": None}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [nullFilter]},
    )
    assert verdict is None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# AggregateTarget -- the highest-impact rule
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_aggregate_sumDebitAmount_isOk(validator):
    """SUM over `debitAmount` on the journal-line table passes validation."""
    aggregateArgs = {"aggregate": "SUM", "field": "debitAmount"}
    verdict = validator.validateAggregateQuery("TrusteeDataJournalLine", aggregateArgs)
    assert verdict is None
|
||||
|
||||
|
||||
def test_aggregate_sumClosingBalance_isInvalidAggregateTarget(validator):
    """The flagship bug -- SUM(closingBalance) across periods -- must be blocked."""
    aggregateArgs = {"aggregate": "SUM", "field": "closingBalance"}
    verdict = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
    assert verdict.field == "closingBalance"
    assert "already aggregated" in verdict.hint
|
||||
|
||||
|
||||
def test_aggregate_avgDebitTotal_isInvalidAggregateTarget(validator):
    """`*Total` columns are per-period turnovers, so AVG across periods is
    rejected as an invalid aggregate target."""
    aggregateArgs = {"aggregate": "AVG", "field": "debitTotal"}
    verdict = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
|
||||
|
||||
|
||||
def test_aggregate_countClosingBalance_isOk(validator):
    """COUNT on a balance column is meaningful (number of balance rows) and passes."""
    aggregateArgs = {"aggregate": "COUNT", "field": "closingBalance"}
    verdict = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert verdict is None
|
||||
|
||||
|
||||
def test_aggregate_sumOnStringField_isTypeMismatch(validator):
    """SUM over the string field `currency` is rejected with TYPE_MISMATCH."""
    aggregateArgs = {"aggregate": "SUM", "field": "currency"}
    verdict = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.TYPE_MISMATCH
|
||||
|
||||
|
||||
def test_aggregate_invalidField_returnsFieldNotFound(validator):
    """Aggregating over a non-existent field yields FIELD_NOT_FOUND."""
    aggregateArgs = {"aggregate": "SUM", "field": "nonExistent"}
    verdict = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.FIELD_NOT_FOUND
|
||||
|
||||
|
||||
def test_aggregate_invalidGroupBy_returnsFieldNotFound(validator):
    """An unknown `groupBy` column yields FIELD_NOT_FOUND even when the
    aggregated field itself is valid."""
    aggregateArgs = {"aggregate": "SUM", "field": "debitAmount", "groupBy": "ghostColumn"}
    verdict = validator.validateAggregateQuery("TrusteeDataJournalLine", aggregateArgs)
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.FIELD_NOT_FOUND
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# OrderByValid
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_queryTable_orderByValid_isOk(validator):
    """Ordering by the existing field `periodYear` passes validation."""
    queryArgs = {"orderBy": "periodYear"}
    verdict = validator.validateQueryTable("TrusteeDataAccountBalance", queryArgs)
    assert verdict is None
|
||||
|
||||
|
||||
def test_queryTable_orderByInvalid_returnsOrderByInvalid(validator):
    """A misspelled `orderBy` yields ORDER_BY_INVALID with a spelling suggestion."""
    queryArgs = {"orderBy": "periodYr"}
    verdict = validator.validateQueryTable("TrusteeDataAccountBalance", queryArgs)
    assert verdict is not None
    assert verdict.code == ValidationErrorCode.ORDER_BY_INVALID
    assert verdict.suggestion == "periodYear"
|
||||
|
||||
|
||||
def test_queryTable_orderByLiteralStringNone_isOk(validator):
    """LLMs sometimes pass the literal string 'None' as `orderBy`; that is accepted."""
    queryArgs = {"orderBy": "None"}
    verdict = validator.validateQueryTable("TrusteeDataAccountBalance", queryArgs)
    assert verdict is None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Ontology-driven override (Phase 2 readiness check)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ontologyOverride_blocksAggregateForOntologyField():
    """A field the ontology marks NEVER_AGGREGATE is blocked for SUM/AVG even
    when its name does not match the convention suffixes."""
    syntheticRule = Constraint(
        appliesTo="TrusteeDataJournalLine.debitAmount",
        rule=ConstraintRule.NEVER_AGGREGATE,
        message="Synthetic test rule.",
    )
    descriptor = OntologyDescriptor(featureCode="trustee", constraints=[syntheticRule])
    ontologyAwareValidator = QueryValidator(ontology=descriptor)

    verdict = ontologyAwareValidator.validateAggregateQuery(
        "TrusteeDataJournalLine",
        {"aggregate": "SUM", "field": "debitAmount"},
    )

    assert verdict is not None
    assert verdict.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# QueryValidationError serialization (consumed by featureDataAgent)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_validationError_toShortErrorIncludesCodeAndField(validator):
    """`toShortError()` starts with the error code and mentions the offending field."""
    aggregateArgs = {"aggregate": "SUM", "field": "closingBalance"}
    verdict = validator.validateAggregateQuery("TrusteeDataAccountBalance", aggregateArgs)
    assert verdict is not None

    shortMessage = verdict.toShortError()

    assert shortMessage.startswith("INVALID_AGGREGATE_TARGET:")
    assert "closingBalance" in shortMessage
|
||||
|
||||
|
||||
def test_validationError_toErrorDetailsHasFourKeys(validator):
    """`toErrorDetails()` exposes exactly the keys code, field, suggestion and hint."""
    badFilter = {"field": "klosingBalance", "op": "=", "value": 0}
    verdict = validator.validateQueryTable(
        "TrusteeDataAccountBalance",
        {"filters": [badFilter]},
    )
    assert verdict is not None

    payload = verdict.toErrorDetails()
    expectedKeys = {"code", "field", "suggestion", "hint"}

    assert set(payload.keys()) == expectedKeys
    assert payload["code"] == "FIELD_NOT_FOUND"
    assert payload["suggestion"] == "closingBalance"
|
||||
199
tests/unit/services/test_trusteeOntology.py
Normal file
199
tests/unit/services/test_trusteeOntology.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
# Copyright (c) 2026 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Unit tests for the trustee ontology and the ontology-to-prompt compiler.
|
||||
|
||||
Verifies:
|
||||
|
||||
* the descriptor passes Pydantic validation
|
||||
* `constraintsForTable` correctly scopes by table/field prefix
|
||||
* the compiler emits a stable header + every entity name + every
|
||||
constraint message
|
||||
* the QueryValidator picks up ontology constraints (NEVER_AGGREGATE on
|
||||
closingBalance) over the convention-based defaults
|
||||
* the `getAgentOntology()` hook on `mainTrustee` returns the descriptor
|
||||
* `_buildValidatorForFeature("trustee")` wires the validator with the
|
||||
ontology
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from modules.features.trustee.mainTrustee import getAgentOntology
|
||||
from modules.features.trustee.trusteeOntology import getTrusteeOntology
|
||||
from modules.serviceCenter.services.serviceAgent.datamodelOntology import (
|
||||
ConstraintRule,
|
||||
OntologyDescriptor,
|
||||
SemanticType,
|
||||
ValidationErrorCode,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.featureDataAgent import (
|
||||
_buildValidatorForFeature,
|
||||
_loadFeatureOntologyBlock,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.ontologyToPromptCompiler import (
|
||||
compileOntologyToPrompt,
|
||||
)
|
||||
from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator
|
||||
from modules.shared import fkRegistry
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="module")
def _ensureModels():
    """Call ``fkRegistry._ensureModelsLoaded()`` once for this module, before its tests run."""
    fkRegistry._ensureModelsLoaded()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OntologyDescriptor structure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_trusteeOntology_returnsValidDescriptor():
    """The trustee ontology is an ``OntologyDescriptor`` with every section populated."""
    descriptor = getTrusteeOntology()

    assert isinstance(descriptor, OntologyDescriptor)
    assert descriptor.featureCode == "trustee"

    # Each section must be non-empty; checked in the same order as a chained `and`.
    assert descriptor.entities
    assert descriptor.relations
    assert descriptor.constraints
    assert descriptor.canonicalPatterns
|
||||
|
||||
|
||||
def test_trusteeOntology_hasBankAccountSpecialization():
    """``BankAccount`` exists, has parent ``Account`` and the ACCOUNT semantic type."""
    bankEntity = None
    for entity in getTrusteeOntology().entities:
        if entity.name == "BankAccount":
            bankEntity = entity
            break

    assert bankEntity is not None
    assert bankEntity.parentEntity == "Account"
    assert bankEntity.semanticType == SemanticType.ACCOUNT
|
||||
|
||||
|
||||
def test_trusteeOntology_closingBalanceIsNeverAggregate():
    """The balance table carries a NEVER_AGGREGATE constraint on ``closingBalance``."""
    balanceConstraints = getTrusteeOntology().constraintsForTable("TrusteeDataAccountBalance")

    hits = []
    for constraint in balanceConstraints:
        if constraint.rule == ConstraintRule.NEVER_AGGREGATE:
            if constraint.appliesTo == "TrusteeDataAccountBalance.closingBalance":
                hits.append(constraint)

    assert hits, "Expected NEVER_AGGREGATE constraint on closingBalance"
|
||||
|
||||
|
||||
def test_trusteeOntology_requiresPeriodFilterOnBalanceTable():
    """The first REQUIRES_FILTER_ON rule on the balance table names both period fields."""
    balanceConstraints = getTrusteeOntology().constraintsForTable("TrusteeDataAccountBalance")

    filterRules = list(
        filter(lambda c: c.rule == ConstraintRule.REQUIRES_FILTER_ON, balanceConstraints)
    )
    assert filterRules, "Expected at least one REQUIRES_FILTER_ON constraint"

    # Only the first matching rule is inspected; a missing/None value counts as empty.
    firstRule = filterRules[0]
    requiredFields = firstRule.params.get("requiredFields") or []
    for periodField in ("periodYear", "periodMonth"):
        assert periodField in requiredFields
|
||||
|
||||
|
||||
def test_constraintsForTable_filtersScopeCorrectly():
    """``constraintsForTable`` only returns constraints whose target starts with the table."""
    ontology = getTrusteeOntology()
    tableNames = ("TrusteeDataAccountBalance", "TrusteeDataJournalLine")

    # Fetch both constraint lists up front, then verify each list's prefix.
    scoped = {tableName: ontology.constraintsForTable(tableName) for tableName in tableNames}

    for tableName in tableNames:
        for constraint in scoped[tableName]:
            assert constraint.appliesTo.startswith(tableName)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt compiler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_compiler_emitsExpectedHeader():
    """The compiled trustee prompt starts with the ``DOMAIN ONTOLOGY (trustee):`` header.

    On failure, the first line of the compiled block is reported as the
    assertion message.
    """
    block = compileOntologyToPrompt(getTrusteeOntology())
    # str.splitlines() returns [] for an empty string; fall back to "" so an
    # IndexError in the message expression cannot mask the AssertionError.
    firstLine = (block.splitlines() or [""])[0]
    assert block.startswith("DOMAIN ONTOLOGY (trustee):"), firstLine
|
||||
|
||||
|
||||
def test_compiler_includesAllEntityNames():
    """Every entity name of the trustee ontology appears in the compiled prompt."""
    ontology = getTrusteeOntology()
    compiled = compileOntologyToPrompt(ontology)

    for entity in ontology.entities:
        assert entity.name in compiled, f"Entity {entity.name} missing from compiled prompt"
|
||||
|
||||
|
||||
def test_compiler_includesAllConstraintMessages():
    """The leading part of each constraint message (up to the first ``.``) is in the prompt."""
    ontology = getTrusteeOntology()
    compiled = compileOntologyToPrompt(ontology)

    for constraint in ontology.constraints:
        # Only the text before the first period is required to appear in the prompt.
        leadingPart = constraint.message.partition(".")[0]
        assert leadingPart in compiled, f"Constraint message missing: {constraint.message[:40]}"
|
||||
|
||||
|
||||
def test_compiler_includesCanonicalPatternTools():
    """Each canonical pattern's intent and its ``tool`` entry show up in the compiled prompt."""
    ontology = getTrusteeOntology()
    compiled = compileOntologyToPrompt(ontology)

    for canonical in ontology.canonicalPatterns:
        assert canonical.intent in compiled
        toolName = canonical.pattern["tool"]
        assert toolName in compiled
|
||||
|
||||
|
||||
def test_compiler_deterministic():
    """Compiling a freshly built trustee ontology twice gives equal output."""
    renders = [compileOntologyToPrompt(getTrusteeOntology()) for _ in range(2)]
    firstRender, secondRender = renders
    assert firstRender == secondRender
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# QueryValidator x ontology integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_validator_picksUpOntologyNeverAggregate():
    """SUM over ``closingBalance`` is rejected as INVALID_AGGREGATE_TARGET on that field."""
    ontology = getTrusteeOntology()
    aggregateQuery = {"aggregate": "SUM", "field": "closingBalance"}

    rejection = QueryValidator(ontology=ontology).validateAggregateQuery(
        "TrusteeDataAccountBalance", aggregateQuery
    )

    assert rejection is not None
    assert rejection.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
    assert rejection.field == "closingBalance"
|
||||
|
||||
|
||||
def test_validator_ontologyConstraintFiresOnDebitTotal():
    """SUM over ``debitTotal`` on the balance table is rejected as INVALID_AGGREGATE_TARGET."""
    ontology = getTrusteeOntology()
    aggregateQuery = {"aggregate": "SUM", "field": "debitTotal"}

    rejection = QueryValidator(ontology=ontology).validateAggregateQuery(
        "TrusteeDataAccountBalance", aggregateQuery
    )

    assert rejection is not None
    assert rejection.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET
|
||||
|
||||
|
||||
def test_validator_allowsLegitimateAggregateOnJournalLine():
    """SUM over ``debitAmount`` on the journal-line table passes validation (no error)."""
    ontology = getTrusteeOntology()
    aggregateQuery = {"aggregate": "SUM", "field": "debitAmount"}

    outcome = QueryValidator(ontology=ontology).validateAggregateQuery(
        "TrusteeDataJournalLine", aggregateQuery
    )

    assert outcome is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# featureDataAgent integration hooks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_mainTrustee_getAgentOntology_returnsDescriptor():
    """``getAgentOntology()`` yields an ``OntologyDescriptor`` for the trustee feature."""
    descriptor = getAgentOntology()

    assert isinstance(descriptor, OntologyDescriptor)
    assert descriptor.featureCode == "trustee"
|
||||
|
||||
|
||||
def test_loadFeatureOntologyBlock_returnsCompiledBlock():
    """Loading the trustee ontology block gives the compiled prompt including ``BankAccount``."""
    compiled = _loadFeatureOntologyBlock("trustee")

    assert compiled.startswith("DOMAIN ONTOLOGY (trustee):")
    assert "BankAccount" in compiled
|
||||
|
||||
|
||||
def test_loadFeatureOntologyBlock_unknownFeatureReturnsEmpty():
    """An unknown feature code results in an empty-string ontology block."""
    compiled = _loadFeatureOntologyBlock("doesNotExist")
    assert compiled == ""
|
||||
|
||||
|
||||
def test_buildValidatorForFeature_trustee_hasOntology():
    """The validator built for ``trustee`` has the trustee ontology attached."""
    attached = _buildValidatorForFeature("trustee")._ontology

    assert attached is not None
    assert attached.featureCode == "trustee"
|
||||
|
||||
|
||||
def test_buildValidatorForFeature_unknownFeature_noOntology():
    """A validator built for an unknown feature code has no ontology attached."""
    attached = _buildValidatorForFeature("doesNotExist")._ontology
    assert attached is None
|
||||
Loading…
Reference in a new issue