Merge pull request #161 from valueonag/feat/demo-system-readieness

Feat/demo system readieness
This commit is contained in:
Patrick Motsch 2026-05-12 23:34:19 +02:00 committed by GitHub
commit ab43b42aa9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
56 changed files with 2180 additions and 523 deletions

3
app.py
View file

@ -604,6 +604,9 @@ app.include_router(promptRouter)
from modules.routes.routeDataConnections import router as connectionsRouter
app.include_router(connectionsRouter)
from modules.routes.routeRagInventory import router as ragInventoryRouter
app.include_router(ragInventoryRouter)
from modules.routes.routeTableViews import router as tableViewsRouter
app.include_router(tableViewsRouter)

View file

@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -32,19 +32,19 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY=
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk=
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI =
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI =
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
logger = logging.getLogger(__name__)
def _buildPrimarySttRecognitionFields(
*,
model: str,
lightweight: bool,
) -> Dict[str, Any]:
"""Shared fields for batch + streaming primary RecognitionConfig."""
base: Dict[str, Any] = {
"enable_automatic_punctuation": True,
"model": model,
}
if lightweight:
base["enable_word_time_offsets"] = False
base["enable_word_confidence"] = False
base["max_alternatives"] = 1
base["use_enhanced"] = False
else:
base["enable_word_time_offsets"] = True
base["enable_word_confidence"] = True
base["max_alternatives"] = 3
base["use_enhanced"] = True
return base
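For orientation, a minimal sketch of the two field sets the helper above produces; the values follow directly from the function body and are merged into the RecognitionConfig parameters further down:

# Illustrative only: the two field sets produced by _buildPrimarySttRecognitionFields.
full = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
# -> {"enable_automatic_punctuation": True, "model": "latest_long",
#     "enable_word_time_offsets": True, "enable_word_confidence": True,
#     "max_alternatives": 3, "use_enhanced": True}
fast = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
# -> {"enable_automatic_punctuation": True, "model": "latest_short",
#     "enable_word_time_offsets": False, "enable_word_confidence": False,
#     "max_alternatives": 1, "use_enhanced": False}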
# Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
# SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
_GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
@ -73,7 +97,10 @@ class ConnectorGoogleSpeech:
sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False,
phraseHints: Optional[list] = None,
alternativeLanguages: Optional[list] = None) -> Dict:
alternativeLanguages: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
audioFormat: Optional[str] = None) -> Dict:
"""
Convert speech to text using Google Cloud Speech-to-Text API.
@ -82,6 +109,9 @@ class ConnectorGoogleSpeech:
language: Language code (e.g., 'de-DE', 'en-US')
sample_rate: Audio sample rate (auto-detected if None)
channels: Number of audio channels (auto-detected if None)
model: Google recognition model (e.g. latest_long, latest_short)
lightweight: If True, omit word timings/confidence, single alternative, no enhanced model
audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection
Returns:
Dict containing transcribed text, confidence, and metadata
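A minimal call sketch with the new parameters, mirroring how the Teamsbot/Commcoach paths use them later in this PR (the method name speechToText and an initialized connector are assumptions; audio is 16 kHz mono PCM here):

# Sketch only, assuming an initialized ConnectorGoogleSpeech instance.
result = await connector.speechToText(
    audioContent=pcmBytes,
    language="de-DE",
    sampleRate=16000,
    channels=1,
    skipFallbacks=True,       # format is known, no fallback retries
    model="latest_short",     # low-latency model
    lightweight=True,         # no word timings/confidence, single alternative
    audioFormat="linear16",   # skip auto-detection
)
if result.get("success"):
    print(result["text"], result.get("confidence"))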
@ -92,8 +122,24 @@ class ConnectorGoogleSpeech:
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
sampleRate = None
# Auto-detect audio format if not provided
if sampleRate is None or channels is None:
explicitFormat = (audioFormat or "").strip().lower() or None
if explicitFormat:
if channels is None:
channels = 1
if sampleRate is None:
if explicitFormat == "webm_opus":
sampleRate = 48000
elif explicitFormat == "linear16":
sampleRate = 16000
elif explicitFormat in ("mp3", "flac"):
sampleRate = 44100
elif explicitFormat == "wav":
sampleRate = 16000
else:
sampleRate = 16000
audioFormat = explicitFormat
logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch")
elif sampleRate is None or channels is None:
validation = self.validateAudioFormat(audioContent)
if not validation["valid"]:
return {
@ -156,12 +202,7 @@ class ConnectorGoogleSpeech:
"encoding": encoding,
"audio_channel_count": channels,
"language_code": language,
"enable_automatic_punctuation": True,
"model": "latest_long",
"enable_word_time_offsets": True,
"enable_word_confidence": True,
"max_alternatives": 3,
"use_enhanced": True,
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
}
if phraseHints:
@ -205,8 +246,7 @@ class ConnectorGoogleSpeech:
sample_rate_hertz=16000,
audio_channel_count=1,
language_code=language,
enable_automatic_punctuation=True,
model="latest_long"
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
)
try:
response = await asyncio.to_thread(
@ -343,7 +383,7 @@ class ConnectorGoogleSpeech:
"error": "No recognition results (silence or unclear audio)"
}
models = ["latest_long", "phone_call", "latest_short"]
models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"]))
for fallback_config in fallback_configs:
for model in models:
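The dict.fromkeys() wrapper only deduplicates while preserving order, so a caller-supplied model is tried first and never repeated, for example:

# Illustrative: ordered de-duplication of the fallback model list.
models = list(dict.fromkeys(["latest_short", "latest_long", "phone_call", "latest_short"]))
# -> ["latest_short", "latest_long", "phone_call"]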
@ -419,6 +459,9 @@ class ConnectorGoogleSpeech:
audioQueue: asyncio.Queue,
language: str = "de-DE",
phraseHints: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
singleUtterance: bool = False,
) -> AsyncGenerator[Dict[str, Any], None]:
"""
Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
@ -429,9 +472,13 @@ class ConnectorGoogleSpeech:
Send (b"", True) to signal end of stream.
language: Language code
phraseHints: Optional boost phrases
model: Google recognition model (e.g. latest_long, latest_short)
lightweight: If True, use non-enhanced primary config (lower latency)
singleUtterance: If True, end stream after first utterance (client should reconnect)
Yields:
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec;
optionally endOfSingleUtterance, reconnectRequired
"""
STREAM_LIMIT_SEC = 290
streamStartTs = time.time()
@ -442,9 +489,7 @@ class ConnectorGoogleSpeech:
"sample_rate_hertz": 48000,
"audio_channel_count": 1,
"language_code": language,
"enable_automatic_punctuation": True,
"model": "latest_long",
"use_enhanced": True,
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
}
if phraseHints:
configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
@ -453,7 +498,7 @@ class ConnectorGoogleSpeech:
streamingConfig = speech.StreamingRecognitionConfig(
config=recognitionConfig,
interim_results=True,
single_utterance=False,
single_utterance=singleUtterance,
)
import queue as threadQueue
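A minimal consumer sketch for the streaming path, assuming the caller owns the asyncio queue and follows the (bytes, isEnd) protocol described in the docstring:

# Sketch only: feed PCM chunks into the queue and react to the yielded dicts.
audioQueue: asyncio.Queue = asyncio.Queue()

async def consumeStream(connector):
    async for event in connector.streamingRecognize(
        audioQueue,
        language="de-DE",
        model="latest_short",
        lightweight=True,
        singleUtterance=True,
    ):
        if event.get("endOfSingleUtterance") or event.get("reconnectRequired"):
            break  # caller reconnects with a fresh stream
        if event.get("isFinal"):
            print(event["transcript"], event.get("confidence"))

# Producer side: await audioQueue.put((chunkBytes, False)); finish with (b"", True).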
@ -490,7 +535,22 @@ class ConnectorGoogleSpeech:
)
for response in responseStream:
elapsed = time.time() - streamStartTs
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0
durationFromResults = 0.0
for result in response.results:
rt = getattr(result, "result_end_time", None)
if rt is None:
continue
if hasattr(rt, "total_seconds"):
durationFromResults = max(durationFromResults, float(rt.total_seconds()))
else:
durationFromResults = max(
durationFromResults,
float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9,
)
estimatedDurationSec = durationFromResults if durationFromResults > 0 else (
totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0
)
finalTexts = []
interimTexts = []
@ -524,6 +584,13 @@ class ConnectorGoogleSpeech:
"stabilityScore": 0.0,
"audioDurationSec": estimatedDurationSec,
}), loop)
speechEvt = getattr(response, "speech_event_type", None)
if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt):
asyncio.run_coroutine_threadsafe(resultOutQ.put({
"endOfSingleUtterance": True,
"audioDurationSec": estimatedDurationSec,
}), loop)
if elapsed >= STREAM_LIMIT_SEC:
logger.info("Streaming STT approaching 5-min limit, client should reconnect")
asyncio.run_coroutine_threadsafe(resultOutQ.put({

View file

@ -62,15 +62,15 @@ class DataSource(PowerOnModel):
description="Owner user ID",
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
)
autoSync: bool = Field(
ragIndexEnabled: bool = Field(
default=False,
description="Automatically sync on schedule",
json_schema_extra={"label": "Auto-Sync"},
description="When true this tree element is indexed into the RAG knowledge store",
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
)
lastSynced: Optional[float] = Field(
lastIndexed: Optional[float] = Field(
default=None,
description="Last sync timestamp",
json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"},
description="Timestamp of last successful RAG indexing run",
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
)
scope: str = Field(
default="personal",

View file

@ -484,10 +484,10 @@ class UserConnection(PowerOnModel):
default=None,
description=(
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
"clickupScope (titles|title_description|with_comments), "
"surfaceToggles (dict per authority), maxAgeDays (int)."
"mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), "
"filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), "
"clickupIndexAttachments (bool), maxAgeDays (int). "
"Neutralization is controlled per DataSource.neutralize (not here)."
),
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
)

View file

@ -1080,6 +1080,8 @@ class CommcoachService:
audioContent=audioContent,
language=language,
skipFallbacks=True,
model="latest_short",
lightweight=True,
)
transcribedText = ""

View file

@ -40,6 +40,8 @@ class BrowserBotConnector:
botAccountPassword: Optional[str] = None,
transferMode: str = "auto",
debugMode: bool = False,
avatarMediaData: Optional[str] = None,
avatarMediaType: Optional[str] = None,
) -> Dict[str, Any]:
"""
Send join command to the Browser Bot service.
@ -79,12 +81,16 @@ class BrowserBotConnector:
"debugMode": debugMode,
}
# Add authenticated join credentials if configured
if botAccountEmail and botAccountPassword:
payload["botAccountEmail"] = botAccountEmail
payload["botAccountPassword"] = botAccountPassword
logger.info(f"Bot will join authenticated as {botAccountEmail}")
if avatarMediaData and avatarMediaType:
payload["avatarMediaData"] = avatarMediaData
payload["avatarMediaType"] = avatarMediaType
logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars")
try:
async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:

View file

@ -111,6 +111,18 @@ class TeamsbotMeetingModule(PowerOnModel):
defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
goals: Optional[str] = Field(default=None, description="Free-text goals")
kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
defaultMeetingLink: Optional[str] = Field(
default=None,
description="Default Teams meeting URL for new sessions in this module (user can override)",
)
defaultBotName: Optional[str] = Field(
default=None,
description="Default display name for the bot when starting a session from this module",
)
defaultAvatarFileId: Optional[str] = Field(
default=None,
description="FileItem ID for the default avatar image/video shown in the meeting",
)
status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
@ -217,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel):
triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override")
# ============================================================================
@ -240,6 +253,7 @@ class TeamsbotConfig(BaseModel):
triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting")
def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
"""Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
@ -257,6 +271,7 @@ class TeamsbotStartSessionRequest(BaseModel):
"""Request to start a new Teams Bot session."""
meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to")
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
@ -277,6 +292,9 @@ class CreateMeetingModuleRequest(BaseModel):
defaultDirectorPrompts: Optional[str] = None
goals: Optional[str] = None
kpiTargets: Optional[str] = None
defaultMeetingLink: Optional[str] = None
defaultBotName: Optional[str] = None
defaultAvatarFileId: Optional[str] = None
class UpdateMeetingModuleRequest(BaseModel):
@ -287,6 +305,9 @@ class UpdateMeetingModuleRequest(BaseModel):
defaultDirectorPrompts: Optional[str] = None
goals: Optional[str] = None
kpiTargets: Optional[str] = None
defaultMeetingLink: Optional[str] = None
defaultBotName: Optional[str] = None
defaultAvatarFileId: Optional[str] = None
status: Optional[TeamsbotModuleStatus] = None
@ -304,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
triggerCooldownSeconds: Optional[int] = None
contextWindowSegments: Optional[int] = None
debugMode: Optional[bool] = None
avatarFileId: Optional[str] = None
# ============================================================================

View file

@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
TeamsbotDirectorPromptStatus,
TeamsbotDirectorPromptMode,
TeamsbotMeetingModule,
TeamsbotModuleStatus,
)
logger = logging.getLogger(__name__)
@ -338,6 +339,8 @@ class TeamsbotObjects:
def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
"""Get all meeting modules for a feature instance."""
records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
for r in records:
r.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True)
return records

View file

@ -290,6 +290,19 @@ def _runMigrations():
migrated = False
# M2: MeetingModule default meeting link / bot name (additive columns)
if _tableExists("TeamsbotMeetingModule"):
for col, sqlType in (
("defaultMeetingLink", "TEXT"),
("defaultBotName", "TEXT"),
):
if not _columnExists("TeamsbotMeetingModule", col):
cur.execute(
f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL',
)
logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}")
migrated = True
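The _tableExists/_columnExists helpers are not part of this diff; a hypothetical sketch of what _columnExists might look like, assuming the migration runs against SQLite with the cursor cur used above (the real helper may differ):

# Hypothetical helper sketch, not from the PR; assumes a SQLite cursor `cur`.
def _columnExists(table: str, column: str) -> bool:
    cur.execute(f'PRAGMA table_info("{table}")')
    return any(row[1] == column for row in cur.fetchall())  # row[1] is the column name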
# M1: Create default Adhoc modules for orphaned sessions
# (only runs if TeamsbotSession table exists with moduleId column
# and there are sessions without a moduleId)

View file

@ -40,6 +40,7 @@ from .datamodelTeamsbot import (
TeamsbotDirectorPromptMode,
TeamsbotDirectorPromptStatus,
TeamsbotMeetingModule,
TeamsbotModuleStatus,
CreateMeetingModuleRequest,
UpdateMeetingModuleRequest,
DIRECTOR_PROMPT_FILE_LIMIT,
@ -203,6 +204,7 @@ async def createModule(
data["instanceId"] = instanceId
data["mandateId"] = mandateId
data["ownerUserId"] = str(context.user.id)
data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
module = interface.createModule(data)
return {"module": module}
@ -281,6 +283,11 @@ async def startSession(
interface = _getInterface(context, instanceId)
config = _getInstanceConfig(instanceId)
if body.moduleId:
mod = interface.getModule(body.moduleId)
if not mod or str(mod.get("instanceId") or "") != str(instanceId):
raise HTTPException(status_code=400, detail="Invalid moduleId for this instance")
# Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
@ -288,6 +295,7 @@ async def startSession(
sessionData = TeamsbotSession(
instanceId=instanceId,
mandateId=mandateId,
moduleId=body.moduleId,
meetingLink=cleanMeetingUrl,
botName=body.botName or config.botName,
sessionContext=body.sessionContext,
@ -426,6 +434,54 @@ async def listSessions(
return {"sessions": sessions}
@router.get("/{instanceId}/dashboard/stream")
@limiter.limit("60/minute")
async def streamDashboard(
request: Request,
instanceId: str,
context: RequestContext = Depends(getRequestContext),
):
"""
SSE channel for the Teamsbot dashboard: repeated snapshots of sessions and meeting modules.
Push interval: 3s while any own session is pending/joining/active, otherwise 20s.
Same session visibility rules as GET /sessions (own sessions unless platform admin).
"""
_validateInstanceAccess(instanceId, context)
interface = _getInterface(context, instanceId)
userId = None if context.isPlatformAdmin else str(context.user.id)
activeStatuses = {
TeamsbotSessionStatus.PENDING.value,
TeamsbotSessionStatus.JOINING.value,
TeamsbotSessionStatus.ACTIVE.value,
}
async def eventGenerator():
while True:
sessionRows = []
try:
sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId)
moduleRows = interface.getModules(instanceId)
payload = {"type": "dashboardState", "sessions": sessionRows, "modules": moduleRows}
yield f"data: {json.dumps(payload, default=str)}\n\n"
except asyncio.CancelledError:
raise
except Exception as ex:
logger.warning("dashboard stream tick failed: %s", ex)
yield f"data: {json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})}\n\n"
hasActive = any((s.get("status") in activeStatuses) for s in sessionRows)
await asyncio.sleep(3.0 if hasActive else 20.0)
return StreamingResponse(
eventGenerator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
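A minimal client sketch for consuming this stream, assuming httpx as the HTTP client and bearer-token auth; the real dashboard frontend presumably uses EventSource and the full router prefix instead:

# Sketch only: reads dashboardState events from the SSE endpoint with httpx.
import json
import httpx

async def watchDashboard(baseUrl: str, instanceId: str, token: str):
    url = f"{baseUrl}/{instanceId}/dashboard/stream"
    headers = {"Authorization": f"Bearer {token}"}  # auth scheme is an assumption
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url, headers=headers) as resp:
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                event = json.loads(line[len("data: "):])
                if event.get("type") == "dashboardState":
                    print(len(event["sessions"]), "sessions,", len(event["modules"]), "modules")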
@router.get("/{instanceId}/sessions/{sessionId}")
@limiter.limit("30/minute")
async def getSession(
@ -634,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf
if not userSettings:
return baseConfig
# Merge: user settings override instance defaults (only non-None values)
# Merge: user settings override instance defaults (only non-None values).
# Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically.
overrides = {}
for field in ["botName", "aiSystemPrompt", "responseMode",
"responseChannel", "transferMode", "language", "voiceId",
"triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments",
"debugMode"]:
for field in TeamsbotConfig.model_fields:
value = userSettings.get(field)
if value is not None:
overrides[field] = value
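Deriving the loop from TeamsbotConfig.model_fields means newly added fields such as avatarFileId are merged automatically; a minimal sketch of the behaviour (how the merged overrides are turned back into a config is not shown in this hunk and is assumed here):

# Illustrative: only non-None user settings survive the merge.
userSettings = {"botName": None, "voiceId": "de-DE-Neural2-C", "avatarFileId": "file-123"}
overrides = {f: userSettings[f] for f in TeamsbotConfig.model_fields
             if userSettings.get(f) is not None}
# -> {"voiceId": "de-DE-Neural2-C", "avatarFileId": "file-123"}
effectiveConfig = baseConfig.model_copy(update=overrides)  # assumed finishing step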

View file

@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
),
"agentRound": (
"One short sentence (max ~14 words) the assistant says BETWEEN rounds "
"of a longer agent task to signal that work is still in progress. "
"Include the placeholder tokens '{round}' and '{maxRounds}' so the "
"caller can substitute the actual numbers — e.g. 'Step {round} of "
"{maxRounds}, still working.'"
"of a longer agent task to update the audience on what it is doing. "
"Include the placeholder token '{activity}' which will be filled with "
"the current activity — e.g. 'I am {activity}, one moment...' or "
"'Currently {activity}, almost there...'. Do NOT include step numbers."
),
}
@ -602,6 +602,13 @@ class TeamsbotService:
self._lastTranscriptText: Optional[str] = None
self._lastTranscriptId: Optional[str] = None
self._lastSttTime: float = 0.0
# Audio chunk aggregation: collect chunks and send to STT only
# after a speech pause or when the buffer reaches a target duration.
self._audioBuffer: bytes = b""
self._audioBufferStartTime: float = 0.0
self._audioBufferLastChunkTime: float = 0.0
self._audioBufferSampleRate: int = 16000
self._lastBotResponseText: Optional[str] = None
self._lastBotResponseTs: float = 0.0
@ -732,6 +739,12 @@ class TeamsbotService:
hasAuth = bool(botAccountEmail and botAccountPassword)
logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
avatarMediaData = None
avatarMediaType = None
avatarFileId = self._resolveAvatarFileId(session, interface)
if avatarFileId:
avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
result = await self.browserBotConnector.joinMeeting(
sessionId=sessionId,
meetingUrl=meetingLink,
@ -743,6 +756,8 @@ class TeamsbotService:
botAccountPassword=botAccountPassword,
transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
avatarMediaData=avatarMediaData,
avatarMediaType=avatarMediaType,
)
if result.get("success"):
@ -767,6 +782,37 @@ class TeamsbotService:
})
await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
def _resolveAvatarFileId(self, session, interface):
"""Resolve avatarFileId: module override > config default."""
moduleId = session.get("moduleId")
if moduleId:
module = interface.getModule(moduleId)
if module and module.get("defaultAvatarFileId"):
return module["defaultAvatarFileId"]
return getattr(self.config, "avatarFileId", None)
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
"""Load avatar file as base64 data + mime type. Returns (data, mimeType) or (None, None)."""
import base64
from modules.interfaces import interfaceDbManagement
try:
mgmt = interfaceDbManagement.getInterface(self.currentUser, self.mandateId)
fileRecord = mgmt.getFile(fileId)
if not fileRecord:
logger.warning(f"Avatar file {fileId} not found")
return None, None
mimeType = getattr(fileRecord, "mimeType", None) or "image/png"
rawBytes = mgmt.getFileData(fileId)
if not rawBytes:
logger.warning(f"Avatar file {fileId} has no data")
return None, None
b64 = base64.b64encode(rawBytes).decode("ascii")
logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64")
return b64, mimeType
except Exception as e:
logger.error(f"Failed to load avatar file {fileId}: {e}")
return None, None
async def leaveMeeting(self, sessionId: str):
"""Send leave command to the Browser Bot service."""
from . import interfaceFeatureTeamsbot as interfaceDb
@ -1164,6 +1210,14 @@ class TeamsbotService:
interface.updateSession(sessionId, updates)
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
# Flush remaining audio buffer before generating summary
if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
if self._audioBuffer:
logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)")
self._audioBuffer = b""
self._audioBufferStartTime = 0.0
self._audioBufferLastChunkTime = 0.0
# Generate summary when session ends
if dbStatus == TeamsbotSessionStatus.ENDED.value:
asyncio.create_task(self._generateMeetingSummary(sessionId))
@ -1178,11 +1232,18 @@ class TeamsbotService:
voiceInterface,
websocket: WebSocket,
):
"""Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline."""
"""Process an audio chunk from WebRTC capture. The bot-side VAD
(AudioWorklet / ScriptProcessor) already segments speech into 1-8s
voiced chunks. Here we apply a minimum-duration safety net: very short
chunks (<1s) are buffered until they reach 1s; everything else goes
straight to STT. A wall-clock timeout flushes stale buffers."""
import base64
_MIN_CHUNK_SEC = 1.0
_STALE_TIMEOUT_SEC = 3.0
try:
audioBytes = base64.b64decode(audioBase64)
if len(audioBytes) < 1000:
if len(audioBytes) < 500:
return
if captureDiagnostics:
@ -1195,14 +1256,12 @@ class TeamsbotService:
f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
)
# Use RMS from capture diagnostics to skip real silence.
# Byte-variation heuristics produced false positives and dropped valid speech.
isSilent = False
if captureDiagnostics and captureDiagnostics.get("rms") is not None:
try:
rmsVal = float(captureDiagnostics.get("rms"))
if rmsVal < 0.0003:
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})")
return
isSilent = True
except Exception:
pass
@ -1210,21 +1269,51 @@ class TeamsbotService:
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
return
# Treat sampleRate=0 as unknown (triggers auto-detection)
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
now = time.time()
effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000
if not isSilent:
if not self._audioBuffer:
self._audioBufferStartTime = now
self._audioBuffer += audioBytes
self._audioBufferLastChunkTime = now
self._audioBufferSampleRate = effectiveRate
bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0
bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0
shouldFlush = (
self._audioBuffer
and (
bufferDuration >= _MIN_CHUNK_SEC
or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
)
)
if not shouldFlush:
return
flushBytes = self._audioBuffer
flushRate = self._audioBufferSampleRate
self._audioBuffer = b""
self._audioBufferStartTime = 0.0
self._audioBufferLastChunkTime = 0.0
flushDuration = len(flushBytes) / (flushRate * 2)
logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")
phraseHints = list(self._knownSpeakers)
if self.config.botName:
phraseHints.append(self.config.botName)
sttResult = await voiceInterface.speechToText(
audioContent=audioBytes,
audioContent=flushBytes,
language=self.config.language or "de-DE",
sampleRate=effectiveSampleRate,
sampleRate=flushRate,
channels=1,
skipFallbacks=True,
phraseHints=phraseHints if phraseHints else None,
alternativeLanguages=["en-US"],
audioFormat="linear16",
)
if sttResult and sttResult.get("success") and sttResult.get("text"):
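For 16-bit mono PCM the byte-to-duration math behind the flush decision is simply bytes / (rate * 2); a worked sketch with the thresholds used above:

# Illustrative: flush decision for buffered 16-bit mono PCM.
_MIN_CHUNK_SEC = 1.0
_STALE_TIMEOUT_SEC = 3.0
rate = 16000                          # 16 kHz mono, 2 bytes per sample
buffer = b"\x00" * 24000              # 24000 bytes -> 0.75 s of audio
duration = len(buffer) / (rate * 2)   # 0.75 s, below the 1.0 s minimum
age = 3.2                             # seconds since the buffer was opened
shouldFlush = duration >= _MIN_CHUNK_SEC or (age >= _STALE_TIMEOUT_SEC and duration > 0.3)
# -> True: the stale timeout forces the flush even though the buffer is short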
@ -1252,19 +1341,18 @@ class TeamsbotService:
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
"""Track current speaker from captions for STT attribution.
When the first non-bot caption arrives, retroactively attributes
any STT segments that were created before a speaker was known."""
Retroactively attributes any unattributed STT segments whenever a
new non-bot caption speaker arrives (not just the first time)."""
if not speaker:
return
normalizedSpeaker = speaker.strip()
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
return
prevSpeaker = self._lastCaptionSpeaker
self._lastCaptionSpeaker = normalizedSpeaker
self._knownSpeakers.add(normalizedSpeaker)
if prevSpeaker is None and self._unattributedTranscriptIds:
if self._unattributedTranscriptIds:
from . import interfaceFeatureTeamsbot as interfaceDb
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
for tid in self._unattributedTranscriptIds:
@ -3243,17 +3331,53 @@ class TeamsbotService:
return await self._pickEphemeralPhrase("agentBusy")
async def _interimAgentRoundMessage(
self, roundNum: int, maxRounds: int
self, lastToolLabel: Optional[str] = None
) -> Optional[str]:
"""Per-round progress notice for long agent runs (meeting voice /
chat, ephemeral). Phrasing is AI-localised once per session;
``{round}`` and ``{maxRounds}`` placeholders are substituted at
render time. Returns ``None`` if generation failed."""
return await self._pickEphemeralPhrase(
"agentRound",
substitutions={"round": roundNum, "maxRounds": maxRounds},
chat, ephemeral). Generates a single short phrase in the bot's
configured language that describes the current activity. Unlike
the cached ephemeral phrases, this is a per-call AI generation
to avoid mixing English displayLabels into non-English speech."""
targetLang = (self.config.language or "").strip() or "en-US"
botName = (self.config.botName or "the assistant").strip()
activityHint = lastToolLabel or "working on the task"
prompt = (
f"You are a meeting assistant named '{botName}'.\n"
f"Target spoken language (BCP-47): {targetLang}\n\n"
f"The assistant is currently busy with: {activityHint}\n\n"
f"Generate ONE short sentence (max 12 words) in {targetLang} "
f"that tells the audience what the assistant is doing right now. "
f"Natural, spoken style. No step numbers. No quotes around the output.\n"
f"Output ONLY the sentence, nothing else."
)
try:
aiService = createAiService(
self.currentUser, self.mandateId, self.instanceId
)
await aiService.ensureAiObjectsInitialized()
request = AiCallRequest(
prompt=prompt,
context="",
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.SPEED,
),
)
response = await aiService.callAi(request)
except Exception as aiErr:
logger.debug(f"Agent round phrase generation failed: {aiErr}")
return None
if not response or response.errorCount != 0 or not response.content:
return None
result = response.content.strip().strip('"').strip("'")
if len(result) > 200:
result = result[:200]
return result
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
"""Deliver a short line to the meeting (TTS + chat per config) without
persisting botResponses/transcripts, so the main agent answer stays the
@ -3370,6 +3494,7 @@ class TeamsbotService:
finalText: str = ""
rounds = 0
lastToolLabel: Optional[str] = None
try:
async for event in agentService.runAgent(
prompt=taskText,
@ -3390,11 +3515,9 @@ class TeamsbotService:
"round": roundNum,
"maxRounds": maxR,
})
# Round 1 already gets the generic start notice; from round 2 on, report into the meeting.
# Director prompts stay silent; no interim updates into the meeting.
if roundNum >= 2 and not directorPromptMode:
try:
roundText = await self._interimAgentRoundMessage(roundNum, maxR)
roundText = await self._interimAgentRoundMessage(lastToolLabel)
if roundText:
await self._notifyMeetingEphemeral(sessionId, roundText)
except Exception as roundNoticeErr:
@ -3402,12 +3525,26 @@ class TeamsbotService:
f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
)
elif event.type == AgentEventTypeEnum.TOOL_CALL:
toolName = (event.data or {}).get("toolName") if event.data else None
evtData = event.data or {}
toolName = evtData.get("toolName")
lastToolLabel = evtData.get("displayLabel")
await _emitSessionEvent(sessionId, "agentRun", {
"source": sourceLabel,
"promptId": promptId,
"status": "toolCall",
"toolName": toolName,
"displayLabel": lastToolLabel,
})
elif event.type == AgentEventTypeEnum.TOOL_RESULT:
evtData = event.data or {}
resultSnippet = (evtData.get("data") or "")[:200]
await _emitSessionEvent(sessionId, "agentRun", {
"source": sourceLabel,
"promptId": promptId,
"status": "toolResult",
"toolName": evtData.get("toolName", ""),
"success": evtData.get("success", True),
"summary": resultSnippet,
})
elif event.type == AgentEventTypeEnum.FILE_CREATED:
await _emitSessionEvent(sessionId, "documentCreated", event.data or {})

View file

@ -33,11 +33,6 @@ UI_OBJECTS = [
"label": t("Einstellungen", context="UI"),
"meta": {"area": "settings"}
},
{
"objectKey": "ui.feature.workspace.rag-insights",
"label": t("Wissens-Insights", context="UI"),
"meta": {"area": "rag-insights"},
},
]
RESOURCE_OBJECTS = [
@ -86,7 +81,6 @@ TEMPLATE_ROLES = [
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
]
},
@ -97,7 +91,6 @@ TEMPLATE_ROLES = [
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},

View file

@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings(
# =========================================================================
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
# =========================================================================
def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]:
"""All FileItem ids for this feature instance (any user). Knowledge rows are often stored
without featureInstanceId; we correlate by file id from the Management DB."""
from modules.datamodels.datamodelFiles import FileItem
from modules.interfaces.interfaceDbManagement import ComponentObjects
co = ComponentObjects()
rows = co.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId})
out: List[str] = []
m = str(mandateId) if mandateId else ""
for r in rows or []:
rid = r.get("id") if isinstance(r, dict) else getattr(r, "id", None)
if not rid:
continue
if m:
mid = r.get("mandateId") if isinstance(r, dict) else getattr(r, "mandateId", "") or ""
if mid and mid != m:
continue
out.append(str(rid))
return out
@router.get("/{instanceId}/rag-statistics")
@limiter.limit("60/minute")
async def getRagStatistics(
request: Request,
instanceId: str = Path(...),
days: int = Query(90, ge=7, le=365, description="Timeline window in days"),
context: RequestContext = Depends(getRequestContext),
):
"""Aggregated, non-identifying knowledge-store metrics for this workspace instance."""
mandateId, _instanceConfig = _validateInstanceAccess(instanceId, context)
workspaceFileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId)
kdb = getKnowledgeInterface(context.user)
stats = kdb.getRagStatisticsForInstance(
featureInstanceId=instanceId,
mandateId=str(mandateId) if mandateId else "",
timelineDays=days,
workspaceFileIds=workspaceFileIds,
)
if isinstance(stats, dict):
stats.setdefault("scope", {})
stats["scope"]["workspaceFileIdsResolved"] = len(workspaceFileIds)
return JSONResponse(stats)

View file

@ -133,6 +133,60 @@ class KnowledgeObjects:
return {"indexRows": indexCount, "chunks": chunkCount}
def deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]:
"""Delete all FileContentIndex rows whose provenance.dataSourceId matches.
Used when a user disables ragIndexEnabled on a DataSource to purge
only those chunks that were ingested from that specific tree element.
"""
if not dataSourceId:
return {"indexRows": 0, "chunks": 0}
allRows = self.db.getRecordset(FileContentIndex)
matchedRows = []
for row in allRows:
prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
matchedRows.append(row)
mandateIds: set = set()
chunkCount = 0
indexCount = 0
for row in matchedRows:
fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
if not fid:
continue
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
for chunk in chunks:
if self.db.recordDelete(ContentChunk, chunk["id"]):
chunkCount += 1
if self.db.recordDelete(FileContentIndex, fid):
indexCount += 1
if mid:
mandateIds.add(str(mid))
for mid in mandateIds:
try:
from modules.interfaces.interfaceDbBilling import _getRootInterface
_getRootInterface().reconcileMandateStorageBilling(mid)
except Exception as ex:
logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex)
return {"indexRows": indexCount, "chunks": chunkCount}
def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]:
"""List all FileContentIndex rows whose provenance.dataSourceId matches."""
if not dataSourceId:
return []
allRows = self.db.getRecordset(FileContentIndex)
out = []
for row in allRows:
prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
out.append(dict(row) if not isinstance(row, dict) else row)
return out
def deleteFileContentIndex(self, fileId: str) -> bool:
"""Delete a FileContentIndex and all associated ContentChunks."""
existing = self.getFileContentIndex(fileId)

View file

@ -1274,17 +1274,20 @@ class ComponentObjects:
if getattr(permissions, "update", None) != AccessLevel.ALL:
raise PermissionError("Setting global scope requires ALL permission")
self.db.recordModify(FileFolder, folderId, {"scope": scope})
allFolderIds = self._collectChildFolderIds(folderId)
for fid in allFolderIds:
self.db.recordModify(FileFolder, fid, {"scope": scope})
filesUpdated = 0
if cascadeToFiles:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"scope": scope})
filesUpdated += 1
for fid in allFolderIds:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"scope": scope})
filesUpdated += 1
return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
@ -1294,16 +1297,19 @@ class ComponentObjects:
raise FileNotFoundError(f"Folder {folderId} not found")
self._requireFolderWriteAccess(folder, folderId, "update")
self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize})
allFolderIds = self._collectChildFolderIds(folderId)
for fid in allFolderIds:
self.db.recordModify(FileFolder, fid, {"neutralize": neutralize})
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
filesUpdated = 0
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
filesUpdated += 1
for fid in allFolderIds:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
for item in items:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
if owner == self.userId:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
filesUpdated += 1
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}

View file

@ -69,7 +69,10 @@ class VoiceObjects:
sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False,
phraseHints: list = None,
alternativeLanguages: list = None) -> Dict[str, Any]:
alternativeLanguages: list = None,
model: str = "latest_long",
lightweight: bool = False,
audioFormat: Optional[str] = None) -> Dict[str, Any]:
"""
Convert speech to text using Google Cloud Speech-to-Text API.
@ -81,6 +84,9 @@ class VoiceObjects:
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
phraseHints: Optional list of phrases to boost recognition (names, terms)
alternativeLanguages: Optional list of additional language codes for multi-language
model: Google STT model (e.g. latest_long, latest_short)
lightweight: If True, omit word-level features and enhanced model
audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection
Returns:
Dict containing transcribed text, confidence, and metadata
@ -97,6 +103,9 @@ class VoiceObjects:
skipFallbacks=skipFallbacks,
phraseHints=phraseHints,
alternativeLanguages=alternativeLanguages,
model=model,
lightweight=lightweight,
audioFormat=audioFormat,
)
if result["success"]:
@ -120,13 +129,23 @@ class VoiceObjects:
audioQueue: asyncio.Queue,
language: str = "de-DE",
phraseHints: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
singleUtterance: bool = False,
) -> AsyncGenerator[Dict[str, Any], None]:
"""
Stream audio to Google Streaming STT and yield interim/final results.
Billing is recorded for each final result.
"""
connector = self._getGoogleSpeechConnector()
async for event in connector.streamingRecognize(audioQueue, language, phraseHints):
async for event in connector.streamingRecognize(
audioQueue,
language,
phraseHints,
model=model,
lightweight=lightweight,
singleUtterance=singleUtterance,
):
if event.get("isFinal") and self.billingCallback:
durationSec = event.get("audioDurationSec", 0)
priceCHF = connector.calculateSttCostCHF(durationSec)
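For callers, a hedged usage sketch of the extended signature; `voice` stands for an existing VoiceObjects instance, `audioBytes` for a webm/opus recording, and the positional audio argument plus the `language` keyword are assumed from context rather than shown in this hunk:

# Illustrative call only; parameter names outside this hunk are assumptions.
result = voice.speechToText(
    audioBytes,
    language="de-DE",
    model="latest_short",     # short-utterance model instead of latest_long
    lightweight=True,         # drop word-level features / enhanced model
    audioFormat="webm_opus",  # bypass the format auto-detection fallbacks
    skipFallbacks=True,
)
print(result)                 # dict with transcribed text, confidence, metadata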

View file

@ -1986,10 +1986,10 @@ def getUserViewTransactions(
if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
import json as _json
from collections import defaultdict
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
from modules.routes.routeHelpers import (
applyViewToParams,
build_group_summary_groups,
effective_group_by_levels,
resolveView,
)
@ -2018,28 +2018,7 @@ def getUserViewTransactions(
summary_params,
ctx.user,
)
counts: Dict[str, int] = defaultdict(int)
labels: Dict[str, str] = {}
null_key = "\x00NULL"
for item in all_rows:
raw = item.get(field)
if raw is None or raw == "":
nk = null_key
labels[nk] = null_label
else:
nk = str(raw)
if nk not in labels:
labels[nk] = nk
counts[nk] += 1
groups_out: List[Dict[str, Any]] = []
for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())):
groups_out.append(
{
"value": None if nk == null_key else nk,
"label": labels.get(nk, nk),
"totalCount": counts[nk],
}
)
groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels)
return JSONResponse(content={"groups": groups_out})
paginationParams = None

View file

@ -130,7 +130,7 @@ def get_auth_authority_options(
# ============================================================================
@router.get("/")
@limiter.limit("30/minute")
@limiter.limit("60/minute")
async def get_connections(
request: Request,
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
@ -197,7 +197,9 @@ async def get_connections(
"lastChecked": connection.lastChecked,
"expiresAt": connection.expiresAt,
"tokenStatus": tokenStatus,
"tokenExpiresAt": tokenExpiresAt
"tokenExpiresAt": tokenExpiresAt,
"knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False),
"knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {},
})
return items
@ -264,7 +266,7 @@ async def get_connections(
})
enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
try:
@ -725,3 +727,171 @@ def delete_connection(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to delete connection: {str(e)}"
)
# =========================================================================
# Knowledge Consent & Control Endpoints
# =========================================================================
def _findOwnConnection(interface, userId: str, connectionId: str):
"""Find a connection owned by the user. Returns None if not found."""
connections = interface.getUserConnections(userId)
for conn in connections:
if conn.id == connectionId:
return conn
return None
@router.patch("/{connectionId}/knowledge-consent")
@limiter.limit("10/minute")
def _updateKnowledgeConsent(
request: Request,
connectionId: str = Path(..., description="Connection ID"),
enabled: bool = Body(..., embed=True),
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Master switch: can PowerOn ingest data from this connection into the RAG knowledge store?
enabled=False: purge ALL chunks for this connection + cancel running jobs.
enabled=True: set flag; enqueue bootstrap only if rag-enabled DataSources exist.
"""
try:
interface = getInterface(currentUser)
connection = _findOwnConnection(interface, currentUser.id, connectionId)
if not connection:
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
from modules.interfaces.interfaceDbApp import getRootInterface
rootIf = getRootInterface()
rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled})
purged = None
cancelled = 0
bootstrapEnqueued = False
if not enabled:
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
purged = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
cancelled = cancelJobsByConnection(connectionId)
else:
from modules.datamodels.datamodelDataSource import DataSource
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
if dataSources:
import asyncio
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
async def _enqueue():
await startJob(
"connection.bootstrap",
{"connectionId": connectionId, "authority": authority.lower()},
triggeredBy=str(currentUser.id),
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_enqueue())
else:
loop.run_until_complete(_enqueue())
except RuntimeError:
asyncio.run(_enqueue())
bootstrapEnqueued = True
import json as _json
from modules.shared.auditLogger import audit_logger
from modules.datamodels.datamodelAudit import AuditCategory
audit_logger.logEvent(
userId=str(currentUser.id),
mandateId=str(getattr(connection, "mandateId", "") or ""),
category=AuditCategory.PERMISSION.value,
action="knowledge_consent_changed",
details=_json.dumps({"connectionId": connectionId, "enabled": enabled}),
)
logger.info("Knowledge consent %s for connection %s by user %s",
"enabled" if enabled else "disabled", connectionId, currentUser.id)
return {
"connectionId": connectionId,
"knowledgeIngestionEnabled": enabled,
"purged": purged,
"cancelledJobs": cancelled,
"bootstrapEnqueued": bootstrapEnqueued,
}
except HTTPException:
raise
except Exception as e:
logger.error("Error updating knowledge consent: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{connectionId}/knowledge-preferences")
@limiter.limit("20/minute")
def _updateKnowledgePreferences(
request: Request,
connectionId: str = Path(..., description="Connection ID"),
preferences: Dict[str, Any] = Body(..., embed=True),
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Update per-connection knowledge ingestion preferences (mail depth, attachments, etc.)."""
_ALLOWED_KEYS = {"mailContentDepth", "mailIndexAttachments", "filesIndexBinaries",
"clickupScope", "clickupIndexAttachments", "maxAgeDays"}
try:
interface = getInterface(currentUser)
connection = _findOwnConnection(interface, currentUser.id, connectionId)
if not connection:
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
existing = getattr(connection, "knowledgePreferences", None) or {}
cleaned = {k: v for k, v in preferences.items() if k in _ALLOWED_KEYS}
merged = {**existing, **cleaned, "schemaVersion": 1}
from modules.interfaces.interfaceDbApp import getRootInterface
getRootInterface().db.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged})
logger.info("Knowledge preferences updated for connection %s", connectionId)
return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True}
except HTTPException:
raise
except Exception as e:
logger.error("Error updating knowledge preferences: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/{connectionId}/knowledge-stop")
@limiter.limit("10/minute")
def _stopKnowledgeJobs(
request: Request,
connectionId: str = Path(..., description="Connection ID"),
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Cancel all running/pending bootstrap jobs for this connection."""
try:
interface = getInterface(currentUser)
connection = _findOwnConnection(interface, currentUser.id, connectionId)
if not connection:
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
cancelled = cancelJobsByConnection(connectionId)
import json as _json
from modules.shared.auditLogger import audit_logger
from modules.datamodels.datamodelAudit import AuditCategory
audit_logger.logEvent(
userId=str(currentUser.id),
mandateId=str(getattr(connection, "mandateId", "") or ""),
category=AuditCategory.PERMISSION.value,
action="knowledge_jobs_stopped",
details=_json.dumps({"connectionId": connectionId, "cancelledCount": cancelled}),
)
logger.info("Stopped %d knowledge jobs for connection %s", cancelled, connectionId)
return {"connectionId": connectionId, "cancelled": cancelled}
except HTTPException:
raise
except Exception as e:
logger.error("Error stopping knowledge jobs: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
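Taken together, the three endpoints can be exercised from any HTTP client. A hedged sketch using `requests`; the base URL, router prefix, bearer token and preference values are placeholders:

import requests

BASE = "http://localhost:8000/api/connections"   # prefix assumed, not shown in this diff
HEADERS = {"Authorization": "Bearer <jwt>"}
connId = "<connectionId>"

# Master switch: allow (or revoke) ingestion into the RAG knowledge store
requests.patch(f"{BASE}/{connId}/knowledge-consent",
               json={"enabled": True}, headers=HEADERS)

# Fine-tune what gets ingested; only keys in _ALLOWED_KEYS are kept
requests.patch(f"{BASE}/{connId}/knowledge-preferences",
               json={"preferences": {"mailIndexAttachments": False, "maxAgeDays": 365}},
               headers=HEADERS)

# Stop all running/pending bootstrap jobs for this connection
requests.post(f"{BASE}/{connId}/knowledge-stop", headers=HEADERS)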

View file

@ -413,7 +413,7 @@ def patch_folder_scope(
scope = body.get("scope")
if not scope:
raise HTTPException(status_code=400, detail="scope is required")
cascadeToFiles = body.get("cascadeToFiles", False)
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
managementInterface = interfaceDbManagement.getInterface(
currentUser,
mandateId=str(context.mandateId) if context.mandateId else None,
@ -543,7 +543,7 @@ def get_files(
FileItem,
)
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
if mode == "filterValues":

View file

@ -100,7 +100,7 @@ def get_prompts(
result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
)
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label)
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out})
if mode == "filterValues":

View file

@ -1,6 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging."""
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging."""
import logging
from typing import Any, Dict, List, Optional
@ -125,3 +125,75 @@ def _updateNeutralizeFields(
except Exception as e:
logger.error("Error updating neutralizeFields: %s", e)
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{sourceId}/rag-index")
@limiter.limit("30/minute")
def _updateDataSourceRagIndex(
request: Request,
sourceId: str = Path(..., description="ID of the DataSource"),
ragIndexEnabled: bool = Body(..., embed=True),
context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
"""Toggle RAG indexing for a DataSource.
true: sets flag + enqueues mini-bootstrap for this DataSource only.
false: sets flag + synchronously purges all chunks from this DataSource.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
rootIf = getRootInterface()
rec = rootIf.db.getRecord(DataSource, sourceId)
if not rec:
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)
if ragIndexEnabled:
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
import asyncio
connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
authority = ""
if conn:
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
async def _enqueue():
await startJob(
"connection.bootstrap",
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
triggeredBy=str(context.user.id),
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_enqueue())
else:
loop.run_until_complete(_enqueue())
except RuntimeError:
asyncio.run(_enqueue())
else:
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
logger.info("Purged %d index rows / %d chunks for DataSource %s",
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
import json
from modules.shared.auditLogger import audit_logger
from modules.datamodels.datamodelAudit import AuditCategory
audit_logger.logEvent(
userId=str(context.user.id),
mandateId=context.mandateId,
category=AuditCategory.PERMISSION.value,
action="rag_index_toggled",
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
)
return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
except HTTPException:
raise
except Exception as e:
logger.error("Error updating datasource ragIndexEnabled: %s", e)
raise HTTPException(status_code=500, detail=str(e))
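A hedged request sketch for the toggle; the router prefix is not visible in this diff and is written as a placeholder:

import requests

HEADERS = {"Authorization": "Bearer <jwt>"}

# Enable: flag is set and a mini-bootstrap job is enqueued for this DataSource only
requests.patch("<baseUrl>/<dataSourcePrefix>/<sourceId>/rag-index",
               json={"ragIndexEnabled": True}, headers=HEADERS)

# Disable: flag is cleared and all chunks from this DataSource are purged synchronously
requests.patch("<baseUrl>/<dataSourcePrefix>/<sourceId>/rag-index",
               json={"ragIndexEnabled": False}, headers=HEADERS)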

View file

@ -825,45 +825,106 @@ def build_group_summary_groups(
items: List[Dict[str, Any]],
field: str,
null_label: str = "",
groupByLevels: List[Dict[str, Any]] | None = None,
) -> List[Dict[str, Any]]:
"""
Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level).
Build {"value", "label", "totalCount"} summaries for mode=groupSummary.
When *groupByLevels* contains more than one level the function produces one
entry per unique combination of all level values (flat permutations).
``value`` becomes a ``///``-joined composite key and ``label`` a `` / ``-joined
human-readable label, so the frontend can split them back.
"""
from collections import defaultdict
counts: Dict[str, int] = defaultdict(int)
display_by_key: Dict[str, str] = {}
null_key = "\x00NULL"
label_attr = f"{field}Label"
fields: list[dict] = []
if groupByLevels and len(groupByLevels) > 1:
for lvl in groupByLevels:
f = lvl.get("field", "")
nl = str(lvl.get("nullLabel") or null_label)
if f:
fields.append({"field": f, "nullLabel": nl})
if not fields:
fields = [{"field": field, "nullLabel": null_label}]
nullKey = "\x00NULL"
if len(fields) == 1:
f = fields[0]["field"]
nl = fields[0]["nullLabel"]
counts: Dict[str, int] = defaultdict(int)
displayByKey: Dict[str, str] = {}
labelAttr = f"{f}Label"
for item in items:
raw = item.get(f)
if raw is None or raw == "":
nk = nullKey
display = nl
else:
nk = str(raw)
display = None
lbl = item.get(labelAttr)
if lbl is not None and lbl != "":
display = str(lbl)
if display is None:
display = nk
counts[nk] += 1
if nk not in displayByKey:
displayByKey[nk] = display
orderedKeys = sorted(
counts.keys(),
key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()),
)
return [
{
"value": None if nk == nullKey else nk,
"label": displayByKey.get(nk, nk),
"totalCount": counts[nk],
}
for nk in orderedKeys
]
counts = defaultdict(int)
displayByComposite: Dict[str, list] = {}
filtersByComposite: Dict[str, dict] = {}
for item in items:
raw = item.get(field)
if raw is None or raw == "":
nk = null_key
display = null_label
else:
nk = str(raw)
display = None
lbl = item.get(label_attr)
if lbl is not None and lbl != "":
display = str(lbl)
if display is None:
display = nk
counts[nk] += 1
if nk not in display_by_key:
display_by_key[nk] = display
parts: list[str] = []
labels: list[str] = []
filterMap: dict = {}
for fd in fields:
f = fd["field"]
nl = fd["nullLabel"]
labelAttr = f"{f}Label"
raw = item.get(f)
if raw is None or raw == "":
parts.append(nullKey)
labels.append(nl)
filterMap[f] = None
else:
parts.append(str(raw))
lbl = item.get(labelAttr)
labels.append(str(lbl) if lbl not in (None, "") else str(raw))
filterMap[f] = str(raw)
compositeKey = "///".join(parts)
counts[compositeKey] += 1
if compositeKey not in displayByComposite:
displayByComposite[compositeKey] = labels
filtersByComposite[compositeKey] = filterMap
ordered_keys = sorted(
orderedKeys = sorted(
counts.keys(),
key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()),
key=lambda x: tuple(
(seg == nullKey, seg.lower()) for seg in x.split("///")
),
)
return [
{
"value": None if nk == null_key else nk,
"label": display_by_key.get(nk, nk),
"totalCount": counts[nk],
"value": ck.replace(nullKey, "__null__") if nullKey in ck else ck,
"label": " / ".join(displayByComposite[ck]),
"totalCount": counts[ck],
"filters": filtersByComposite[ck],
}
for nk in ordered_keys
for ck in orderedKeys
]
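A worked example of the multi-level path, using two hypothetical grouping levels (`status`, `ownerId`) and rows that optionally carry `<field>Label` companions:

from modules.routes.routeHelpers import build_group_summary_groups

rows = [
    {"status": "open",   "ownerId": "u1", "ownerIdLabel": "Alice"},
    {"status": "open",   "ownerId": "u1", "ownerIdLabel": "Alice"},
    {"status": "closed", "ownerId": None},
]
levels = [{"field": "status",  "nullLabel": "(none)"},
          {"field": "ownerId", "nullLabel": "(unassigned)"}]
groups = build_group_summary_groups(rows, "status", "(none)", groupByLevels=levels)
# Sorted per level with null segments last; each entry carries the composite key,
# the joined label, the count and a per-field filters map:
# [{"value": "closed///__null__", "label": "closed / (unassigned)",
#   "totalCount": 1, "filters": {"status": "closed", "ownerId": None}},
#  {"value": "open///u1", "label": "open / Alice",
#   "totalCount": 2, "filters": {"status": "open", "ownerId": "u1"}}]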

View file

@ -0,0 +1,277 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""RAG Inventory API — global knowledge-store visibility for users, admins, platform."""
import logging
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException, Depends, Request
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
from modules.datamodels.datamodelUam import User
from modules.shared.i18nRegistry import apiRouteContext
routeApiMsg = apiRouteContext("routeRagInventory")
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/rag/inventory",
tags=["RAG Inventory"],
responses={
401: {"description": "Unauthorized"},
403: {"description": "Forbidden"},
500: {"description": "Internal server error"},
},
)
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
from modules.datamodels.datamodelDataSource import DataSource
from modules.datamodels.datamodelKnowledge import FileContentIndex
out = []
for conn in connections:
connectionId = str(conn.id)
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
connChunkTotal = len(connIndexRows)
chunksByDs: Dict[str, int] = {}
unassigned = 0
for idx in connIndexRows:
prov = (idx.get("provenance") if isinstance(idx, dict) else getattr(idx, "provenance", None)) or {}
dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
if dsIdRef:
chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
else:
unassigned += 1
dsItems = []
for ds in dataSources:
dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")
dsItems.append({
"id": dsId,
"label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""),
"path": ds.get("path") if isinstance(ds, dict) else getattr(ds, "path", ""),
"sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""),
"ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False),
"neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False),
"lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
"chunkCount": chunksByDs.get(dsId, 0),
})
if unassigned > 0 and len(dsItems) == 1:
dsItems[0]["chunkCount"] += unassigned
jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5)
connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]
runningJobs = [
{"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
for j in connJobs
if j.get("status") in ("PENDING", "RUNNING")
]
lastError = None
for j in connJobs:
if j.get("status") == "ERROR":
lastError = {"jobId": j["id"], "errorMessage": j.get("errorMessage", "")}
break
out.append({
"id": connectionId,
"authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority),
"externalEmail": getattr(conn, "externalEmail", ""),
"knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
"preferences": getattr(conn, "knowledgePreferences", None) or {},
"dataSources": dsItems,
"totalChunks": connChunkTotal,
"runningJobs": runningJobs,
"lastError": lastError,
})
return out
@router.get("/me")
@limiter.limit("30/minute")
def _getInventoryMe(
request: Request,
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Personal RAG inventory: own connections + DataSources + chunk counts."""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
rootIf = getRootInterface()
knowledgeIf = getKnowledgeInterface(None)
connections = rootIf.getUserConnections(currentUser.id)
items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService)
totalChunks = sum(c.get("totalChunks", 0) for c in items)
return {"connections": items, "totals": {"chunks": totalChunks}}
except Exception as e:
logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/mandate")
@limiter.limit("20/minute")
def _getInventoryMandate(
request: Request,
context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
"""Mandate-level RAG aggregation (requires mandate membership)."""
if not context.mandateId:
raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required"))
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
rootIf = getRootInterface()
knowledgeIf = getKnowledgeInterface(None)
mandateId = str(context.mandateId) if context.mandateId else ""
from modules.datamodels.datamodelUam import UserConnection
allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
totalChunks = sum(c.get("totalChunks", 0) for c in items)
totalBytes = aggregateMandateRagTotalBytes(mandateId)
return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}}
except HTTPException:
raise
except Exception as e:
logger.error("Error in RAG inventory /mandate: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/platform")
@limiter.limit("10/minute")
def _getInventoryPlatform(
request: Request,
context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
"""Platform-wide RAG statistics (sysadmin only)."""
if not context.isSysAdmin:
raise HTTPException(status_code=403, detail=routeApiMsg("Platform admin required"))
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
from modules.datamodels.datamodelUam import UserConnection
rootIf = getRootInterface()
knowledgeIf = getKnowledgeInterface(None)
allConnections = rootIf.db.getRecordset(UserConnection)
connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
totalChunks = sum(c.get("totalChunks", 0) for c in items)
return {"connections": items, "totals": {"chunks": totalChunks}}
except HTTPException:
raise
except Exception as e:
logger.error("Error in RAG inventory /platform: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/reindex/{connectionId}")
@limiter.limit("10/minute")
def _reindexConnection(
request: Request,
connectionId: str,
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Re-trigger bootstrap for a connection (re-index all ragIndexEnabled DataSources).
Submits a new connection.bootstrap job, regardless of previous failures.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
from modules.datamodels.datamodelDataSource import DataSource
import asyncio
rootIf = getRootInterface()
conn = rootIf.getUserConnectionById(connectionId)
if conn is None:
raise HTTPException(status_code=404, detail="Connection not found")
if str(conn.userId) != str(currentUser.id):
raise HTTPException(status_code=403, detail="Not your connection")
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))]
if not ragDs:
return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
dsIds = [(ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")) for ds in ragDs]
async def _enqueue():
return await startJob(
"connection.bootstrap",
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds},
triggeredBy=str(currentUser.id),
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
future = asyncio.ensure_future(_enqueue())
jobId = None
else:
jobId = loop.run_until_complete(_enqueue())
except RuntimeError:
jobId = asyncio.run(_enqueue())
logger.info("Reindex triggered for connection %s (%d DataSources)", connectionId, len(dsIds))
return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(dsIds), "jobId": jobId}
except HTTPException:
raise
except Exception as e:
logger.error("Error triggering reindex: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/jobs")
@limiter.limit("60/minute")
def _getActiveJobs(
request: Request,
currentUser: User = Depends(getCurrentUser),
) -> List[Dict[str, Any]]:
"""Active RAG jobs for the current user (used by header badge)."""
try:
from modules.serviceCenter.services.serviceBackgroundJobs import listJobs
from modules.interfaces.interfaceDbApp import getRootInterface
rootIf = getRootInterface()
connections = rootIf.getUserConnections(currentUser.id)
connectionMap = {str(c.id): c for c in connections}
connectionIds = set(connectionMap.keys())
jobs = listJobs(jobType="connection.bootstrap", limit=50)
active = []
for j in jobs:
if j.get("status") not in ("PENDING", "RUNNING"):
continue
payload = j.get("payload") or {}
connId = payload.get("connectionId")
if connId in connectionIds:
conn = connectionMap[connId]
active.append({
"jobId": j["id"],
"connectionId": connId,
"connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),
"jobType": j.get("jobType", "connection.bootstrap"),
"progress": j.get("progress", 0),
"progressMessage": j.get("progressMessage", ""),
})
return active
except Exception as e:
logger.error("Error in RAG inventory /jobs: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
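For orientation, a hedged sketch of the payload `/api/rag/inventory/me` returns; the structure follows `_buildConnectionInventory` above, all values are placeholders:

# Illustrative response shape only; values are placeholders.
exampleMeResponse = {
    "connections": [{
        "id": "<connectionId>",
        "authority": "msft",
        "externalEmail": "user@example.com",
        "knowledgeIngestionEnabled": True,
        "preferences": {"schemaVersion": 1},
        "dataSources": [{
            "id": "<dataSourceId>", "label": "Team Docs", "path": "/sites/teamdocs",
            "sourceType": "sharepointFolder", "ragIndexEnabled": True,
            "neutralize": False, "lastIndexed": None, "chunkCount": 42,
        }],
        "totalChunks": 42,
        "runningJobs": [],
        "lastError": None,
    }],
    "totals": {"chunks": 42},
}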

View file

@ -155,12 +155,13 @@ async def sttStream(
Protocol:
Client sends JSON:
{"type": "open", "language": "de-DE"}
{"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true}
{"type": "audio", "chunk": "<base64>"}
{"type": "close"}
Server sends JSON:
{"type": "interim", "text": "..."}
{"type": "final", "text": "...", "confidence": 0.95}
{"type": "end_of_single_utterance", "audioDurationSec": 0.0}
{"type": "error", "message": "..."}
{"type": "closed"}
"""
@ -205,7 +206,12 @@ async def sttStream(
logger.warning(f"STT billing pre-flight skipped: {e}")
audioQueue: asyncio.Queue = asyncio.Queue()
language = "de-DE"
sttOpenOptions: Dict[str, Any] = {
"language": "de-DE",
"model": "latest_long",
"lightweight": False,
"singleUtterance": False,
}
streamingTask: Optional[asyncio.Task] = None
voiceInterface: Optional[VoiceObjects] = None
@ -233,10 +239,23 @@ async def sttStream(
voiceInterface.billingCallback = _billingCb
try:
async for event in voiceInterface.streamingSpeechToText(audioQueue, language):
async for event in voiceInterface.streamingSpeechToText(
audioQueue,
sttOpenOptions["language"],
phraseHints=None,
model=sttOpenOptions["model"],
lightweight=sttOpenOptions["lightweight"],
singleUtterance=sttOpenOptions["singleUtterance"],
):
if event.get("reconnectRequired"):
await _sendJson({"type": "reconnect_required"})
return
if event.get("endOfSingleUtterance"):
await _sendJson({
"type": "end_of_single_utterance",
"audioDurationSec": event.get("audioDurationSec", 0.0),
})
continue
if event.get("isFinal"):
if event.get("transcript"):
await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
@ -258,7 +277,10 @@ async def sttStream(
msgType = (msg.get("type") or "").strip()
if msgType == "open":
language = msg.get("language") or "de-DE"
sttOpenOptions["language"] = msg.get("language") or "de-DE"
sttOpenOptions["model"] = msg.get("model") or "latest_long"
sttOpenOptions["lightweight"] = bool(msg.get("lightweight"))
sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance"))
if streamingTask and not streamingTask.done():
await audioQueue.put((b"", True))
streamingTask.cancel()

View file

@ -335,9 +335,14 @@ async def runAgentLoop(
# Execute tool calls
for tc in toolCalls:
toolDef = toolRegistry.getTool(tc.name)
yield AgentEvent(
type=AgentEventTypeEnum.TOOL_CALL,
data={"toolName": tc.name, "args": tc.args}
data={
"toolName": tc.name,
"displayLabel": toolDef.displayLabel if toolDef else None,
"args": tc.args,
}
)
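# With a registered displayLabel the TOOL_CALL event now carries the activity phrase
# alongside the raw tool name; an illustrative payload (tool name and args are placeholders):
#   {"toolName": "searchWeb", "displayLabel": "researching on the web", "args": {"query": "..."}}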
results = await _executeToolCalls(toolCalls, toolRegistry, {

View file

@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services):
"required": ["connectionId", "to", "subject", "body"],
},
readOnly=False,
displayLabel="composing an email",
)

View file

@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
},
},
readOnly=False,
displayLabel="creating a document",
)
# ── textToSpeech tool ──────────────────────────────────────────────
@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
"required": ["prompt"],
},
readOnly=False,
displayLabel="generating an image",
)
# ── createChart tool ─────────────────────────────────────────────────
@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
"required": ["datasets"],
},
readOnly=False,
displayLabel="creating a chart",
)
# ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
},
"required": ["code"]
},
readOnly=True
readOnly=True,
displayLabel="running calculations",
)

View file

@ -310,11 +310,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
updateFields: Dict[str, Any] = {}
if fiId:
dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId})
# File group tree removed — groupId arg and instance-group assignment no longer apply
updateFields["featureInstanceId"] = fiId
if args.get("folderId"):
updateFields["folderId"] = args["folderId"]
if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
updateFields["tags"] = args["tags"]
if updateFields:
dbMgmt.updateFile(fileItem.id, updateFields)
chatDocId = _attachFileAsChatDocument(
services, fileItem,
@ -359,7 +363,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
},
"required": ["fileId"]
},
readOnly=True
readOnly=True,
displayLabel="reviewing a document",
)
registry.register(
@ -406,7 +411,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"properties": {"query": {"type": "string", "description": "Search query"}},
"required": ["query"]
},
readOnly=True
readOnly=True,
displayLabel="researching on the web",
)
registry.register(
@ -427,7 +433,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"writeFile", _writeFile,
description=(
"Create, append, or overwrite a file. Modes:\n"
"- create (default): create a new file (name required).\n"
"- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n"
"- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required).\n"
@ -443,7 +449,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"content": {"type": "string", "description": "Content to write/append"},
"mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
"fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
"groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."},
"folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."},
"tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
},
"required": ["content"]
@ -581,7 +587,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
},
"required": ["url"]
},
readOnly=True
readOnly=True,
displayLabel="reading a webpage",
)
registry.register(
@ -701,7 +708,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
readOnly=False
)
# Group tree tools removed — file grouping now uses view-based display grouping (TableListView)
# ---- Folder management tools ----
async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
name = args.get("name", "")
parentId = args.get("parentId") or None
if not name:
return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
folder = dbMgmt.createFolder(name, parentId=parentId)
folderId = folder.get("id") if isinstance(folder, dict) else getattr(folder, "id", None)
folderName = folder.get("name") if isinstance(folder, dict) else getattr(folder, "name", name)
return ToolResult(
toolCallId="", toolName="createFolder", success=True,
data=f"Folder '{folderName}' created (id: {folderId})" + (f" inside parent {parentId}" if parentId else ""),
sideEvents=[{"type": "folderCreated", "data": {"folderId": folderId, "folderName": folderName, "parentId": parentId}}],
)
except Exception as e:
return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(e))
async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
folders = dbMgmt.getOwnFolderTree()
if not folders:
return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.")
lines = []
folderMap: Dict[Optional[str], List] = {}
for f in folders:
pid = f.get("parentId") if isinstance(f, dict) else getattr(f, "parentId", None)
folderMap.setdefault(pid, []).append(f)
def _walk(parentId: Optional[str], indent: int):
for f in sorted(folderMap.get(parentId, []), key=lambda x: (x.get("name") if isinstance(x, dict) else getattr(x, "name", "")).lower()):
fId = f.get("id") if isinstance(f, dict) else getattr(f, "id", "")
fName = f.get("name") if isinstance(f, dict) else getattr(f, "name", "")
prefix = " " * indent
lines.append(f"{prefix}- {fName} (id: {fId})")
_walk(fId, indent + 1)
_walk(None, 0)
return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines))
except Exception as e:
return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e))
async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
fileId = args.get("fileId", "")
folderId = args.get("folderId")
if not fileId:
return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
file = dbMgmt.getFile(fileId)
if not file:
return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found")
dbMgmt.updateFile(fileId, {"folderId": folderId or None})
targetLabel = f"folder {folderId}" if folderId else "root"
return ToolResult(
toolCallId="", toolName="moveFile", success=True,
data=f"File '{file.fileName}' (id: {fileId}) moved to {targetLabel}",
sideEvents=[{"type": "fileUpdated", "data": {"fileId": fileId, "fileName": file.fileName}}],
)
except Exception as e:
return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(e))
registry.register(
"createFolder", _createFolder,
description=(
"Create a new folder in the workspace file tree. "
"Use parentId to create nested folders. Returns the new folder ID."
),
parameters={
"type": "object",
"properties": {
"name": {"type": "string", "description": "Folder name"},
"parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."},
},
"required": ["name"]
},
readOnly=False
)
registry.register(
"listFolders", _listFolders,
description=(
"List all folders in the workspace as an indented tree. "
"Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile."
),
parameters={"type": "object", "properties": {}},
readOnly=True
)
async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
folderId = args.get("folderId", "")
newName = args.get("newName", "")
if not folderId or not newName:
return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
folder = dbMgmt.renameFolder(folderId, newName)
return ToolResult(
toolCallId="", toolName="renameFolder", success=True,
data=f"Folder {folderId} renamed to '{newName}'",
sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}],
)
except Exception as e:
return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e))
registry.register(
"renameFolder", _renameFolder,
description="Rename an existing folder in the workspace file tree.",
parameters={
"type": "object",
"properties": {
"folderId": {"type": "string", "description": "The folder ID to rename"},
"newName": {"type": "string", "description": "New folder name"},
},
"required": ["folderId", "newName"]
},
readOnly=False
)
registry.register(
"moveFile", _moveFile,
description=(
"Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level."
),
parameters={
"type": "object",
"properties": {
"fileId": {"type": "string", "description": "The file ID to move"},
"folderId": {"type": "string", "description": "Target folder ID. Omit or null to move to root."},
},
"required": ["fileId"]
},
readOnly=False
)
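The folder tools compose naturally in an agent run. A hedged sketch of the calls a model might issue; `callTool` stands in for the registry dispatch and all IDs are placeholders:

async def organiseWorkspace(callTool):
    # Hypothetical agent-side sequence, not part of this commit.
    await callTool("listFolders", {})                        # discover existing folder IDs
    await callTool("createFolder", {"name": "Reports"})      # new root-level folder
    await callTool("writeFile", {"name": "summary.md", "content": "# Q3 summary",
                                 "folderId": "<folderId>"})  # create file inside that folder
    await callTool("moveFile", {"fileId": "<fileId>", "folderId": "<folderId>"})
    await callTool("renameFolder", {"folderId": "<folderId>", "newName": "Reports 2025"})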
registry.register(
"replaceInFile", _replaceInFile,

View file

@ -41,6 +41,12 @@ class ToolDefinition(BaseModel):
"""Schema for a tool available to the agent."""
name: str = Field(description="Unique tool name")
description: str = Field(description="What this tool does")
displayLabel: Optional[str] = Field(
default=None,
description="Short human-readable activity phrase (e.g. 'researching on the web'). "
"Used for live progress messages in meetings. English gerund phrase; "
"localised by the caller."
)
parameters: Dict[str, Any] = Field(
default_factory=dict,
description="JSON Schema for tool parameters"

View file

@ -23,7 +23,7 @@ class ToolRegistry:
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
description: str = "", parameters: Dict[str, Any] = None,
readOnly: bool = False, featureType: str = None,
toolSet: str = None):
toolSet: str = None, displayLabel: str = None):
"""Register a tool with its handler function."""
if name in self._tools:
logger.warning(f"Tool '{name}' already registered, overwriting")
@ -31,6 +31,7 @@ class ToolRegistry:
self._tools[name] = ToolDefinition(
name=name,
description=description,
displayLabel=displayLabel,
parameters=parameters or {},
readOnly=readOnly,
featureType=featureType,

View file

@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff
Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das")
- E-Mail an Teilnehmer/Kontakte versenden
- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen
- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.)
- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.)
- Recherche im Internet oder aktuelle Informationen noetig
- Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast
- E-Mail versenden
- Dokument generieren oder in einer Datenquelle ablegen
- Mehrere Schritte oder Tool-Aufrufe noetig
- Daten aus externen Quellen abrufen
Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true.
Wenn needsAgent=true:
- Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).

View file

@ -7,6 +7,9 @@ from .mainBackgroundJobService import (
startJob,
getJobStatus,
listJobs,
cancelJob,
cancelJobsByConnection,
isTerminalStatus,
JobProgressCallback,
)
@ -15,5 +18,8 @@ __all__ = [
"startJob",
"getJobStatus",
"listJobs",
"cancelJob",
"cancelJobsByConnection",
"isTerminalStatus",
"JobProgressCallback",
]

View file

@ -30,6 +30,7 @@ clear message. No silent zombies.
import asyncio
import logging
import time
from datetime import datetime, timezone
from typing import Any, Awaitable, Callable, Dict, List, Optional
@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app")
registerDatabase(JOBS_DATABASE)
JobProgressCallback = Callable[[int, Optional[str]], None]
_CANCEL_CHECK_INTERVAL_S = 3.0
class JobProgressCallback:
"""Callable progress reporter with cooperative cancel-check for long-running walkers."""
def __init__(self, jobId: str):
self._jobId = jobId
self._cancelledCache: Optional[bool] = None
self._lastCheckedAt: float = 0.0
def __call__(self, progress: int, message: Optional[str] = None) -> None:
try:
clamped = max(0, min(100, int(progress)))
fields: Dict[str, Any] = {"progress": clamped}
if message is not None:
fields["progressMessage"] = message[:500]
_updateJob(self._jobId, fields)
except Exception as ex:
logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
def isCancelled(self) -> bool:
"""Check if this job was cancelled. Reads DB at most every 3s to limit load."""
now = time.time()
if self._cancelledCache is True:
return True
if now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S:
return self._cancelledCache or False
self._lastCheckedAt = now
try:
job = _loadJob(self._jobId)
if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value:
self._cancelledCache = True
return True
except Exception:
pass
self._cancelledCache = False
return False
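A hedged sketch of how a long-running walker is expected to cooperate with this callback: report progress periodically and bail out once isCancelled() flips (the walker itself is illustrative, not part of this commit):

async def exampleWalker(items, progressCb: JobProgressCallback):
    # Illustrative walker body showing the cooperative-cancel contract.
    total = max(len(items), 1)
    for i, item in enumerate(items):
        if progressCb.isCancelled():          # cheap: hits the DB at most every 3s
            return {"cancelled": True, "processed": i}
        # ... ingest `item` here ...
        progressCb(int(i * 100 / total), f"processed {i + 1}/{total}")
    return {"cancelled": False, "processed": len(items)}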
JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
@ -155,16 +195,7 @@ def _markError(jobId: str, errorMessage: str) -> None:
def _makeProgressCallback(jobId: str) -> JobProgressCallback:
def _cb(progress: int, message: Optional[str] = None) -> None:
try:
clamped = max(0, min(100, int(progress)))
fields: Dict[str, Any] = {"progress": clamped}
if message is not None:
fields["progressMessage"] = message[:500]
_updateJob(jobId, fields)
except Exception as ex:
logger.warning("Progress update failed for job %s: %s", jobId, ex)
return _cb
return JobProgressCallback(jobId)
async def _runJob(jobId: str) -> None:
@ -220,12 +251,51 @@ def isTerminalStatus(status: str) -> bool:
return status in {s.value for s in TERMINAL_JOB_STATUSES}
def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool:
"""Mark a job as CANCELLED. Walkers detect this via JobProgressCallback.isCancelled().
Returns False if the job is already in a terminal state or does not exist.
"""
job = _loadJob(jobId)
if not job:
return False
if isTerminalStatus(job.get("status", "")):
return False
_updateJob(jobId, {
"status": BackgroundJobStatusEnum.CANCELLED.value,
"errorMessage": f"cancelled: {reason}"[:1000],
"finishedAt": datetime.now(timezone.utc).timestamp(),
})
logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason)
return True
def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int:
"""Cancel all RUNNING/PENDING jobs whose payload.connectionId matches.
Returns count of jobs marked as cancelled.
"""
db = _getDb()
rows = db.getRecordset(BackgroundJob, recordFilter={"jobType": jobType})
count = 0
for row in rows:
status = row.get("status", "")
if status not in (BackgroundJobStatusEnum.PENDING.value, BackgroundJobStatusEnum.RUNNING.value):
continue
payload = row.get("payload") or {}
if payload.get("connectionId") == connectionId:
if cancelJob(row["id"], reason=f"connection_stop:{connectionId[:8]}"):
count += 1
return count
def recoverInterruptedJobs() -> int:
"""Flip any RUNNING jobs to ERROR (called at worker boot).
"""Flip any RUNNING jobs to ERROR and re-queue bootstrap jobs (called at worker boot).
A RUNNING job in the DB after process restart means the previous worker
died mid-execution; the asyncio task is gone and the job will never
finish on its own.
finish on its own. For connection.bootstrap jobs, a fresh job is
automatically re-queued so the user doesn't have to manually retry.
"""
db = _getDb()
try:
@ -234,12 +304,34 @@ def recoverInterruptedJobs() -> int:
logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex)
return 0
count = 0
requeued = 0
for row in rows:
try:
_markError(row["id"], "Interrupted by worker restart")
count += 1
except Exception as ex:
logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex)
continue
if row.get("jobType") == "connection.bootstrap":
payload = row.get("payload") or {}
if payload.get("connectionId"):
try:
newJob = BackgroundJob(
jobType="connection.bootstrap",
payload=payload,
triggeredBy="recovery.requeue",
)
record = db.recordCreate(BackgroundJob, _serialiseDatetimes(newJob.model_dump()))
asyncio.create_task(_runJob(record["id"]))
requeued += 1
logger.info(
"recoverInterruptedJobs: re-queued bootstrap for connectionId=%s (new jobId=%s)",
payload["connectionId"], record["id"],
)
except Exception as reqEx:
logger.warning("recoverInterruptedJobs: re-queue failed for %s: %s", row.get("id"), reqEx)
if count:
logger.warning("Recovered %d interrupted background job(s) after restart", count)
logger.warning("Recovered %d interrupted background job(s) after restart (re-queued %d)", count, requeued)
return count

View file

@ -77,6 +77,7 @@ class ContainerExtractor(Extractor):
"""Extract by recursively unpacking the container."""
fileName = context.get("fileName", "archive")
mimeType = context.get("mimeType", "application/octet-stream")
cascadeDepth = context.get("_cascadeDepth", 0)
rootId = makeId()
parts: List[ContentPart] = [
@ -97,7 +98,7 @@ class ContainerExtractor(Extractor):
parts.extend(lazy)
return parts
state = {"totalSize": 0, "fileCount": 0}
state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth}
try:
childParts = _resolveContainerRecursive(
fileBytes, mimeType, fileName, rootId, "", 0, state
@ -209,7 +210,12 @@ def _addFilePart(
if extractor and not isinstance(extractor, ContainerExtractor):
try:
childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime})
cascadeDepth = state.get("cascadeDepth", 0)
childParts = extractor.extract(data, {
"fileName": fileName,
"mimeType": detectedMime,
"_cascadeDepth": cascadeDepth + 1,
})
for part in childParts:
part.parentId = parentId
if not part.metadata:

View file

@ -53,12 +53,13 @@ class EmailExtractor(Extractor):
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
fileName = context.get("fileName", "email")
lower = (fileName or "").lower()
depth = context.get("_cascadeDepth", 0)
if lower.endswith(".msg"):
return self._extractMsg(fileBytes, fileName)
return self._extractEml(fileBytes, fileName)
return self._extractMsg(fileBytes, fileName, depth)
return self._extractEml(fileBytes, fileName, depth)
def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
"""Parse standard EML (RFC 822) using stdlib email."""
rootId = makeId()
parts: List[ContentPart] = []
@ -91,7 +92,7 @@ class EmailExtractor(Extractor):
attachName = part.get_filename() or "attachment"
attachData = part.get_payload(decode=True)
if attachData:
parts.extend(_delegateAttachment(attachData, attachName, rootId))
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
continue
if contentType == "text/plain":
@ -113,7 +114,7 @@ class EmailExtractor(Extractor):
return parts
def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
"""Parse Outlook MSG files using extract-msg (optional)."""
rootId = makeId()
parts: List[ContentPart] = []
@ -179,7 +180,7 @@ class EmailExtractor(Extractor):
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
attachData = getattr(attachment, "data", None)
if attachData:
parts.extend(_delegateAttachment(attachData, attachName, rootId))
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
try:
msgFile.close()
@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str:
return "\n".join(lines)
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]:
"""Delegate an attachment to the appropriate type-specific extractor."""
_MAX_CASCADE_DEPTH = 10
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]:
"""Delegate an attachment to the appropriate type-specific extractor.
Passes ``_cascadeDepth`` through the context so nested Email -> Container -> Email
chains share a global depth counter and don't recurse infinitely.
"""
if depth >= _MAX_CASCADE_DEPTH:
logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction")
import base64
encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
return [ContentPart(
id=makeId(), parentId=parentId, label=attachName,
typeGroup="binary", mimeType="application/octet-stream",
data=encodedData,
metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True},
)]
guessedMime, _ = mimetypes.guess_type(attachName)
detectedMime = guessedMime or "application/octet-stream"
from ..subRegistry import ExtractorRegistry
registry = ExtractorRegistry()
from ..subRegistry import getExtractorRegistry
registry = getExtractorRegistry()
extractor = registry.resolve(detectedMime, attachName)
if extractor and not isinstance(extractor, EmailExtractor):
if extractor:
try:
childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime})
childParts = extractor.extract(attachData, {
"fileName": attachName,
"mimeType": detectedMime,
"_cascadeDepth": depth + 1,
})
for part in childParts:
part.parentId = parentId
if not part.metadata:

View file

@ -33,6 +33,7 @@ class ExtractionService:
self._interfaceDbComponent = getComponentInterface(
context.user,
mandateId=context.mandate_id,
featureInstanceId=context.feature_instance_id,
)
self._extractorRegistry = getExtractorRegistry()
if ExtractionService._sharedChunkerRegistry is None:

View file

@ -122,21 +122,54 @@ def _onConnectionRevoked(
)
_SOURCE_TYPE_MAP = {
"msft": {
"sharepoint": ("sharepointFolder", "onedriveFolder"),
"outlook": ("outlookFolder", "calendarFolder", "contactFolder"),
},
"google": {
"drive": ("googleDriveFolder",),
"gmail": ("gmailFolder",),
},
"clickup": {
"clickup": ("clickupList",),
},
"infomaniak": {
"kdrive": ("kdriveFolder",),
},
}
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
"""Load DataSource rows with ragIndexEnabled=true for a connection.
If dataSourceIds is provided (mini-bootstrap), filter to only those IDs.
"""
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.datamodels.datamodelDataSource import DataSource
rootIf = getRootInterface()
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
if dataSourceIds:
return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")]
return [ds for ds in allDs if ds.get("ragIndexEnabled")]
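For example, under this map a hypothetical DataSource row like the one below is only handed to the SharePoint walker of an msft connection once its flag is set:

# Hypothetical DataSource row; keys mirror the fields read in _bootstrapJobHandler.
ds = {"id": "<dsId>", "connectionId": "<connId>",
      "sourceType": "sharepointFolder", "ragIndexEnabled": True}
assert ds["sourceType"] in _SOURCE_TYPE_MAP["msft"]["sharepoint"]  # kept by _filterDs("sharepoint")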
async def _bootstrapJobHandler(
job: Dict[str, Any],
progressCb,
) -> Dict[str, Any]:
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
"""Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources."""
payload = job.get("payload") or {}
connectionId = payload.get("connectionId")
authority = (payload.get("authority") or "").lower()
dataSourceIds = payload.get("dataSourceIds")
if not connectionId:
raise ValueError("connection.bootstrap requires payload.connectionId")
progressCb(5, f"resolving {authority} connection")
# Defensive consent check: if the connection has since disabled knowledge ingestion
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
# Defensive consent check
try:
from modules.interfaces.interfaceDbApp import getRootInterface
_root = getRootInterface()
@ -156,6 +189,21 @@ async def _bootstrapJobHandler(
except Exception as _guardErr:
logger.debug("Could not load connection for consent guard: %s", _guardErr)
# Load only ragIndexEnabled DataSources for this connection
dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds)
if not dataSources:
logger.info(
"ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connectionId,
"authority": authority,
"reason": "no_data_sources",
},
)
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"}
def _normalize(res: Any, label: str) -> Dict[str, Any]:
if isinstance(res, Exception):
logger.error(
@ -165,6 +213,10 @@ async def _bootstrapJobHandler(
return {"error": str(res)}
return res or {}
def _filterDs(walkerKey: str) -> list:
sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ())
return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes]
if authority == "msft":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint,
@ -174,9 +226,14 @@ async def _bootstrapJobHandler(
)
progressCb(10, "sharepoint + outlook")
spDs = _filterDs("sharepoint")
olDs = _filterDs("outlook")
async def _noopResult():
return {"skipped": True, "reason": "no_datasources"}
spResult, olResult = await asyncio.gather(
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else _noopResult(),
return_exceptions=True,
)
return {
@ -195,9 +252,14 @@ async def _bootstrapJobHandler(
)
progressCb(10, "drive + gmail")
gdDs = _filterDs("drive")
gmDs = _filterDs("gmail")
async def _noopResult():
return {"skipped": True, "reason": "no_datasources"}
gdResult, gmResult = await asyncio.gather(
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(),
return_exceptions=True,
)
return {
@ -213,7 +275,8 @@ async def _bootstrapJobHandler(
)
progressCb(10, "clickup tasks")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
cuDs = _filterDs("clickup")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
return {
"connectionId": connectionId,
"authority": authority,

View file

@ -9,7 +9,7 @@ is None).
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description"
@dataclass
class ConnectionIngestionPrefs:
"""Parsed per-connection preferences for knowledge ingestion walkers."""
"""Parsed per-connection preferences for knowledge ingestion walkers.
# PII
neutralizeBeforeEmbed: bool = False
Neutralization is now controlled per DataSource.neutralize (not here).
Surface toggles are obsolete; the walker iterates only over ragIndexEnabled DataSources.
"""
# Mail (Outlook + Gmail)
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
@ -32,18 +33,11 @@ class ConnectionIngestionPrefs:
# Files (Drive / SharePoint / OneDrive)
filesIndexBinaries: bool = True
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
# ClickUp
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
clickupIndexAttachments: bool = False
# Per-authority surface toggles (default everything on)
gmailEnabled: bool = True
driveEnabled: bool = True
sharepointEnabled: bool = True
outlookEnabled: bool = True
# Time window
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
v = raw.get(key)
return int(v) if isinstance(v, int) else default
surface = raw.get("surfaceToggles") or {}
google_surf = surface.get("google") or {}
msft_surf = surface.get("msft") or {}
return ConnectionIngestionPrefs(
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
mailIndexAttachments=_bool("mailIndexAttachments", False),
filesIndexBinaries=_bool("filesIndexBinaries", True),
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
gmailEnabled=bool(google_surf.get("gmail", True)),
driveEnabled=bool(google_surf.get("drive", True)),
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
outlookEnabled=bool(msft_surf.get("outlook", True)),
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
)
except Exception as exc:

View file

@ -23,7 +23,7 @@ import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@ -150,8 +150,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
"data": description,
"contextRef": {"part": "description"},
})
# text_content is ClickUp's rendered-markdown version; include if it adds
# something beyond the plain description (common for bullet lists, checklists).
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
if textContent and textContent != description:
parts.append({
@ -166,33 +164,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
async def bootstrapClickup(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its subtree.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = ClickupBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
clickupScope=prefs.clickupScope,
)
limits = ClickupBootstrapLimits()
startMs = time.time()
result = ClickupBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=clickup connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "clickup",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -215,30 +215,56 @@ async def bootstrapClickup(
return _finalizeResult(connectionId, result, startMs)
teams = (teamsResp or {}).get("teams") or []
for team in teams[: limits.maxWorkspaces]:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = ClickupBootstrapLimits(
maxTasks=limits.maxTasks,
maxWorkspaces=limits.maxWorkspaces,
maxListsPerWorkspace=limits.maxListsPerWorkspace,
maxDescriptionChars=limits.maxDescriptionChars,
maxAgeDays=limits.maxAgeDays,
includeClosed=limits.includeClosed,
neutralize=dsNeutralize,
clickupScope=limits.clickupScope,
)
for team in teams[:dsLimits.maxWorkspaces]:
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
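The field-by-field ClickupBootstrapLimits(...) copy above (and the matching blocks in the Gmail and Outlook walkers below) only overrides neutralize. Since the limits objects are dataclasses, dataclasses.replace expresses the same clone more compactly — a sketch, assuming no custom constructor:
from dataclasses import replace

# Equivalent to the explicit per-field copy: keep every limit, override only neutralize.
dsLimits = replace(limits, neutralize=dsNeutralize)
In the Drive walker, where the DataSource may also carry a maxAgeDays override, the same pattern would be replace(limits, neutralize=dsNeutralize, maxAgeDays=dsMaxAgeDays).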
async def _resolveDependencies(connectionId: str):
@ -280,8 +306,12 @@ async def _walkTeam(
team: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
teamId = str(team.get("id", "") or "")
spacesResp = await svc.getSpaces(teamId)
spaces = (spacesResp or {}).get("spaces") or []
@ -294,14 +324,12 @@ async def _walkTeam(
if not spaceId:
continue
# Folderless lists directly under the space
folderless = await svc.getFolderlessLists(spaceId)
for lst in (folderless or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
listsCollected.append({**lst, "_space": space})
# Lists inside folders
foldersResp = await svc.getFolders(spaceId)
for folder in (foldersResp or {}).get("folders") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
@ -318,6 +346,8 @@ async def _walkTeam(
for lst in listsCollected:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
result.lists += 1
await _walkList(
svc=svc,
@ -330,6 +360,7 @@ async def _walkTeam(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
@ -344,13 +375,16 @@ async def _walkList(
lst: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
listId = str(lst.get("id", "") or "")
if not listId:
return
page = 0
while result.indexed + result.skippedDuplicate < limits.maxTasks:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
resp = await svc.getTasksInList(
listId,
page=page,
@ -371,7 +405,6 @@ async def _walkList(
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
result.skippedPolicy += 1
continue
# Inject the list/folder/space metadata we already loaded.
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
task["folder"] = task.get("folder") or lst.get("_folder") or {}
task["space"] = task.get("space") or lst.get("_space") or {}
@ -385,9 +418,10 @@ async def _walkList(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page
if len(tasks) < 100:
return
page += 1
@ -402,7 +436,8 @@ async def _ingestTask(
task: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -431,6 +466,7 @@ async def _ingestTask(
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "clickup",
"service": "clickup",
"externalItemId": taskId,
@ -456,8 +492,10 @@ async def _ingestTask(
else:
result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate
if progressCb is not None and processed % 50 == 0:
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),

View file

@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
@ -30,7 +31,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder"
@ -41,12 +41,8 @@ class GdriveBootstrapLimits:
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT
# Only ingest files modified within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
# Whether to skip binary/non-text files
filesIndexBinaries: bool = True
@dataclass
@ -95,10 +91,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays:
return True
if not modifiedIso:
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
return True
try:
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
except Exception:
return True
@ -111,34 +105,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
async def bootstrapGdrive(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GdriveBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Walk My Drive starting from the virtual root folder."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Walk My Drive starting from the virtual root folder.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the root path + neutralize policy for its subtree.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = GdriveBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
filesIndexBinaries=prefs.filesIndexBinaries,
)
limits = GdriveBootstrapLimits()
startMs = time.time()
result = GdriveBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gdrive",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -158,25 +154,51 @@ async def bootstrapGdrive(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"walk: {exc}")
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
break
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsPath = ds.get("path", "/")
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
dsLimits = GdriveBootstrapLimits(
maxItems=limits.maxItems,
maxBytes=limits.maxBytes,
maxFileSize=limits.maxFileSize,
skipMimePrefixes=limits.skipMimePrefixes,
maxDepth=limits.maxDepth,
maxAgeDays=dsMaxAgeDays,
neutralize=dsNeutralize,
)
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=dsPath,
depth=0,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
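The walkers only duck-type progressCb: it is called with (percent, message) and, when it also exposes isCancelled(), polled for cooperative cancellation. A minimal sketch of a callback satisfying that contract (class name hypothetical, not part of this change):
import threading
from typing import Optional

class CancellableProgress:
    """Callable progress reporter the bootstrap walkers can poll for cancellation."""

    def __init__(self):
        self._cancelEvent = threading.Event()

    def __call__(self, percent: int, message: Optional[str] = None) -> None:
        # Replace with job-status persistence in a real runner.
        print(f"[{percent:3d}%] {message or ''}")

    def cancel(self) -> None:
        self._cancelEvent.set()

    def isCancelled(self) -> bool:
        return self._cancelEvent.is_set()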
async def _resolveDependencies(connectionId: str):
@ -220,10 +242,13 @@ async def _walkFolder(
depth: int,
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if depth > limits.maxDepth:
return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
@ -236,6 +261,8 @@ async def _walkFolder(
return
if result.bytesProcessed >= limits.maxBytes:
return
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
entryPath = getattr(entry, "path", "") or ""
metadata = getattr(entry, "metadata", {}) or {}
@ -254,6 +281,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
continue
@ -288,6 +316,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
@ -306,7 +335,8 @@ async def _ingestOne(
revision: Optional[str],
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -321,14 +351,13 @@ async def _ingestOne(
result.errors.append(f"download({entryPath}): {exc}")
return
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
fileBytes: bytes
if isinstance(downloaded, (bytes, bytearray)):
fileBytes = bytes(downloaded)
else:
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
if getattr(downloaded, "mimeType", None):
mimeType = downloaded.mimeType # export may have changed the type
mimeType = downloaded.mimeType
if not fileBytes:
result.failed += 1
return
@ -354,6 +383,15 @@ async def _ingestOne(
result.skippedPolicy += 1
return
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
}
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
@ -366,14 +404,7 @@ async def _ingestOne(
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
},
provenance=provenance,
)
)
except Exception as exc:
@ -388,6 +419,8 @@ async def _ingestOne(
result.indexed += 1
else:
result.failed += 1
if handle.error:
result.errors.append(f"ingest({entryPath}): {handle.error}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
@ -411,6 +444,8 @@ async def _ingestOne(
},
)
await asyncio.sleep(0)
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)

View file

@ -175,35 +175,36 @@ def _buildContentObjects(
async def bootstrapGmail(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GmailBootstrapLimits] = None,
googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its scope.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = GmailBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
limits = GmailBootstrapLimits()
startMs = time.time()
result = GmailBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gmail connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gmail",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -221,26 +222,51 @@ async def bootstrapGmail(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
for labelId in limits.labels:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = GmailBootstrapLimits(
maxMessages=limits.maxMessages,
labels=limits.labels,
maxBodyChars=limits.maxBodyChars,
includeAttachments=limits.includeAttachments,
maxAttachmentBytes=limits.maxAttachmentBytes,
maxAgeDays=limits.maxAgeDays,
mailContentDepth=limits.mailContentDepth,
neutralize=dsNeutralize,
)
for labelId in dsLimits.labels:
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -282,7 +308,8 @@ async def _ingestLabel(
labelId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
@ -316,6 +343,8 @@ async def _ingestLabel(
for stub in messageStubs:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
msgId = stub.get("id")
if not msgId:
continue
@ -337,6 +366,7 @@ async def _ingestLabel(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
nextPageToken = page.get("nextPageToken")
@ -355,7 +385,8 @@ async def _ingestMessage(
message: Dict[str, Any],
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -386,6 +417,7 @@ async def _ingestMessage(
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "gmail",
"externalItemId": messageId,
@ -420,6 +452,7 @@ async def _ingestMessage(
parentSyntheticId=syntheticId,
limits=limits,
result=result,
dataSourceId=dataSourceId,
)
except Exception as exc:
logger.warning("gmail attachments %s failed: %s", messageId, exc)
@ -461,6 +494,7 @@ async def _ingestAttachments(
parentSyntheticId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
dataSourceId: str = "",
) -> None:
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -561,6 +595,7 @@ async def _ingestAttachments(
contentObjects=contentObjects,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "gmail",
"parentId": parentSyntheticId,

View file

@ -18,7 +18,7 @@ import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
@ -139,34 +139,35 @@ def _buildContentObjects(
async def bootstrapOutlook(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[OutlookBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its messages.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = OutlookBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
limits = OutlookBootstrapLimits()
startMs = time.time()
result = OutlookBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "outlook",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -176,27 +177,52 @@ async def bootstrapOutlook(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
folderIds = await _selectFolderIds(adapter, limits)
for folderId in folderIds:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
return _finalizeResult(connectionId, result, startMs)
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = OutlookBootstrapLimits(
maxMessages=limits.maxMessages,
maxFolders=limits.maxFolders,
maxBodyChars=limits.maxBodyChars,
includeAttachments=limits.includeAttachments,
maxAttachmentBytes=limits.maxAttachmentBytes,
maxAgeDays=limits.maxAgeDays,
mailContentDepth=limits.mailContentDepth,
neutralize=dsNeutralize,
)
folderIds = await _selectFolderIds(adapter, dsLimits)
for folderId in folderIds:
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -266,8 +292,12 @@ async def _ingestFolder(
folderId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
return
@ -307,6 +337,8 @@ async def _ingestFolder(
for message in page.get("value", []) or []:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
await _ingestMessage(
adapter=adapter,
knowledgeService=knowledgeService,
@ -317,6 +349,7 @@ async def _ingestFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
nextLink = page.get("@odata.nextLink")
@ -338,7 +371,8 @@ async def _ingestMessage(
message: Dict[str, Any],
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -369,6 +403,7 @@ async def _ingestMessage(
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft",
"service": "outlook",
"externalItemId": messageId,
@ -402,6 +437,7 @@ async def _ingestMessage(
parentSyntheticId=syntheticId,
limits=limits,
result=result,
dataSourceId=dataSourceId,
)
except Exception as exc:
logger.warning("outlook attachments %s failed: %s", messageId, exc)
@ -443,6 +479,7 @@ async def _ingestAttachments(
parentSyntheticId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
dataSourceId: str = "",
) -> None:
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -531,6 +568,7 @@ async def _ingestAttachments(
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft",
"service": "outlook",
"parentId": parentSyntheticId,

View file

@ -94,35 +94,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
async def bootstrapSharepoint(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[SharepointBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
"""Enumerate SharePoint drives and ingest files via the facade.
Parameters allow injection for tests; production callers pass only
`connectionId` (and optionally a progressCb) and everything else is
resolved against the registered services.
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the root path + neutralize policy for its subtree.
"""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits:
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
limits = SharepointBootstrapLimits()
startMs = time.time()
result = SharepointBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
connectionId,
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d",
connectionId, len(dataSources),
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "sharepoint",
"connectionId": connectionId,
"dataSourceCount": len(dataSources),
},
)
@ -142,17 +143,27 @@ async def bootstrapSharepoint(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
sites = await adapter.browse("/", limit=limits.maxSites)
except Exception as exc:
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"site_discovery: {exc}")
return _finalizeResult(connectionId, result, startMs)
for site in sites[: limits.maxSites]:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
break
sitePath = getattr(site, "path", "") or ""
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
dsPath = ds.get("path", "")
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = SharepointBootstrapLimits(
maxItems=limits.maxItems,
maxBytes=limits.maxBytes,
maxFileSize=limits.maxFileSize,
skipMimePrefixes=limits.skipMimePrefixes,
maxDepth=limits.maxDepth,
maxSites=limits.maxSites,
neutralize=dsNeutralize,
)
try:
await _walkFolder(
adapter=adapter,
@ -161,17 +172,21 @@ async def bootstrapSharepoint(
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=sitePath,
folderPath=dsPath,
depth=0,
limits=limits,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
result.errors.append(f"walk({sitePath}): {exc}")
logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}")
return _finalizeResult(connectionId, result, startMs)
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str):
@ -221,10 +236,13 @@ async def _walkFolder(
depth: int,
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
if depth > limits.maxDepth:
return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
@ -237,6 +255,8 @@ async def _walkFolder(
return
if result.bytesProcessed >= limits.maxBytes:
return
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
entryPath = getattr(entry, "path", "") or ""
if getattr(entry, "isFolder", False):
@ -252,6 +272,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
continue
@ -283,6 +304,7 @@ async def _walkFolder(
limits=limits,
result=result,
progressCb=progressCb,
dataSourceId=dataSourceId,
)
@ -301,7 +323,8 @@ async def _ingestOne(
revision: Optional[str],
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
progressCb: Optional[Any],
dataSourceId: str = "",
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -339,6 +362,7 @@ async def _ingestOne(
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft",
"service": "sharepoint",
"externalItemId": externalItemId,

View file

@ -0,0 +1,78 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
If no ancestor has a value, the default (False) is used.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def resolveEffectiveNeutralize(
ds: Dict[str, Any],
allDataSources: List[Dict[str, Any]],
) -> bool:
"""Compute effective neutralize by walking up the path tree.
A DataSource at /sites/HR/Documents inherits from /sites/HR if
that ancestor has neutralize=True and the child has no explicit override.
"""
ownValue = ds.get("neutralize")
if ownValue is not None:
    return bool(ownValue)
return _findAncestorPolicy(ds, allDataSources, "neutralize")
def resolveEffectiveRagIndexEnabled(
ds: Dict[str, Any],
allDataSources: List[Dict[str, Any]],
) -> bool:
"""Compute effective ragIndexEnabled by walking up the path tree."""
ownValue = ds.get("ragIndexEnabled")
if ownValue is True:
return True
if ownValue is False:
return False
return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
def _findAncestorPolicy(
ds: Dict[str, Any],
allDataSources: List[Dict[str, Any]],
field: str,
) -> bool:
"""Walk ancestors (longest-prefix match) to find an inherited policy value."""
dsPath = ds.get("path", "")
connectionId = ds.get("connectionId", "")
if not dsPath:
return False
ancestors = []
for candidate in allDataSources:
if candidate.get("id") == ds.get("id"):
continue
if candidate.get("connectionId") != connectionId:
continue
candidatePath = candidate.get("path", "")
if not candidatePath:
continue
if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath):
ancestors.append(candidate)
ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
for ancestor in ancestors:
val = ancestor.get(field)
if val is True:
return True
if val is False:
return False
return False
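A minimal usage sketch of the inheritance rule, with two hypothetical DataSource rows of the same connection:
rows = [
    {"id": "parent", "connectionId": "c1", "path": "/sites/HR", "neutralize": True},
    {"id": "child", "connectionId": "c1", "path": "/sites/HR/Documents"},  # no explicit value
]

# The child has no explicit neutralize value, so it inherits True from /sites/HR.
assert resolveEffectiveNeutralize(rows[1], rows) is True
# The parent's own explicit value applies to itself.
assert resolveEffectiveNeutralize(rows[0], rows) is True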

View file

@ -98,7 +98,8 @@ class WebService:
searchUrls = []
searchResultsWithContent = []
if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
if operationId:
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
try:
searchUrls, searchResultsWithContent = await self._performWebSearch(
@ -113,16 +114,14 @@ class WebService:
searchUrls = []
searchResultsWithContent = []
# Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant)
if searchUrls:
# Prepend Tavily URLs to the list (they're more relevant)
allUrls = searchUrls + allUrls
logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
else:
# If Tavily search failed, use AI-extracted URLs
logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
if operationId:
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
# If we have search results (even without content), use them directly instead of crawling
# Tavily search results are more relevant than generic AI-extracted URLs
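Since the operationId guard now wraps every progressLogUpdate call in this method, a small helper on WebService could keep the call sites compact — a sketch, with _progress being a hypothetical name:
def _progress(self, operationId, fraction: float, message: str) -> None:
    """Report chat progress only when an operationId was provided; no-op otherwise."""
    if operationId:
        self._get_service("chat").progressLogUpdate(operationId, fraction, message)

# Call sites then become e.g.:
#     self._progress(operationId, 0.3, "Searching for URLs and content")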

View file

@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [
"path": "/automations",
"order": 30,
},
{
"id": "rag-inventory",
"objectKey": "ui.system.ragInventory",
"label": t("RAG-Inventar"),
"icon": "FaDatabase",
"path": "/rag-inventory",
"order": 35,
},
{
"id": "store",
"objectKey": "ui.system.store",

View file

@ -110,6 +110,9 @@ asyncpg==0.30.0
## Stripe payments
stripe>=11.0.0
## Outlook MSG file extraction
extract-msg>=0.55.0
## Geospatial libraries for STAC connector
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
shapely>=2.0.0 # For geometric operations (intersections, area calculations)

View file

@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.
This is a one-off migration for the RAG consent & control unification.
Safe to run multiple times (checks column existence before acting).
Usage:
python script_db_migrate_datasource_rag.py [--dry-run]
"""
import os
import sys
import argparse
import logging
from pathlib import Path
scriptPath = Path(__file__).resolve()
gatewayPath = scriptPath.parent.parent
sys.path.insert(0, str(gatewayPath))
os.chdir(str(gatewayPath))
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
logger = logging.getLogger(__name__)
import psycopg2
from modules.shared.configuration import APP_CONFIG
def _getConnection():
return psycopg2.connect(
host=APP_CONFIG.get("DB_HOST", "localhost"),
port=int(APP_CONFIG.get("DB_PORT", "5432")),
database=APP_CONFIG.get("DB_DATABASE", "poweron_app"),
user=APP_CONFIG.get("DB_USER"),
password=APP_CONFIG.get("DB_PASSWORD_SECRET"),
)
def _columnExists(cur, table: str, column: str) -> bool:
cur.execute(
"""SELECT 1 FROM information_schema.columns
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
(table, column),
)
return cur.fetchone() is not None
def migrate(dryRun: bool = False):
conn = _getConnection()
conn.autocommit = False
cur = conn.cursor()
renames = [
("DataSource", "autoSync", "ragIndexEnabled"),
("DataSource", "lastSynced", "lastIndexed"),
]
executed = []
for table, oldCol, newCol in renames:
if _columnExists(cur, table, oldCol) and not _columnExists(cur, table, newCol):
sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";'
logger.info("EXEC: %s", sql)
if not dryRun:
cur.execute(sql)
executed.append(sql)
elif _columnExists(cur, table, newCol):
logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol)
elif not _columnExists(cur, table, oldCol):
logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol)
if not dryRun and executed:
conn.commit()
logger.info("Migration committed (%d statements)", len(executed))
elif dryRun and executed:
conn.rollback()
logger.info("DRY RUN — would execute %d statements", len(executed))
else:
logger.info("Nothing to do — schema already up to date")
cur.close()
conn.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
args = parser.parse_args()
migrate(dryRun=args.dry_run)
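To sanity-check the rename after running the script (or after a dry run against a copy of the database), the helpers defined above can be reused — a sketch:
def verifyMigration() -> bool:
    """Return True when both renamed columns exist and the old names are gone."""
    conn = _getConnection()
    cur = conn.cursor()
    try:
        return (
            _columnExists(cur, "DataSource", "ragIndexEnabled")
            and _columnExists(cur, "DataSource", "lastIndexed")
            and not _columnExists(cur, "DataSource", "autoSync")
            and not _columnExists(cur, "DataSource", "lastSynced")
        )
    finally:
        cur.close()
        conn.close()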

View file

@ -0,0 +1,23 @@
# Copyright (c) 2025 Patrick Motsch
"""Unit tests for Google STT helper config (no API calls)."""
from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields
def test_buildPrimaryStt_lightweight_stripsHeavyFeatures():
d = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
assert d["model"] == "latest_short"
assert d["enable_word_time_offsets"] is False
assert d["enable_word_confidence"] is False
assert d["max_alternatives"] == 1
assert d["use_enhanced"] is False
assert d["enable_automatic_punctuation"] is True
def test_buildPrimaryStt_full_matchesLegacyDefaults():
d = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
assert d["model"] == "latest_long"
assert d["enable_word_time_offsets"] is True
assert d["enable_word_confidence"] is True
assert d["max_alternatives"] == 3
assert d["use_enhanced"] is True

View file

@ -100,6 +100,9 @@ def _adapter(svc):
return SimpleNamespace(_svc=svc)
_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}]
def test_bootstrap_walks_team_space_lists_and_tasks():
svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService()
@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
assert job.mimeType == "application/vnd.clickup.task+json"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds-1"
assert job.provenance["authority"] == "clickup"
assert job.provenance["teamId"] == "team-1"
assert job.contentVersion # numeric millisecond string
# At least the header content-object is present.
ids = [co["contentObjectId"] for co in job.contentObjects]
assert "header" in ids
@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion():
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion():
assert result["indexed"] == 3
def test_bootstrap_skips_when_no_datasources():
async def _run():
return await bootstrapClickup(connectionId="c1")
result = asyncio.run(_run())
assert result["skipped"] is True
assert result["reason"] == "no_datasources"
def test_bootstrap_honours_datasource_neutralize():
svc = _FakeClickupService(taskCount=1)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
dataSources=[{"id": "ds-n", "neutralize": True}],
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None),
)
asyncio.run(_run())
for job in knowledge.calls:
assert job.neutralize is True
assert job.provenance["dataSourceId"] == "ds-n"
if __name__ == "__main__":
test_bootstrap_walks_team_space_lists_and_tasks()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_tasks_older_than_maxAgeDays()
test_bootstrap_maxTasks_caps_ingestion()
test_bootstrap_skips_when_no_datasources()
test_bootstrap_honours_datasource_neutralize()
print("OK — bootstrapClickup tests passed")

View file

@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options):
)
_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}]
def test_bootstrap_walks_drive_and_subfolders():
adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService()
@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=180),
)
result = asyncio.run(_run())
@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance():
async def _run():
return await bootstrapGdrive(
connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance():
assert job.sourceKind == "gdrive_item"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds1"
assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "drive"
assert job.contentVersion # modifiedTime ISO string
def test_bootstrap_skips_when_no_datasources():
async def _run():
return await bootstrapGdrive(connectionId="c1")
result = asyncio.run(_run())
assert result["skipped"] is True
assert result["reason"] == "no_datasources"
if __name__ == "__main__":
test_bootstrap_walks_drive_and_subfolders()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_files_older_than_maxAgeDays()
test_bootstrap_passes_connection_provenance()
test_bootstrap_skips_when_no_datasources()
print("OK — bootstrapGdrive tests passed")

View file

@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
async def _run():
return await bootstrapOutlook(
connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
assert job.sourceKind == "outlook_message"
assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds1"
assert job.provenance["service"] == "outlook"
assert job.contentVersion == "ck1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination():
async def _run():
return await bootstrapOutlook(
connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates():
async def _run():
return await bootstrapOutlook(
connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter,
connection=connection,
knowledgeService=knowledge,