Merge pull request #161 from valueonag/feat/demo-system-readieness

Feat/demo system readieness
This commit is contained in:
Patrick Motsch 2026-05-12 23:34:19 +02:00 committed by GitHub
commit ab43b42aa9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
56 changed files with 2180 additions and 523 deletions

3
app.py
View file

@ -604,6 +604,9 @@ app.include_router(promptRouter)
from modules.routes.routeDataConnections import router as connectionsRouter from modules.routes.routeDataConnections import router as connectionsRouter
app.include_router(connectionsRouter) app.include_router(connectionsRouter)
from modules.routes.routeRagInventory import router as ragInventoryRouter
app.include_router(ragInventoryRouter)
from modules.routes.routeTableViews import router as tableViewsRouter from modules.routes.routeTableViews import router as tableViewsRouter
app.include_router(tableViewsRouter) app.include_router(tableViewsRouter)

View file

@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z
APP_TOKEN_EXPIRY=300 APP_TOKEN_EXPIRY=300
# CORS Configuration # CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration # Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG APP_LOGGING_LOG_LEVEL = DEBUG
@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5 APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP. # OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration # Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ
APP_TOKEN_EXPIRY=300 APP_TOKEN_EXPIRY=300
# CORS Configuration # CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration # Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG APP_LOGGING_LOG_LEVEL = DEBUG
@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5 APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration # Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -32,19 +32,19 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5 APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY=
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk=
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
Service_GOOGLE_AUTH_REDIRECT_URI = Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
Service_GOOGLE_DATA_REDIRECT_URI = Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration # Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl
APP_TOKEN_EXPIRY=300 APP_TOKEN_EXPIRY=300
# CORS Configuration # CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration # Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG APP_LOGGING_LOG_LEVEL = DEBUG
@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5 APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration # Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

View file

@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _buildPrimarySttRecognitionFields(
*,
model: str,
lightweight: bool,
) -> Dict[str, Any]:
"""Shared fields for batch + streaming primary RecognitionConfig."""
base: Dict[str, Any] = {
"enable_automatic_punctuation": True,
"model": model,
}
if lightweight:
base["enable_word_time_offsets"] = False
base["enable_word_confidence"] = False
base["max_alternatives"] = 1
base["use_enhanced"] = False
else:
base["enable_word_time_offsets"] = True
base["enable_word_confidence"] = True
base["max_alternatives"] = 3
base["use_enhanced"] = True
return base
# Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require # Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
# SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0). # SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
_GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts" _GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
@ -73,7 +97,10 @@ class ConnectorGoogleSpeech:
sampleRate: int = None, channels: int = None, sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False, skipFallbacks: bool = False,
phraseHints: Optional[list] = None, phraseHints: Optional[list] = None,
alternativeLanguages: Optional[list] = None) -> Dict: alternativeLanguages: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
audioFormat: Optional[str] = None) -> Dict:
""" """
Convert speech to text using Google Cloud Speech-to-Text API. Convert speech to text using Google Cloud Speech-to-Text API.
@ -82,6 +109,9 @@ class ConnectorGoogleSpeech:
language: Language code (e.g., 'de-DE', 'en-US') language: Language code (e.g., 'de-DE', 'en-US')
sample_rate: Audio sample rate (auto-detected if None) sample_rate: Audio sample rate (auto-detected if None)
channels: Number of audio channels (auto-detected if None) channels: Number of audio channels (auto-detected if None)
model: Google recognition model (e.g. latest_long, latest_short)
lightweight: If True, omit word timings/confidence, single alternative, no enhanced model
audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection
Returns: Returns:
Dict containing transcribed text, confidence, and metadata Dict containing transcribed text, confidence, and metadata
@ -92,8 +122,24 @@ class ConnectorGoogleSpeech:
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection") logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
sampleRate = None sampleRate = None
# Auto-detect audio format if not provided explicitFormat = (audioFormat or "").strip().lower() or None
if sampleRate is None or channels is None: if explicitFormat:
if channels is None:
channels = 1
if sampleRate is None:
if explicitFormat == "webm_opus":
sampleRate = 48000
elif explicitFormat == "linear16":
sampleRate = 16000
elif explicitFormat in ("mp3", "flac"):
sampleRate = 44100
elif explicitFormat == "wav":
sampleRate = 16000
else:
sampleRate = 16000
audioFormat = explicitFormat
logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch")
elif sampleRate is None or channels is None:
validation = self.validateAudioFormat(audioContent) validation = self.validateAudioFormat(audioContent)
if not validation["valid"]: if not validation["valid"]:
return { return {
@ -156,12 +202,7 @@ class ConnectorGoogleSpeech:
"encoding": encoding, "encoding": encoding,
"audio_channel_count": channels, "audio_channel_count": channels,
"language_code": language, "language_code": language,
"enable_automatic_punctuation": True, **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
"model": "latest_long",
"enable_word_time_offsets": True,
"enable_word_confidence": True,
"max_alternatives": 3,
"use_enhanced": True,
} }
if phraseHints: if phraseHints:
@ -205,8 +246,7 @@ class ConnectorGoogleSpeech:
sample_rate_hertz=16000, sample_rate_hertz=16000,
audio_channel_count=1, audio_channel_count=1,
language_code=language, language_code=language,
enable_automatic_punctuation=True, **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
model="latest_long"
) )
try: try:
response = await asyncio.to_thread( response = await asyncio.to_thread(
@ -343,7 +383,7 @@ class ConnectorGoogleSpeech:
"error": "No recognition results (silence or unclear audio)" "error": "No recognition results (silence or unclear audio)"
} }
models = ["latest_long", "phone_call", "latest_short"] models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"]))
for fallback_config in fallback_configs: for fallback_config in fallback_configs:
for model in models: for model in models:
@ -419,6 +459,9 @@ class ConnectorGoogleSpeech:
audioQueue: asyncio.Queue, audioQueue: asyncio.Queue,
language: str = "de-DE", language: str = "de-DE",
phraseHints: Optional[list] = None, phraseHints: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
singleUtterance: bool = False,
) -> AsyncGenerator[Dict[str, Any], None]: ) -> AsyncGenerator[Dict[str, Any], None]:
""" """
Stream audio chunks to Google Cloud Speech-to-Text Streaming API. Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
@ -429,9 +472,13 @@ class ConnectorGoogleSpeech:
Send (b"", True) to signal end of stream. Send (b"", True) to signal end of stream.
language: Language code language: Language code
phraseHints: Optional boost phrases phraseHints: Optional boost phrases
model: Google recognition model (e.g. latest_long, latest_short)
lightweight: If True, use non-enhanced primary config (lower latency)
singleUtterance: If True, end stream after first utterance (client should reconnect)
Yields: Yields:
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec;
optionally endOfSingleUtterance, reconnectRequired
""" """
STREAM_LIMIT_SEC = 290 STREAM_LIMIT_SEC = 290
streamStartTs = time.time() streamStartTs = time.time()
@ -442,9 +489,7 @@ class ConnectorGoogleSpeech:
"sample_rate_hertz": 48000, "sample_rate_hertz": 48000,
"audio_channel_count": 1, "audio_channel_count": 1,
"language_code": language, "language_code": language,
"enable_automatic_punctuation": True, **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
"model": "latest_long",
"use_enhanced": True,
} }
if phraseHints: if phraseHints:
configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)] configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
@ -453,7 +498,7 @@ class ConnectorGoogleSpeech:
streamingConfig = speech.StreamingRecognitionConfig( streamingConfig = speech.StreamingRecognitionConfig(
config=recognitionConfig, config=recognitionConfig,
interim_results=True, interim_results=True,
single_utterance=False, single_utterance=singleUtterance,
) )
import queue as threadQueue import queue as threadQueue
@ -490,7 +535,22 @@ class ConnectorGoogleSpeech:
) )
for response in responseStream: for response in responseStream:
elapsed = time.time() - streamStartTs elapsed = time.time() - streamStartTs
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0
durationFromResults = 0.0
for result in response.results:
rt = getattr(result, "result_end_time", None)
if rt is None:
continue
if hasattr(rt, "total_seconds"):
durationFromResults = max(durationFromResults, float(rt.total_seconds()))
else:
durationFromResults = max(
durationFromResults,
float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9,
)
estimatedDurationSec = durationFromResults if durationFromResults > 0 else (
totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0
)
finalTexts = [] finalTexts = []
interimTexts = [] interimTexts = []
@ -524,6 +584,13 @@ class ConnectorGoogleSpeech:
"stabilityScore": 0.0, "stabilityScore": 0.0,
"audioDurationSec": estimatedDurationSec, "audioDurationSec": estimatedDurationSec,
}), loop) }), loop)
speechEvt = getattr(response, "speech_event_type", None)
if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt):
asyncio.run_coroutine_threadsafe(resultOutQ.put({
"endOfSingleUtterance": True,
"audioDurationSec": estimatedDurationSec,
}), loop)
if elapsed >= STREAM_LIMIT_SEC: if elapsed >= STREAM_LIMIT_SEC:
logger.info("Streaming STT approaching 5-min limit, client should reconnect") logger.info("Streaming STT approaching 5-min limit, client should reconnect")
asyncio.run_coroutine_threadsafe(resultOutQ.put({ asyncio.run_coroutine_threadsafe(resultOutQ.put({

View file

@ -62,15 +62,15 @@ class DataSource(PowerOnModel):
description="Owner user ID", description="Owner user ID",
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}}, json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
) )
autoSync: bool = Field( ragIndexEnabled: bool = Field(
default=False, default=False,
description="Automatically sync on schedule", description="When true this tree element is indexed into the RAG knowledge store",
json_schema_extra={"label": "Auto-Sync"}, json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
) )
lastSynced: Optional[float] = Field( lastIndexed: Optional[float] = Field(
default=None, default=None,
description="Last sync timestamp", description="Timestamp of last successful RAG indexing run",
json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"}, json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
) )
scope: str = Field( scope: str = Field(
default="personal", default="personal",

View file

@ -484,10 +484,10 @@ class UserConnection(PowerOnModel):
default=None, default=None,
description=( description=(
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: " "Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), " "mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), "
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), " "filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), "
"clickupScope (titles|title_description|with_comments), " "clickupIndexAttachments (bool), maxAgeDays (int). "
"surfaceToggles (dict per authority), maxAgeDays (int)." "Neutralization is controlled per DataSource.neutralize (not here)."
), ),
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"}, json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
) )

View file

@ -1080,6 +1080,8 @@ class CommcoachService:
audioContent=audioContent, audioContent=audioContent,
language=language, language=language,
skipFallbacks=True, skipFallbacks=True,
model="latest_short",
lightweight=True,
) )
transcribedText = "" transcribedText = ""

View file

@ -40,6 +40,8 @@ class BrowserBotConnector:
botAccountPassword: Optional[str] = None, botAccountPassword: Optional[str] = None,
transferMode: str = "auto", transferMode: str = "auto",
debugMode: bool = False, debugMode: bool = False,
avatarMediaData: Optional[str] = None,
avatarMediaType: Optional[str] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Send join command to the Browser Bot service. Send join command to the Browser Bot service.
@ -79,12 +81,16 @@ class BrowserBotConnector:
"debugMode": debugMode, "debugMode": debugMode,
} }
# Add authenticated join credentials if configured
if botAccountEmail and botAccountPassword: if botAccountEmail and botAccountPassword:
payload["botAccountEmail"] = botAccountEmail payload["botAccountEmail"] = botAccountEmail
payload["botAccountPassword"] = botAccountPassword payload["botAccountPassword"] = botAccountPassword
logger.info(f"Bot will join authenticated as {botAccountEmail}") logger.info(f"Bot will join authenticated as {botAccountEmail}")
if avatarMediaData and avatarMediaType:
payload["avatarMediaData"] = avatarMediaData
payload["avatarMediaType"] = avatarMediaType
logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars")
try: try:
async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session: async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp: async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:

View file

@ -111,6 +111,18 @@ class TeamsbotMeetingModule(PowerOnModel):
defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts") defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
goals: Optional[str] = Field(default=None, description="Free-text goals") goals: Optional[str] = Field(default=None, description="Free-text goals")
kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets") kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
defaultMeetingLink: Optional[str] = Field(
default=None,
description="Default Teams meeting URL for new sessions in this module (user can override)",
)
defaultBotName: Optional[str] = Field(
default=None,
description="Default display name for the bot when starting a session from this module",
)
defaultAvatarFileId: Optional[str] = Field(
default=None,
description="FileItem ID for the default avatar image/video shown in the meeting",
)
status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE) status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
@ -217,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel):
triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override") triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
contextWindowSegments: Optional[int] = Field(default=None, description="Context window override") contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
debugMode: Optional[bool] = Field(default=None, description="Debug mode override") debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override")
# ============================================================================ # ============================================================================
@ -240,6 +253,7 @@ class TeamsbotConfig(BaseModel):
triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls") triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context") contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics") debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting")
def _getEffectiveBrowserBotUrl(self) -> Optional[str]: def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
"""Resolve the effective browser bot URL: per-instance config takes priority, then env variable.""" """Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
@ -257,6 +271,7 @@ class TeamsbotStartSessionRequest(BaseModel):
"""Request to start a new Teams Bot session.""" """Request to start a new Teams Bot session."""
meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)") meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
botName: Optional[str] = Field(default=None, description="Override bot name for this session") botName: Optional[str] = Field(default=None, description="Override bot name for this session")
moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to")
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access") connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.") joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)") sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
@ -277,6 +292,9 @@ class CreateMeetingModuleRequest(BaseModel):
defaultDirectorPrompts: Optional[str] = None defaultDirectorPrompts: Optional[str] = None
goals: Optional[str] = None goals: Optional[str] = None
kpiTargets: Optional[str] = None kpiTargets: Optional[str] = None
defaultMeetingLink: Optional[str] = None
defaultBotName: Optional[str] = None
defaultAvatarFileId: Optional[str] = None
class UpdateMeetingModuleRequest(BaseModel): class UpdateMeetingModuleRequest(BaseModel):
@ -287,6 +305,9 @@ class UpdateMeetingModuleRequest(BaseModel):
defaultDirectorPrompts: Optional[str] = None defaultDirectorPrompts: Optional[str] = None
goals: Optional[str] = None goals: Optional[str] = None
kpiTargets: Optional[str] = None kpiTargets: Optional[str] = None
defaultMeetingLink: Optional[str] = None
defaultBotName: Optional[str] = None
defaultAvatarFileId: Optional[str] = None
status: Optional[TeamsbotModuleStatus] = None status: Optional[TeamsbotModuleStatus] = None
@ -304,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
triggerCooldownSeconds: Optional[int] = None triggerCooldownSeconds: Optional[int] = None
contextWindowSegments: Optional[int] = None contextWindowSegments: Optional[int] = None
debugMode: Optional[bool] = None debugMode: Optional[bool] = None
avatarFileId: Optional[str] = None
# ============================================================================ # ============================================================================

View file

@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
TeamsbotDirectorPromptStatus, TeamsbotDirectorPromptStatus,
TeamsbotDirectorPromptMode, TeamsbotDirectorPromptMode,
TeamsbotMeetingModule, TeamsbotMeetingModule,
TeamsbotModuleStatus,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -338,6 +339,8 @@ class TeamsbotObjects:
def getModules(self, instanceId: str) -> List[Dict[str, Any]]: def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
"""Get all meeting modules for a feature instance.""" """Get all meeting modules for a feature instance."""
records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId}) records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
for r in records:
r.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True) records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True)
return records return records

View file

@ -290,6 +290,19 @@ def _runMigrations():
migrated = False migrated = False
# M2: MeetingModule default meeting link / bot name (additive columns)
if _tableExists("TeamsbotMeetingModule"):
for col, sqlType in (
("defaultMeetingLink", "TEXT"),
("defaultBotName", "TEXT"),
):
if not _columnExists("TeamsbotMeetingModule", col):
cur.execute(
f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL',
)
logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}")
migrated = True
# M1: Create default Adhoc modules for orphaned sessions # M1: Create default Adhoc modules for orphaned sessions
# (only runs if TeamsbotSession table exists with moduleId column # (only runs if TeamsbotSession table exists with moduleId column
# and there are sessions without a moduleId) # and there are sessions without a moduleId)

View file

@ -40,6 +40,7 @@ from .datamodelTeamsbot import (
TeamsbotDirectorPromptMode, TeamsbotDirectorPromptMode,
TeamsbotDirectorPromptStatus, TeamsbotDirectorPromptStatus,
TeamsbotMeetingModule, TeamsbotMeetingModule,
TeamsbotModuleStatus,
CreateMeetingModuleRequest, CreateMeetingModuleRequest,
UpdateMeetingModuleRequest, UpdateMeetingModuleRequest,
DIRECTOR_PROMPT_FILE_LIMIT, DIRECTOR_PROMPT_FILE_LIMIT,
@ -203,6 +204,7 @@ async def createModule(
data["instanceId"] = instanceId data["instanceId"] = instanceId
data["mandateId"] = mandateId data["mandateId"] = mandateId
data["ownerUserId"] = str(context.user.id) data["ownerUserId"] = str(context.user.id)
data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
module = interface.createModule(data) module = interface.createModule(data)
return {"module": module} return {"module": module}
@ -281,6 +283,11 @@ async def startSession(
interface = _getInterface(context, instanceId) interface = _getInterface(context, instanceId)
config = _getInstanceConfig(instanceId) config = _getInstanceConfig(instanceId)
if body.moduleId:
mod = interface.getModule(body.moduleId)
if not mod or str(mod.get("instanceId") or "") != str(instanceId):
raise HTTPException(status_code=400, detail="Invalid moduleId for this instance")
# Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.) # Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink) cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
@ -288,6 +295,7 @@ async def startSession(
sessionData = TeamsbotSession( sessionData = TeamsbotSession(
instanceId=instanceId, instanceId=instanceId,
mandateId=mandateId, mandateId=mandateId,
moduleId=body.moduleId,
meetingLink=cleanMeetingUrl, meetingLink=cleanMeetingUrl,
botName=body.botName or config.botName, botName=body.botName or config.botName,
sessionContext=body.sessionContext, sessionContext=body.sessionContext,
@ -426,6 +434,54 @@ async def listSessions(
return {"sessions": sessions} return {"sessions": sessions}
@router.get("/{instanceId}/dashboard/stream")
@limiter.limit("60/minute")
async def streamDashboard(
request: Request,
instanceId: str,
context: RequestContext = Depends(getRequestContext),
):
"""
SSE channel for the Teamsbot dashboard: repeated snapshots of sessions and meeting modules.
Push interval: 3s while any own session is pending/joining/active, otherwise 20s.
Same session visibility rules as GET /sessions (own sessions unless platform admin).
"""
_validateInstanceAccess(instanceId, context)
interface = _getInterface(context, instanceId)
userId = None if context.isPlatformAdmin else str(context.user.id)
activeStatuses = {
TeamsbotSessionStatus.PENDING.value,
TeamsbotSessionStatus.JOINING.value,
TeamsbotSessionStatus.ACTIVE.value,
}
async def eventGenerator():
while True:
sessionRows = []
try:
sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId)
moduleRows = interface.getModules(instanceId)
payload = {"type": "dashboardState", "sessions": sessionRows, "modules": moduleRows}
yield f"data: {json.dumps(payload, default=str)}\n\n"
except asyncio.CancelledError:
raise
except Exception as ex:
logger.warning("dashboard stream tick failed: %s", ex)
yield f"data: {json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})}\n\n"
hasActive = any((s.get("status") in activeStatuses) for s in sessionRows)
await asyncio.sleep(3.0 if hasActive else 20.0)
return StreamingResponse(
eventGenerator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
@router.get("/{instanceId}/sessions/{sessionId}") @router.get("/{instanceId}/sessions/{sessionId}")
@limiter.limit("30/minute") @limiter.limit("30/minute")
async def getSession( async def getSession(
@ -634,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf
if not userSettings: if not userSettings:
return baseConfig return baseConfig
# Merge: user settings override instance defaults (only non-None values) # Merge: user settings override instance defaults (only non-None values).
# Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically.
overrides = {} overrides = {}
for field in ["botName", "aiSystemPrompt", "responseMode", for field in TeamsbotConfig.model_fields:
"responseChannel", "transferMode", "language", "voiceId",
"triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments",
"debugMode"]:
value = userSettings.get(field) value = userSettings.get(field)
if value is not None: if value is not None:
overrides[field] = value overrides[field] = value

View file

@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
), ),
"agentRound": ( "agentRound": (
"One short sentence (max ~14 words) the assistant says BETWEEN rounds " "One short sentence (max ~14 words) the assistant says BETWEEN rounds "
"of a longer agent task to signal that work is still in progress. " "of a longer agent task to update the audience on what it is doing. "
"Include the placeholder tokens '{round}' and '{maxRounds}' so the " "Include the placeholder token '{activity}' which will be filled with "
"caller can substitute the actual numbers — e.g. 'Step {round} of " "the current activity — e.g. 'I am {activity}, one moment...' or "
"{maxRounds}, still working.'" "'Currently {activity}, almost there...'. Do NOT include step numbers."
), ),
} }
@ -602,6 +602,13 @@ class TeamsbotService:
self._lastTranscriptText: Optional[str] = None self._lastTranscriptText: Optional[str] = None
self._lastTranscriptId: Optional[str] = None self._lastTranscriptId: Optional[str] = None
self._lastSttTime: float = 0.0 self._lastSttTime: float = 0.0
# Audio chunk aggregation: collect chunks and send to STT only
# after a speech pause or when the buffer reaches a target duration.
self._audioBuffer: bytes = b""
self._audioBufferStartTime: float = 0.0
self._audioBufferLastChunkTime: float = 0.0
self._audioBufferSampleRate: int = 16000
self._lastBotResponseText: Optional[str] = None self._lastBotResponseText: Optional[str] = None
self._lastBotResponseTs: float = 0.0 self._lastBotResponseTs: float = 0.0
@ -732,6 +739,12 @@ class TeamsbotService:
hasAuth = bool(botAccountEmail and botAccountPassword) hasAuth = bool(botAccountEmail and botAccountPassword)
logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}") logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
avatarMediaData = None
avatarMediaType = None
avatarFileId = self._resolveAvatarFileId(session, interface)
if avatarFileId:
avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
result = await self.browserBotConnector.joinMeeting( result = await self.browserBotConnector.joinMeeting(
sessionId=sessionId, sessionId=sessionId,
meetingUrl=meetingLink, meetingUrl=meetingLink,
@ -743,6 +756,8 @@ class TeamsbotService:
botAccountPassword=botAccountPassword, botAccountPassword=botAccountPassword,
transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto", transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False, debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
avatarMediaData=avatarMediaData,
avatarMediaType=avatarMediaType,
) )
if result.get("success"): if result.get("success"):
@ -767,6 +782,37 @@ class TeamsbotService:
}) })
await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)}) await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
def _resolveAvatarFileId(self, session, interface):
"""Resolve avatarFileId: module override > config default."""
moduleId = session.get("moduleId")
if moduleId:
module = interface.getModule(moduleId)
if module and module.get("defaultAvatarFileId"):
return module["defaultAvatarFileId"]
return getattr(self.config, "avatarFileId", None)
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
"""Load avatar file as base64 data + mime type. Returns (data, mimeType) or (None, None)."""
import base64
from modules.interfaces import interfaceDbManagement
try:
mgmt = interfaceDbManagement.getInterface(self.currentUser, self.mandateId)
fileRecord = mgmt.getFile(fileId)
if not fileRecord:
logger.warning(f"Avatar file {fileId} not found")
return None, None
mimeType = getattr(fileRecord, "mimeType", None) or "image/png"
rawBytes = mgmt.getFileData(fileId)
if not rawBytes:
logger.warning(f"Avatar file {fileId} has no data")
return None, None
b64 = base64.b64encode(rawBytes).decode("ascii")
logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64")
return b64, mimeType
except Exception as e:
logger.error(f"Failed to load avatar file {fileId}: {e}")
return None, None
async def leaveMeeting(self, sessionId: str): async def leaveMeeting(self, sessionId: str):
"""Send leave command to the Browser Bot service.""" """Send leave command to the Browser Bot service."""
from . import interfaceFeatureTeamsbot as interfaceDb from . import interfaceFeatureTeamsbot as interfaceDb
@ -1164,6 +1210,14 @@ class TeamsbotService:
interface.updateSession(sessionId, updates) interface.updateSession(sessionId, updates)
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage}) await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
# Flush remaining audio buffer before generating summary
if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
if self._audioBuffer:
logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)")
self._audioBuffer = b""
self._audioBufferStartTime = 0.0
self._audioBufferLastChunkTime = 0.0
# Generate summary when session ends # Generate summary when session ends
if dbStatus == TeamsbotSessionStatus.ENDED.value: if dbStatus == TeamsbotSessionStatus.ENDED.value:
asyncio.create_task(self._generateMeetingSummary(sessionId)) asyncio.create_task(self._generateMeetingSummary(sessionId))
@ -1178,11 +1232,18 @@ class TeamsbotService:
voiceInterface, voiceInterface,
websocket: WebSocket, websocket: WebSocket,
): ):
"""Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline.""" """Process an audio chunk from WebRTC capture. The bot-side VAD
(AudioWorklet / ScriptProcessor) already segments speech into 1-8s
voiced chunks. Here we apply a minimum-duration safety net: very short
chunks (<1s) are buffered until they reach 1s; everything else goes
straight to STT. A wall-clock timeout flushes stale buffers."""
import base64 import base64
_MIN_CHUNK_SEC = 1.0
_STALE_TIMEOUT_SEC = 3.0
try: try:
audioBytes = base64.b64decode(audioBase64) audioBytes = base64.b64decode(audioBase64)
if len(audioBytes) < 1000: if len(audioBytes) < 500:
return return
if captureDiagnostics: if captureDiagnostics:
@ -1195,14 +1256,12 @@ class TeamsbotService:
f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}" f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
) )
# Use RMS from capture diagnostics to skip real silence. isSilent = False
# Byte-variation heuristics produced false positives and dropped valid speech.
if captureDiagnostics and captureDiagnostics.get("rms") is not None: if captureDiagnostics and captureDiagnostics.get("rms") is not None:
try: try:
rmsVal = float(captureDiagnostics.get("rms")) rmsVal = float(captureDiagnostics.get("rms"))
if rmsVal < 0.0003: if rmsVal < 0.0003:
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})") isSilent = True
return
except Exception: except Exception:
pass pass
@ -1210,21 +1269,51 @@ class TeamsbotService:
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}") logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
return return
# Treat sampleRate=0 as unknown (triggers auto-detection) now = time.time()
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000
if not isSilent:
if not self._audioBuffer:
self._audioBufferStartTime = now
self._audioBuffer += audioBytes
self._audioBufferLastChunkTime = now
self._audioBufferSampleRate = effectiveRate
bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0
bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0
shouldFlush = (
self._audioBuffer
and (
bufferDuration >= _MIN_CHUNK_SEC
or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
)
)
if not shouldFlush:
return
flushBytes = self._audioBuffer
flushRate = self._audioBufferSampleRate
self._audioBuffer = b""
self._audioBufferStartTime = 0.0
self._audioBufferLastChunkTime = 0.0
flushDuration = len(flushBytes) / (flushRate * 2)
logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")
phraseHints = list(self._knownSpeakers) phraseHints = list(self._knownSpeakers)
if self.config.botName: if self.config.botName:
phraseHints.append(self.config.botName) phraseHints.append(self.config.botName)
sttResult = await voiceInterface.speechToText( sttResult = await voiceInterface.speechToText(
audioContent=audioBytes, audioContent=flushBytes,
language=self.config.language or "de-DE", language=self.config.language or "de-DE",
sampleRate=effectiveSampleRate, sampleRate=flushRate,
channels=1, channels=1,
skipFallbacks=True, skipFallbacks=True,
phraseHints=phraseHints if phraseHints else None, phraseHints=phraseHints if phraseHints else None,
alternativeLanguages=["en-US"], audioFormat="linear16",
) )
if sttResult and sttResult.get("success") and sttResult.get("text"): if sttResult and sttResult.get("success") and sttResult.get("text"):
@ -1252,19 +1341,18 @@ class TeamsbotService:
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""): def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
"""Track current speaker from captions for STT attribution. """Track current speaker from captions for STT attribution.
When the first non-bot caption arrives, retroactively attributes Retroactively attributes any unattributed STT segments whenever a
any STT segments that were created before a speaker was known.""" new non-bot caption speaker arrives (not just the first time)."""
if not speaker: if not speaker:
return return
normalizedSpeaker = speaker.strip() normalizedSpeaker = speaker.strip()
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker): if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
return return
prevSpeaker = self._lastCaptionSpeaker
self._lastCaptionSpeaker = normalizedSpeaker self._lastCaptionSpeaker = normalizedSpeaker
self._knownSpeakers.add(normalizedSpeaker) self._knownSpeakers.add(normalizedSpeaker)
if prevSpeaker is None and self._unattributedTranscriptIds: if self._unattributedTranscriptIds:
from . import interfaceFeatureTeamsbot as interfaceDb from . import interfaceFeatureTeamsbot as interfaceDb
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
for tid in self._unattributedTranscriptIds: for tid in self._unattributedTranscriptIds:
@ -3243,17 +3331,53 @@ class TeamsbotService:
return await self._pickEphemeralPhrase("agentBusy") return await self._pickEphemeralPhrase("agentBusy")
async def _interimAgentRoundMessage( async def _interimAgentRoundMessage(
self, roundNum: int, maxRounds: int self, lastToolLabel: Optional[str] = None
) -> Optional[str]: ) -> Optional[str]:
"""Per-round progress notice for long agent runs (meeting voice / """Per-round progress notice for long agent runs (meeting voice /
chat, ephemeral). Phrasing is AI-localised once per session; chat, ephemeral). Generates a single short phrase in the bot's
``{round}`` and ``{maxRounds}`` placeholders are substituted at configured language that describes the current activity. Unlike
render time. Returns ``None`` if generation failed.""" the cached ephemeral phrases, this is a per-call AI generation
return await self._pickEphemeralPhrase( to avoid mixing English displayLabels into non-English speech."""
"agentRound", targetLang = (self.config.language or "").strip() or "en-US"
substitutions={"round": roundNum, "maxRounds": maxRounds}, botName = (self.config.botName or "the assistant").strip()
activityHint = lastToolLabel or "working on the task"
prompt = (
f"You are a meeting assistant named '{botName}'.\n"
f"Target spoken language (BCP-47): {targetLang}\n\n"
f"The assistant is currently busy with: {activityHint}\n\n"
f"Generate ONE short sentence (max 12 words) in {targetLang} "
f"that tells the audience what the assistant is doing right now. "
f"Natural, spoken style. No step numbers. No quotes around the output.\n"
f"Output ONLY the sentence, nothing else."
) )
try:
aiService = createAiService(
self.currentUser, self.mandateId, self.instanceId
)
await aiService.ensureAiObjectsInitialized()
request = AiCallRequest(
prompt=prompt,
context="",
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.SPEED,
),
)
response = await aiService.callAi(request)
except Exception as aiErr:
logger.debug(f"Agent round phrase generation failed: {aiErr}")
return None
if not response or response.errorCount != 0 or not response.content:
return None
result = response.content.strip().strip('"').strip("'")
if len(result) > 200:
result = result[:200]
return result
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None: async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
"""Deliver a short line to the meeting (TTS + chat per config) without """Deliver a short line to the meeting (TTS + chat per config) without
persisting botResponses/transcripts, so the main agent answer stays the persisting botResponses/transcripts, so the main agent answer stays the
@ -3370,6 +3494,7 @@ class TeamsbotService:
finalText: str = "" finalText: str = ""
rounds = 0 rounds = 0
lastToolLabel: Optional[str] = None
try: try:
async for event in agentService.runAgent( async for event in agentService.runAgent(
prompt=taskText, prompt=taskText,
@ -3390,11 +3515,9 @@ class TeamsbotService:
"round": roundNum, "round": roundNum,
"maxRounds": maxR, "maxRounds": maxR,
}) })
# Runde 1: schon allgemeiner Start-Hinweis; ab Runde 2 ins Meeting melden.
# Director prompts bleiben still — keine Zwischen-Updates ins Meeting.
if roundNum >= 2 and not directorPromptMode: if roundNum >= 2 and not directorPromptMode:
try: try:
roundText = await self._interimAgentRoundMessage(roundNum, maxR) roundText = await self._interimAgentRoundMessage(lastToolLabel)
if roundText: if roundText:
await self._notifyMeetingEphemeral(sessionId, roundText) await self._notifyMeetingEphemeral(sessionId, roundText)
except Exception as roundNoticeErr: except Exception as roundNoticeErr:
@ -3402,12 +3525,26 @@ class TeamsbotService:
f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}" f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
) )
elif event.type == AgentEventTypeEnum.TOOL_CALL: elif event.type == AgentEventTypeEnum.TOOL_CALL:
toolName = (event.data or {}).get("toolName") if event.data else None evtData = event.data or {}
toolName = evtData.get("toolName")
lastToolLabel = evtData.get("displayLabel")
await _emitSessionEvent(sessionId, "agentRun", { await _emitSessionEvent(sessionId, "agentRun", {
"source": sourceLabel, "source": sourceLabel,
"promptId": promptId, "promptId": promptId,
"status": "toolCall", "status": "toolCall",
"toolName": toolName, "toolName": toolName,
"displayLabel": lastToolLabel,
})
elif event.type == AgentEventTypeEnum.TOOL_RESULT:
evtData = event.data or {}
resultSnippet = (evtData.get("data") or "")[:200]
await _emitSessionEvent(sessionId, "agentRun", {
"source": sourceLabel,
"promptId": promptId,
"status": "toolResult",
"toolName": evtData.get("toolName", ""),
"success": evtData.get("success", True),
"summary": resultSnippet,
}) })
elif event.type == AgentEventTypeEnum.FILE_CREATED: elif event.type == AgentEventTypeEnum.FILE_CREATED:
await _emitSessionEvent(sessionId, "documentCreated", event.data or {}) await _emitSessionEvent(sessionId, "documentCreated", event.data or {})

View file

@ -33,11 +33,6 @@ UI_OBJECTS = [
"label": t("Einstellungen", context="UI"), "label": t("Einstellungen", context="UI"),
"meta": {"area": "settings"} "meta": {"area": "settings"}
}, },
{
"objectKey": "ui.feature.workspace.rag-insights",
"label": t("Wissens-Insights", context="UI"),
"meta": {"area": "rag-insights"},
},
] ]
RESOURCE_OBJECTS = [ RESOURCE_OBJECTS = [
@ -86,7 +81,6 @@ TEMPLATE_ROLES = [
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True}, {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True}, {"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True}, {"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"}, {"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
] ]
}, },
@ -97,7 +91,6 @@ TEMPLATE_ROLES = [
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True}, {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True}, {"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True}, {"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True}, {"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True}, {"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True}, {"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},

View file

@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings(
# ========================================================================= # =========================================================================
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs) # RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
# =========================================================================
def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]:
"""All FileItem ids for this feature instance (any user). Knowledge rows are often stored
without featureInstanceId; we correlate by file id from the Management DB."""
from modules.datamodels.datamodelFiles import FileItem
from modules.interfaces.interfaceDbManagement import ComponentObjects
co = ComponentObjects()
rows = co.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId})
out: List[str] = []
m = str(mandateId) if mandateId else ""
for r in rows or []:
rid = r.get("id") if isinstance(r, dict) else getattr(r, "id", None)
if not rid:
continue
if m:
mid = r.get("mandateId") if isinstance(r, dict) else getattr(r, "mandateId", "") or ""
if mid and mid != m:
continue
out.append(str(rid))
return out
@router.get("/{instanceId}/rag-statistics")
@limiter.limit("60/minute")
async def getRagStatistics(
request: Request,
instanceId: str = Path(...),
days: int = Query(90, ge=7, le=365, description="Timeline window in days"),
context: RequestContext = Depends(getRequestContext),
):
"""Aggregated, non-identifying knowledge-store metrics for this workspace instance."""
mandateId, _instanceConfig = _validateInstanceAccess(instanceId, context)
workspaceFileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId)
kdb = getKnowledgeInterface(context.user)
stats = kdb.getRagStatisticsForInstance(
featureInstanceId=instanceId,
mandateId=str(mandateId) if mandateId else "",
timelineDays=days,
workspaceFileIds=workspaceFileIds,
)
if isinstance(stats, dict):
stats.setdefault("scope", {})
stats["scope"]["workspaceFileIdsResolved"] = len(workspaceFileIds)
return JSONResponse(stats)

View file

@ -133,6 +133,60 @@ class KnowledgeObjects:
return {"indexRows": indexCount, "chunks": chunkCount} return {"indexRows": indexCount, "chunks": chunkCount}
def deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]:
"""Delete all FileContentIndex rows whose provenance.dataSourceId matches.
Used when a user disables ragIndexEnabled on a DataSource to purge
only those chunks that were ingested from that specific tree element.
"""
if not dataSourceId:
return {"indexRows": 0, "chunks": 0}
allRows = self.db.getRecordset(FileContentIndex)
matchedRows = []
for row in allRows:
prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
matchedRows.append(row)
mandateIds: set = set()
chunkCount = 0
indexCount = 0
for row in matchedRows:
fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
if not fid:
continue
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
for chunk in chunks:
if self.db.recordDelete(ContentChunk, chunk["id"]):
chunkCount += 1
if self.db.recordDelete(FileContentIndex, fid):
indexCount += 1
if mid:
mandateIds.add(str(mid))
for mid in mandateIds:
try:
from modules.interfaces.interfaceDbBilling import _getRootInterface
_getRootInterface().reconcileMandateStorageBilling(mid)
except Exception as ex:
logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex)
return {"indexRows": indexCount, "chunks": chunkCount}
def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]:
"""List all FileContentIndex rows whose provenance.dataSourceId matches."""
if not dataSourceId:
return []
allRows = self.db.getRecordset(FileContentIndex)
out = []
for row in allRows:
prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
out.append(dict(row) if not isinstance(row, dict) else row)
return out
def deleteFileContentIndex(self, fileId: str) -> bool: def deleteFileContentIndex(self, fileId: str) -> bool:
"""Delete a FileContentIndex and all associated ContentChunks.""" """Delete a FileContentIndex and all associated ContentChunks."""
existing = self.getFileContentIndex(fileId) existing = self.getFileContentIndex(fileId)

View file

@ -1274,17 +1274,20 @@ class ComponentObjects:
if getattr(permissions, "update", None) != AccessLevel.ALL: if getattr(permissions, "update", None) != AccessLevel.ALL:
raise PermissionError("Setting global scope requires ALL permission") raise PermissionError("Setting global scope requires ALL permission")
self.db.recordModify(FileFolder, folderId, {"scope": scope}) allFolderIds = self._collectChildFolderIds(folderId)
for fid in allFolderIds:
self.db.recordModify(FileFolder, fid, {"scope": scope})
filesUpdated = 0 filesUpdated = 0
if cascadeToFiles: if cascadeToFiles:
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId}) for fid in allFolderIds:
for item in items: items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None) for item in items:
if owner == self.userId: owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) if owner == self.userId:
self.db.recordModify(FileItem, iid, {"scope": scope}) iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
filesUpdated += 1 self.db.recordModify(FileItem, iid, {"scope": scope})
filesUpdated += 1
return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated} return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
@ -1294,16 +1297,19 @@ class ComponentObjects:
raise FileNotFoundError(f"Folder {folderId} not found") raise FileNotFoundError(f"Folder {folderId} not found")
self._requireFolderWriteAccess(folder, folderId, "update") self._requireFolderWriteAccess(folder, folderId, "update")
self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize}) allFolderIds = self._collectChildFolderIds(folderId)
for fid in allFolderIds:
self.db.recordModify(FileFolder, fid, {"neutralize": neutralize})
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
filesUpdated = 0 filesUpdated = 0
for item in items: for fid in allFolderIds:
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None) items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
if owner == self.userId: for item in items:
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
self.db.recordModify(FileItem, iid, {"neutralize": neutralize}) if owner == self.userId:
filesUpdated += 1 iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
filesUpdated += 1
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated} return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}

View file

@ -69,7 +69,10 @@ class VoiceObjects:
sampleRate: int = None, channels: int = None, sampleRate: int = None, channels: int = None,
skipFallbacks: bool = False, skipFallbacks: bool = False,
phraseHints: list = None, phraseHints: list = None,
alternativeLanguages: list = None) -> Dict[str, Any]: alternativeLanguages: list = None,
model: str = "latest_long",
lightweight: bool = False,
audioFormat: Optional[str] = None) -> Dict[str, Any]:
""" """
Convert speech to text using Google Cloud Speech-to-Text API. Convert speech to text using Google Cloud Speech-to-Text API.
@ -81,6 +84,9 @@ class VoiceObjects:
skipFallbacks: If True, skip fallback attempts (use when audio format is known) skipFallbacks: If True, skip fallback attempts (use when audio format is known)
phraseHints: Optional list of phrases to boost recognition (names, terms) phraseHints: Optional list of phrases to boost recognition (names, terms)
alternativeLanguages: Optional list of additional language codes for multi-language alternativeLanguages: Optional list of additional language codes for multi-language
model: Google STT model (e.g. latest_long, latest_short)
lightweight: If True, omit word-level features and enhanced model
audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection
Returns: Returns:
Dict containing transcribed text, confidence, and metadata Dict containing transcribed text, confidence, and metadata
@ -97,6 +103,9 @@ class VoiceObjects:
skipFallbacks=skipFallbacks, skipFallbacks=skipFallbacks,
phraseHints=phraseHints, phraseHints=phraseHints,
alternativeLanguages=alternativeLanguages, alternativeLanguages=alternativeLanguages,
model=model,
lightweight=lightweight,
audioFormat=audioFormat,
) )
if result["success"]: if result["success"]:
@ -120,13 +129,23 @@ class VoiceObjects:
audioQueue: asyncio.Queue, audioQueue: asyncio.Queue,
language: str = "de-DE", language: str = "de-DE",
phraseHints: Optional[list] = None, phraseHints: Optional[list] = None,
model: str = "latest_long",
lightweight: bool = False,
singleUtterance: bool = False,
) -> AsyncGenerator[Dict[str, Any], None]: ) -> AsyncGenerator[Dict[str, Any], None]:
""" """
Stream audio to Google Streaming STT and yield interim/final results. Stream audio to Google Streaming STT and yield interim/final results.
Billing is recorded for each final result. Billing is recorded for each final result.
""" """
connector = self._getGoogleSpeechConnector() connector = self._getGoogleSpeechConnector()
async for event in connector.streamingRecognize(audioQueue, language, phraseHints): async for event in connector.streamingRecognize(
audioQueue,
language,
phraseHints,
model=model,
lightweight=lightweight,
singleUtterance=singleUtterance,
):
if event.get("isFinal") and self.billingCallback: if event.get("isFinal") and self.billingCallback:
durationSec = event.get("audioDurationSec", 0) durationSec = event.get("audioDurationSec", 0)
priceCHF = connector.calculateSttCostCHF(durationSec) priceCHF = connector.calculateSttCostCHF(durationSec)

View file

@ -1986,10 +1986,10 @@ def getUserViewTransactions(
if not pagination: if not pagination:
raise HTTPException(status_code=400, detail="pagination required for groupSummary") raise HTTPException(status_code=400, detail="pagination required for groupSummary")
import json as _json import json as _json
from collections import defaultdict
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
from modules.routes.routeHelpers import ( from modules.routes.routeHelpers import (
applyViewToParams, applyViewToParams,
build_group_summary_groups,
effective_group_by_levels, effective_group_by_levels,
resolveView, resolveView,
) )
@ -2018,28 +2018,7 @@ def getUserViewTransactions(
summary_params, summary_params,
ctx.user, ctx.user,
) )
counts: Dict[str, int] = defaultdict(int) groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels)
labels: Dict[str, str] = {}
null_key = "\x00NULL"
for item in all_rows:
raw = item.get(field)
if raw is None or raw == "":
nk = null_key
labels[nk] = null_label
else:
nk = str(raw)
if nk not in labels:
labels[nk] = nk
counts[nk] += 1
groups_out: List[Dict[str, Any]] = []
for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())):
groups_out.append(
{
"value": None if nk == null_key else nk,
"label": labels.get(nk, nk),
"totalCount": counts[nk],
}
)
return JSONResponse(content={"groups": groups_out}) return JSONResponse(content={"groups": groups_out})
paginationParams = None paginationParams = None

View file

@ -130,7 +130,7 @@ def get_auth_authority_options(
# ============================================================================ # ============================================================================
@router.get("/") @router.get("/")
@limiter.limit("30/minute") @limiter.limit("60/minute")
async def get_connections( async def get_connections(
request: Request, request: Request,
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
@ -197,7 +197,9 @@ async def get_connections(
"lastChecked": connection.lastChecked, "lastChecked": connection.lastChecked,
"expiresAt": connection.expiresAt, "expiresAt": connection.expiresAt,
"tokenStatus": tokenStatus, "tokenStatus": tokenStatus,
"tokenExpiresAt": tokenExpiresAt "tokenExpiresAt": tokenExpiresAt,
"knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False),
"knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {},
}) })
return items return items
@ -264,7 +266,7 @@ async def get_connections(
}) })
enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection) enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser) filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label) groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out}) return JSONResponse(content={"groups": groups_out})
try: try:
@ -725,3 +727,171 @@ def delete_connection(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to delete connection: {str(e)}" detail=f"Failed to delete connection: {str(e)}"
) )
# =========================================================================
# Knowledge Consent & Control Endpoints
# =========================================================================
def _findOwnConnection(interface, userId: str, connectionId: str):
"""Find a connection owned by the user. Returns None if not found."""
connections = interface.getUserConnections(userId)
for conn in connections:
if conn.id == connectionId:
return conn
return None
@router.patch("/{connectionId}/knowledge-consent")
@limiter.limit("10/minute")
def _updateKnowledgeConsent(
request: Request,
connectionId: str = Path(..., description="Connection ID"),
enabled: bool = Body(..., embed=True),
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Master switch: can PowerOn ingest data from this connection into the RAG knowledge store?
enabled=False: purge ALL chunks for this connection + cancel running jobs.
enabled=True: set flag; enqueue bootstrap only if rag-enabled DataSources exist.
"""
try:
interface = getInterface(currentUser)
connection = _findOwnConnection(interface, currentUser.id, connectionId)
if not connection:
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
from modules.interfaces.interfaceDbApp import getRootInterface
rootIf = getRootInterface()
rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled})
purged = None
cancelled = 0
bootstrapEnqueued = False
if not enabled:
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
purged = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
cancelled = cancelJobsByConnection(connectionId)
else:
from modules.datamodels.datamodelDataSource import DataSource
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
if dataSources:
import asyncio
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
async def _enqueue():
await startJob(
"connection.bootstrap",
{"connectionId": connectionId, "authority": authority.lower()},
triggeredBy=str(currentUser.id),
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_enqueue())
else:
loop.run_until_complete(_enqueue())
except RuntimeError:
asyncio.run(_enqueue())
bootstrapEnqueued = True
import json as _json
from modules.shared.auditLogger import audit_logger
from modules.datamodels.datamodelAudit import AuditCategory
audit_logger.logEvent(
userId=str(currentUser.id),
mandateId=str(getattr(connection, "mandateId", "") or ""),
category=AuditCategory.PERMISSION.value,
action="knowledge_consent_changed",
details=_json.dumps({"connectionId": connectionId, "enabled": enabled}),
)
logger.info("Knowledge consent %s for connection %s by user %s",
"enabled" if enabled else "disabled", connectionId, currentUser.id)
return {
"connectionId": connectionId,
"knowledgeIngestionEnabled": enabled,
"purged": purged,
"cancelledJobs": cancelled,
"bootstrapEnqueued": bootstrapEnqueued,
}
except HTTPException:
raise
except Exception as e:
logger.error("Error updating knowledge consent: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{connectionId}/knowledge-preferences")
@limiter.limit("20/minute")
def _updateKnowledgePreferences(
request: Request,
connectionId: str = Path(..., description="Connection ID"),
preferences: Dict[str, Any] = Body(..., embed=True),
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Update per-connection knowledge ingestion preferences (mail depth, attachments, etc.)."""
_ALLOWED_KEYS = {"mailContentDepth", "mailIndexAttachments", "filesIndexBinaries",
"clickupScope", "clickupIndexAttachments", "maxAgeDays"}
try:
interface = getInterface(currentUser)
connection = _findOwnConnection(interface, currentUser.id, connectionId)
if not connection:
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
existing = getattr(connection, "knowledgePreferences", None) or {}
cleaned = {k: v for k, v in preferences.items() if k in _ALLOWED_KEYS}
merged = {**existing, **cleaned, "schemaVersion": 1}
from modules.interfaces.interfaceDbApp import getRootInterface
getRootInterface().db.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged})
logger.info("Knowledge preferences updated for connection %s", connectionId)
return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True}
except HTTPException:
raise
except Exception as e:
logger.error("Error updating knowledge preferences: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/{connectionId}/knowledge-stop")
@limiter.limit("10/minute")
def _stopKnowledgeJobs(
request: Request,
connectionId: str = Path(..., description="Connection ID"),
currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
"""Cancel all running/pending bootstrap jobs for this connection."""
try:
interface = getInterface(currentUser)
connection = _findOwnConnection(interface, currentUser.id, connectionId)
if not connection:
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
cancelled = cancelJobsByConnection(connectionId)
import json as _json
from modules.shared.auditLogger import audit_logger
from modules.datamodels.datamodelAudit import AuditCategory
audit_logger.logEvent(
userId=str(currentUser.id),
mandateId=str(getattr(connection, "mandateId", "") or ""),
category=AuditCategory.PERMISSION.value,
action="knowledge_jobs_stopped",
details=_json.dumps({"connectionId": connectionId, "cancelledCount": cancelled}),
)
logger.info("Stopped %d knowledge jobs for connection %s", cancelled, connectionId)
return {"connectionId": connectionId, "cancelled": cancelled}
except HTTPException:
raise
except Exception as e:
logger.error("Error stopping knowledge jobs: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View file

@ -413,7 +413,7 @@ def patch_folder_scope(
scope = body.get("scope") scope = body.get("scope")
if not scope: if not scope:
raise HTTPException(status_code=400, detail="scope is required") raise HTTPException(status_code=400, detail="scope is required")
cascadeToFiles = body.get("cascadeToFiles", False) cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
managementInterface = interfaceDbManagement.getInterface( managementInterface = interfaceDbManagement.getInterface(
currentUser, currentUser,
mandateId=str(context.mandateId) if context.mandateId else None, mandateId=str(context.mandateId) if context.mandateId else None,
@ -543,7 +543,7 @@ def get_files(
FileItem, FileItem,
) )
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label) groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out}) return JSONResponse(content={"groups": groups_out})
if mode == "filterValues": if mode == "filterValues":

View file

@ -100,7 +100,7 @@ def get_prompts(
result if isinstance(result, list) else (result.items if hasattr(result, "items") else []) result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
) )
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
groups_out = build_group_summary_groups(filtered, field, null_label) groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
return JSONResponse(content={"groups": groups_out}) return JSONResponse(content={"groups": groups_out})
if mode == "filterValues": if mode == "filterValues":

View file

@ -1,6 +1,6 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging.""" """PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging."""
import logging import logging
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@ -125,3 +125,75 @@ def _updateNeutralizeFields(
except Exception as e: except Exception as e:
logger.error("Error updating neutralizeFields: %s", e) logger.error("Error updating neutralizeFields: %s", e)
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@router.patch("/{sourceId}/rag-index")
@limiter.limit("30/minute")
def _updateDataSourceRagIndex(
request: Request,
sourceId: str = Path(..., description="ID of the DataSource"),
ragIndexEnabled: bool = Body(..., embed=True),
context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
"""Toggle RAG indexing for a DataSource.
true: sets flag + enqueues mini-bootstrap for this DataSource only.
false: sets flag + synchronously purges all chunks from this DataSource.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
rootIf = getRootInterface()
rec = rootIf.db.getRecord(DataSource, sourceId)
if not rec:
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)
if ragIndexEnabled:
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
import asyncio
connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
authority = ""
if conn:
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
async def _enqueue():
await startJob(
"connection.bootstrap",
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
triggeredBy=str(context.user.id),
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_enqueue())
else:
loop.run_until_complete(_enqueue())
except RuntimeError:
asyncio.run(_enqueue())
else:
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
logger.info("Purged %d index rows / %d chunks for DataSource %s",
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
import json
from modules.shared.auditLogger import audit_logger
from modules.datamodels.datamodelAudit import AuditCategory
audit_logger.logEvent(
userId=str(context.user.id),
mandateId=context.mandateId,
category=AuditCategory.PERMISSION.value,
action="rag_index_toggled",
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
)
return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
except HTTPException:
raise
except Exception as e:
logger.error("Error updating datasource ragIndexEnabled: %s", e)
raise HTTPException(status_code=500, detail=str(e))

View file

@ -825,45 +825,106 @@ def build_group_summary_groups(
items: List[Dict[str, Any]], items: List[Dict[str, Any]],
field: str, field: str,
null_label: str = "", null_label: str = "",
groupByLevels: List[Dict[str, Any]] | None = None,
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level). Build {"value", "label", "totalCount"} summaries for mode=groupSummary.
When *groupByLevels* contains more than one level the function produces one
entry per unique combination of all level values (flat permutations).
``value`` becomes a ``///``-joined composite key and ``label`` the ``/``-joined
human-readable label so the frontend can split them back.
""" """
from collections import defaultdict from collections import defaultdict
counts: Dict[str, int] = defaultdict(int) fields: list[dict] = []
display_by_key: Dict[str, str] = {} if groupByLevels and len(groupByLevels) > 1:
null_key = "\x00NULL" for lvl in groupByLevels:
label_attr = f"{field}Label" f = lvl.get("field", "")
nl = str(lvl.get("nullLabel") or null_label)
if f:
fields.append({"field": f, "nullLabel": nl})
if not fields:
fields = [{"field": field, "nullLabel": null_label}]
nullKey = "\x00NULL"
if len(fields) == 1:
f = fields[0]["field"]
nl = fields[0]["nullLabel"]
counts: Dict[str, int] = defaultdict(int)
displayByKey: Dict[str, str] = {}
labelAttr = f"{f}Label"
for item in items:
raw = item.get(f)
if raw is None or raw == "":
nk = nullKey
display = nl
else:
nk = str(raw)
display = None
lbl = item.get(labelAttr)
if lbl is not None and lbl != "":
display = str(lbl)
if display is None:
display = nk
counts[nk] += 1
if nk not in displayByKey:
displayByKey[nk] = display
orderedKeys = sorted(
counts.keys(),
key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()),
)
return [
{
"value": None if nk == nullKey else nk,
"label": displayByKey.get(nk, nk),
"totalCount": counts[nk],
}
for nk in orderedKeys
]
counts = defaultdict(int)
displayByComposite: Dict[str, list] = {}
filtersByComposite: Dict[str, dict] = {}
for item in items: for item in items:
raw = item.get(field) parts: list[str] = []
if raw is None or raw == "": labels: list[str] = []
nk = null_key filterMap: dict = {}
display = null_label for fd in fields:
else: f = fd["field"]
nk = str(raw) nl = fd["nullLabel"]
display = None labelAttr = f"{f}Label"
lbl = item.get(label_attr) raw = item.get(f)
if lbl is not None and lbl != "": if raw is None or raw == "":
display = str(lbl) parts.append(nullKey)
if display is None: labels.append(nl)
display = nk filterMap[f] = None
counts[nk] += 1 else:
if nk not in display_by_key: parts.append(str(raw))
display_by_key[nk] = display lbl = item.get(labelAttr)
labels.append(str(lbl) if lbl not in (None, "") else str(raw))
filterMap[f] = str(raw)
compositeKey = "///".join(parts)
counts[compositeKey] += 1
if compositeKey not in displayByComposite:
displayByComposite[compositeKey] = labels
filtersByComposite[compositeKey] = filterMap
ordered_keys = sorted( orderedKeys = sorted(
counts.keys(), counts.keys(),
key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()), key=lambda x: tuple(
(seg == nullKey, seg.lower()) for seg in x.split("///")
),
) )
return [ return [
{ {
"value": None if nk == null_key else nk, "value": ck.replace(nullKey, "__null__") if nullKey in ck else ck,
"label": display_by_key.get(nk, nk), "label": " / ".join(displayByComposite[ck]),
"totalCount": counts[nk], "totalCount": counts[ck],
"filters": filtersByComposite[ck],
} }
for nk in ordered_keys for ck in orderedKeys
] ]

View file

@ -0,0 +1,277 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""RAG Inventory API — global knowledge-store visibility for users, admins, platform."""
import logging
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException, Depends, Request
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
from modules.datamodels.datamodelUam import User
from modules.shared.i18nRegistry import apiRouteContext
# i18n message helper scoped to this route module.
routeApiMsg = apiRouteContext("routeRagInventory")
logger = logging.getLogger(__name__)
# All endpoints live under /api/rag/inventory; the shared error responses are
# declared once here so they show up consistently in the OpenAPI docs.
router = APIRouter(
    prefix="/api/rag/inventory",
    tags=["RAG Inventory"],
    responses={
        401: {"description": "Unauthorized"},
        403: {"description": "Forbidden"},
        500: {"description": "Internal server error"},
    },
)
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
    """Build one RAG-inventory entry per connection.

    Each entry contains the connection's DataSources (with chunk counts derived
    from FileContentIndex provenance), the total chunk count, currently running
    bootstrap jobs and the most recent bootstrap error, if any.

    Args:
        connections: iterable of connection records (model objects).
        rootIf: root DB interface used for DataSource recordsets.
        knowledgeIf: knowledge DB interface used for FileContentIndex recordsets.
        jobService: background-job service exposing ``listJobs``.

    Returns:
        List of inventory dicts, one per connection.
    """
    from modules.datamodels.datamodelDataSource import DataSource
    from modules.datamodels.datamodelKnowledge import FileContentIndex

    def _field(rec, name, default=None):
        # Recordsets may yield plain dicts or model objects; read either shape.
        return rec.get(name, default) if isinstance(rec, dict) else getattr(rec, name, default)

    # The job query does not depend on the connection, so fetch it once
    # instead of once per connection (it was loop-invariant before).
    recentJobs = jobService.listJobs(jobType="connection.bootstrap", limit=5)
    out = []
    for conn in connections:
        connectionId = str(conn.id)
        dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
        connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
        connChunkTotal = len(connIndexRows)
        chunksByDs: Dict[str, int] = {}
        unassigned = 0
        for idx in connIndexRows:
            prov = _field(idx, "provenance") or {}
            dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
            if dsIdRef:
                chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
            else:
                unassigned += 1
        dsItems = []
        for ds in dataSources:
            dsId = _field(ds, "id", "")
            dsItems.append({
                "id": dsId,
                "label": _field(ds, "label", ""),
                "path": _field(ds, "path", ""),
                "sourceType": _field(ds, "sourceType", ""),
                "ragIndexEnabled": _field(ds, "ragIndexEnabled", False),
                "neutralize": _field(ds, "neutralize", False),
                "lastIndexed": _field(ds, "lastIndexed", None),
                "chunkCount": chunksByDs.get(dsId, 0),
            })
        # Chunks whose provenance carries no dataSourceId: attribute them to
        # the only DataSource when that is unambiguous.
        if unassigned > 0 and len(dsItems) == 1:
            dsItems[0]["chunkCount"] += unassigned
        connJobs = [j for j in recentJobs if (j.get("payload") or {}).get("connectionId") == connectionId]
        runningJobs = [
            {"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
            for j in connJobs
            if j.get("status") in ("PENDING", "RUNNING")
        ]
        # First ERROR job in list order (presumably newest-first — TODO confirm
        # the ordering contract of jobService.listJobs).
        lastError = next(
            (
                {"jobId": j["id"], "errorMessage": j.get("errorMessage", "")}
                for j in connJobs
                if j.get("status") == "ERROR"
            ),
            None,
        )
        out.append({
            "id": connectionId,
            "authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority),
            "externalEmail": getattr(conn, "externalEmail", ""),
            "knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
            "preferences": getattr(conn, "knowledgePreferences", None) or {},
            "dataSources": dsItems,
            "totalChunks": connChunkTotal,
            "runningJobs": runningJobs,
            "lastError": lastError,
        })
    return out
@router.get("/me")
@limiter.limit("30/minute")
def _getInventoryMe(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Return the caller's personal RAG inventory.

    Lists the user's own connections with their DataSources and chunk counts,
    plus an overall chunk total.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
        rootInterface = getRootInterface()
        knowledgeInterface = getKnowledgeInterface(None)
        ownConnections = rootInterface.getUserConnections(currentUser.id)
        connectionItems = _buildConnectionInventory(ownConnections, rootInterface, knowledgeInterface, jobService)
        chunkTotal = sum(entry.get("totalChunks", 0) for entry in connectionItems)
        return {"connections": connectionItems, "totals": {"chunks": chunkTotal}}
    except Exception as e:
        logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/mandate")
@limiter.limit("20/minute")
def _getInventoryMandate(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Aggregate RAG inventory across the caller's mandate.

    Requires a mandate context (403 otherwise); includes total chunk and byte
    counts for the mandate.
    """
    if not context.mandateId:
        raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required"))
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
        rootInterface = getRootInterface()
        knowledgeInterface = getKnowledgeInterface(None)
        mandateId = str(context.mandateId) if context.mandateId else ""
        from modules.datamodels.datamodelUam import UserConnection
        mandateRows = rootInterface.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
        # Dict rows are wrapped into lightweight attribute holders so the
        # inventory builder can read them like model objects.
        connectionObjects = []
        for row in mandateRows:
            connectionObjects.append(type("C", (), row)() if isinstance(row, dict) else row)
        connectionItems = _buildConnectionInventory(connectionObjects, rootInterface, knowledgeInterface, jobService)
        chunkTotal = sum(entry.get("totalChunks", 0) for entry in connectionItems)
        byteTotal = aggregateMandateRagTotalBytes(mandateId)
        return {"connections": connectionItems, "totals": {"chunks": chunkTotal, "bytes": byteTotal}}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error in RAG inventory /mandate: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/platform")
@limiter.limit("10/minute")
def _getInventoryPlatform(
    request: Request,
    context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]:
    """Return platform-wide RAG statistics; restricted to sysadmins (403 otherwise)."""
    if not context.isSysAdmin:
        raise HTTPException(status_code=403, detail=routeApiMsg("Platform admin required"))
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
        from modules.datamodels.datamodelUam import UserConnection
        rootInterface = getRootInterface()
        knowledgeInterface = getKnowledgeInterface(None)
        # Unfiltered recordset: every connection on the platform.
        allRows = rootInterface.db.getRecordset(UserConnection)
        connectionObjects = []
        for row in allRows:
            connectionObjects.append(type("C", (), row)() if isinstance(row, dict) else row)
        connectionItems = _buildConnectionInventory(connectionObjects, rootInterface, knowledgeInterface, jobService)
        chunkTotal = sum(entry.get("totalChunks", 0) for entry in connectionItems)
        return {"connections": connectionItems, "totals": {"chunks": chunkTotal}}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error in RAG inventory /platform: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/reindex/{connectionId}")
@limiter.limit("10/minute")
def _reindexConnection(
    request: Request,
    connectionId: str,
    currentUser: User = Depends(getCurrentUser),
) -> Dict[str, Any]:
    """Re-trigger bootstrap for a connection (re-index all ragIndexEnabled DataSources).

    Submits a new connection.bootstrap job, regardless of previous failures.

    Raises:
        HTTPException: 404 if the connection does not exist, 403 if it is not
            owned by the caller, 500 on unexpected errors.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.serviceCenter.services.serviceBackgroundJobs import startJob
        from modules.datamodels.datamodelDataSource import DataSource
        import asyncio
        rootIf = getRootInterface()
        conn = rootIf.getUserConnectionById(connectionId)
        if conn is None:
            raise HTTPException(status_code=404, detail="Connection not found")
        if str(conn.userId) != str(currentUser.id):
            raise HTTPException(status_code=403, detail="Not your connection")
        dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
        ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))]
        if not ragDs:
            # Nothing opted in to RAG indexing — not an error, just a no-op.
            return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
        authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
        dsIds = [(ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")) for ds in ragDs]

        async def _enqueue():
            return await startJob(
                "connection.bootstrap",
                {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds},
                triggeredBy=str(currentUser.id),
            )

        # Sync route handlers run in a threadpool, so normally there is no
        # running loop here and we can wait for the job id. If a loop IS
        # running we must not block it: schedule fire-and-forget and report
        # jobId=None. (asyncio.get_event_loop() is deprecated when no loop is
        # running, hence get_running_loop()/asyncio.run; the previously unused
        # `future` binding is gone.)
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            jobId = asyncio.run(_enqueue())
        else:
            asyncio.ensure_future(_enqueue())
            jobId = None
        logger.info("Reindex triggered for connection %s (%d DataSources)", connectionId, len(dsIds))
        return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(dsIds), "jobId": jobId}
    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error triggering reindex: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/jobs")
@limiter.limit("60/minute")
def _getActiveJobs(
    request: Request,
    currentUser: User = Depends(getCurrentUser),
) -> List[Dict[str, Any]]:
    """Active RAG jobs for the current user (used by header badge)."""
    try:
        from modules.serviceCenter.services.serviceBackgroundJobs import listJobs
        from modules.interfaces.interfaceDbApp import getRootInterface
        rootInterface = getRootInterface()
        ownConnections = rootInterface.getUserConnections(currentUser.id)
        connectionById = {str(c.id): c for c in ownConnections}
        activeJobs: List[Dict[str, Any]] = []
        for job in listJobs(jobType="connection.bootstrap", limit=50):
            if job.get("status") not in ("PENDING", "RUNNING"):
                continue
            connId = (job.get("payload") or {}).get("connectionId")
            conn = connectionById.get(connId)
            if conn is None:
                # Job belongs to someone else's connection — not ours to show.
                continue
            activeJobs.append({
                "jobId": job["id"],
                "connectionId": connId,
                "connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),
                "jobType": job.get("jobType", "connection.bootstrap"),
                "progress": job.get("progress", 0),
                "progressMessage": job.get("progressMessage", ""),
            })
        return activeJobs
    except Exception as e:
        logger.error("Error in RAG inventory /jobs: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

View file

@ -155,12 +155,13 @@ async def sttStream(
Protocol: Protocol:
Client sends JSON: Client sends JSON:
{"type": "open", "language": "de-DE"} {"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true}
{"type": "audio", "chunk": "<base64>"} {"type": "audio", "chunk": "<base64>"}
{"type": "close"} {"type": "close"}
Server sends JSON: Server sends JSON:
{"type": "interim", "text": "..."} {"type": "interim", "text": "..."}
{"type": "final", "text": "...", "confidence": 0.95} {"type": "final", "text": "...", "confidence": 0.95}
{"type": "end_of_single_utterance", "audioDurationSec": 0.0}
{"type": "error", "message": "..."} {"type": "error", "message": "..."}
{"type": "closed"} {"type": "closed"}
""" """
@ -205,7 +206,12 @@ async def sttStream(
logger.warning(f"STT billing pre-flight skipped: {e}") logger.warning(f"STT billing pre-flight skipped: {e}")
audioQueue: asyncio.Queue = asyncio.Queue() audioQueue: asyncio.Queue = asyncio.Queue()
language = "de-DE" sttOpenOptions: Dict[str, Any] = {
"language": "de-DE",
"model": "latest_long",
"lightweight": False,
"singleUtterance": False,
}
streamingTask: Optional[asyncio.Task] = None streamingTask: Optional[asyncio.Task] = None
voiceInterface: Optional[VoiceObjects] = None voiceInterface: Optional[VoiceObjects] = None
@ -233,10 +239,23 @@ async def sttStream(
voiceInterface.billingCallback = _billingCb voiceInterface.billingCallback = _billingCb
try: try:
async for event in voiceInterface.streamingSpeechToText(audioQueue, language): async for event in voiceInterface.streamingSpeechToText(
audioQueue,
sttOpenOptions["language"],
phraseHints=None,
model=sttOpenOptions["model"],
lightweight=sttOpenOptions["lightweight"],
singleUtterance=sttOpenOptions["singleUtterance"],
):
if event.get("reconnectRequired"): if event.get("reconnectRequired"):
await _sendJson({"type": "reconnect_required"}) await _sendJson({"type": "reconnect_required"})
return return
if event.get("endOfSingleUtterance"):
await _sendJson({
"type": "end_of_single_utterance",
"audioDurationSec": event.get("audioDurationSec", 0.0),
})
continue
if event.get("isFinal"): if event.get("isFinal"):
if event.get("transcript"): if event.get("transcript"):
await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)}) await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
@ -258,7 +277,10 @@ async def sttStream(
msgType = (msg.get("type") or "").strip() msgType = (msg.get("type") or "").strip()
if msgType == "open": if msgType == "open":
language = msg.get("language") or "de-DE" sttOpenOptions["language"] = msg.get("language") or "de-DE"
sttOpenOptions["model"] = msg.get("model") or "latest_long"
sttOpenOptions["lightweight"] = bool(msg.get("lightweight"))
sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance"))
if streamingTask and not streamingTask.done(): if streamingTask and not streamingTask.done():
await audioQueue.put((b"", True)) await audioQueue.put((b"", True))
streamingTask.cancel() streamingTask.cancel()

View file

@ -335,9 +335,14 @@ async def runAgentLoop(
# Execute tool calls # Execute tool calls
for tc in toolCalls: for tc in toolCalls:
toolDef = toolRegistry.getTool(tc.name)
yield AgentEvent( yield AgentEvent(
type=AgentEventTypeEnum.TOOL_CALL, type=AgentEventTypeEnum.TOOL_CALL,
data={"toolName": tc.name, "args": tc.args} data={
"toolName": tc.name,
"displayLabel": toolDef.displayLabel if toolDef else None,
"args": tc.args,
}
) )
results = await _executeToolCalls(toolCalls, toolRegistry, { results = await _executeToolCalls(toolCalls, toolRegistry, {

View file

@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services):
"required": ["connectionId", "to", "subject", "body"], "required": ["connectionId", "to", "subject", "body"],
}, },
readOnly=False, readOnly=False,
displayLabel="composing an email",
) )

View file

@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
}, },
}, },
readOnly=False, readOnly=False,
displayLabel="creating a document",
) )
# ── textToSpeech tool ────────────────────────────────────────────── # ── textToSpeech tool ──────────────────────────────────────────────
@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
"required": ["prompt"], "required": ["prompt"],
}, },
readOnly=False, readOnly=False,
displayLabel="generating an image",
) )
# ── createChart tool ───────────────────────────────────────────────── # ── createChart tool ─────────────────────────────────────────────────
@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
"required": ["datasets"], "required": ["datasets"],
}, },
readOnly=False, readOnly=False,
displayLabel="creating a chart",
) )
# ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ── # ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
}, },
"required": ["code"] "required": ["code"]
}, },
readOnly=True readOnly=True,
displayLabel="running calculations",
) )

View file

@ -310,11 +310,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create") return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name) fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "") fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
updateFields: Dict[str, Any] = {}
if fiId: if fiId:
dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId}) updateFields["featureInstanceId"] = fiId
# File group tree removed — groupId arg and instance-group assignment no longer apply if args.get("folderId"):
updateFields["folderId"] = args["folderId"]
if args.get("tags"): if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]}) updateFields["tags"] = args["tags"]
if updateFields:
dbMgmt.updateFile(fileItem.id, updateFields)
chatDocId = _attachFileAsChatDocument( chatDocId = _attachFileAsChatDocument(
services, fileItem, services, fileItem,
@ -359,7 +363,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
}, },
"required": ["fileId"] "required": ["fileId"]
}, },
readOnly=True readOnly=True,
displayLabel="reviewing a document",
) )
registry.register( registry.register(
@ -406,7 +411,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"properties": {"query": {"type": "string", "description": "Search query"}}, "properties": {"query": {"type": "string", "description": "Search query"}},
"required": ["query"] "required": ["query"]
}, },
readOnly=True readOnly=True,
displayLabel="researching on the web",
) )
registry.register( registry.register(
@ -427,7 +433,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"writeFile", _writeFile, "writeFile", _writeFile,
description=( description=(
"Create, append, or overwrite a file. Modes:\n" "Create, append, or overwrite a file. Modes:\n"
"- create (default): create a new file (name required).\n" "- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n"
"- append: append content to an existing file (fileId required). " "- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n" "Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required).\n" "- overwrite: replace entire file content (fileId required).\n"
@ -443,7 +449,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"content": {"type": "string", "description": "Content to write/append"}, "content": {"type": "string", "description": "Content to write/append"},
"mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"}, "mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
"fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"}, "fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
"groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."}, "folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."},
"tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"}, "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
}, },
"required": ["content"] "required": ["content"]
@ -581,7 +587,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
}, },
"required": ["url"] "required": ["url"]
}, },
readOnly=True readOnly=True,
displayLabel="reading a webpage",
) )
registry.register( registry.register(
@ -701,7 +708,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
readOnly=False readOnly=False
) )
# Group tree tools removed — file grouping now uses view-based display grouping (TableListView) # ---- Folder management tools ----
async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
name = args.get("name", "")
parentId = args.get("parentId") or None
if not name:
return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
folder = dbMgmt.createFolder(name, parentId=parentId)
folderId = folder.get("id") if isinstance(folder, dict) else getattr(folder, "id", None)
folderName = folder.get("name") if isinstance(folder, dict) else getattr(folder, "name", name)
return ToolResult(
toolCallId="", toolName="createFolder", success=True,
data=f"Folder '{folderName}' created (id: {folderId})" + (f" inside parent {parentId}" if parentId else ""),
sideEvents=[{"type": "folderCreated", "data": {"folderId": folderId, "folderName": folderName, "parentId": parentId}}],
)
except Exception as e:
return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(e))
async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
folders = dbMgmt.getOwnFolderTree()
if not folders:
return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.")
lines = []
folderMap: Dict[Optional[str], List] = {}
for f in folders:
pid = f.get("parentId") if isinstance(f, dict) else getattr(f, "parentId", None)
folderMap.setdefault(pid, []).append(f)
def _walk(parentId: Optional[str], indent: int):
for f in sorted(folderMap.get(parentId, []), key=lambda x: (x.get("name") if isinstance(x, dict) else getattr(x, "name", "")).lower()):
fId = f.get("id") if isinstance(f, dict) else getattr(f, "id", "")
fName = f.get("name") if isinstance(f, dict) else getattr(f, "name", "")
prefix = " " * indent
lines.append(f"{prefix}- {fName} (id: {fId})")
_walk(fId, indent + 1)
_walk(None, 0)
return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines))
except Exception as e:
return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e))
async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
fileId = args.get("fileId", "")
folderId = args.get("folderId")
if not fileId:
return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
file = dbMgmt.getFile(fileId)
if not file:
return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found")
dbMgmt.updateFile(fileId, {"folderId": folderId or None})
targetLabel = f"folder {folderId}" if folderId else "root"
return ToolResult(
toolCallId="", toolName="moveFile", success=True,
data=f"File '{file.fileName}' (id: {fileId}) moved to {targetLabel}",
sideEvents=[{"type": "fileUpdated", "data": {"fileId": fileId, "fileName": file.fileName}}],
)
except Exception as e:
return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(e))
registry.register(
"createFolder", _createFolder,
description=(
"Create a new folder in the workspace file tree. "
"Use parentId to create nested folders. Returns the new folder ID."
),
parameters={
"type": "object",
"properties": {
"name": {"type": "string", "description": "Folder name"},
"parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."},
},
"required": ["name"]
},
readOnly=False
)
registry.register(
"listFolders", _listFolders,
description=(
"List all folders in the workspace as an indented tree. "
"Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile."
),
parameters={"type": "object", "properties": {}},
readOnly=True
)
async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
folderId = args.get("folderId", "")
newName = args.get("newName", "")
if not folderId or not newName:
return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
try:
chatService = services.chat
dbMgmt = chatService.interfaceDbComponent
folder = dbMgmt.renameFolder(folderId, newName)
return ToolResult(
toolCallId="", toolName="renameFolder", success=True,
data=f"Folder {folderId} renamed to '{newName}'",
sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}],
)
except Exception as e:
return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e))
registry.register(
"renameFolder", _renameFolder,
description="Rename an existing folder in the workspace file tree.",
parameters={
"type": "object",
"properties": {
"folderId": {"type": "string", "description": "The folder ID to rename"},
"newName": {"type": "string", "description": "New folder name"},
},
"required": ["folderId", "newName"]
},
readOnly=False
)
registry.register(
"moveFile", _moveFile,
description=(
"Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level."
),
parameters={
"type": "object",
"properties": {
"fileId": {"type": "string", "description": "The file ID to move"},
"folderId": {"type": "string", "description": "Target folder ID. Omit or null to move to root."},
},
"required": ["fileId"]
},
readOnly=False
)
registry.register( registry.register(
"replaceInFile", _replaceInFile, "replaceInFile", _replaceInFile,

View file

@ -41,6 +41,12 @@ class ToolDefinition(BaseModel):
"""Schema for a tool available to the agent.""" """Schema for a tool available to the agent."""
name: str = Field(description="Unique tool name") name: str = Field(description="Unique tool name")
description: str = Field(description="What this tool does") description: str = Field(description="What this tool does")
displayLabel: Optional[str] = Field(
default=None,
description="Short human-readable activity phrase (e.g. 'researching on the web'). "
"Used for live progress messages in meetings. English gerund phrase; "
"localised by the caller."
)
parameters: Dict[str, Any] = Field( parameters: Dict[str, Any] = Field(
default_factory=dict, default_factory=dict,
description="JSON Schema for tool parameters" description="JSON Schema for tool parameters"

View file

@ -23,7 +23,7 @@ class ToolRegistry:
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]], def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
description: str = "", parameters: Dict[str, Any] = None, description: str = "", parameters: Dict[str, Any] = None,
readOnly: bool = False, featureType: str = None, readOnly: bool = False, featureType: str = None,
toolSet: str = None): toolSet: str = None, displayLabel: str = None):
"""Register a tool with its handler function.""" """Register a tool with its handler function."""
if name in self._tools: if name in self._tools:
logger.warning(f"Tool '{name}' already registered, overwriting") logger.warning(f"Tool '{name}' already registered, overwriting")
@ -31,6 +31,7 @@ class ToolRegistry:
self._tools[name] = ToolDefinition( self._tools[name] = ToolDefinition(
name=name, name=name,
description=description, description=description,
displayLabel=displayLabel,
parameters=parameters or {}, parameters=parameters or {},
readOnly=readOnly, readOnly=readOnly,
featureType=featureType, featureType=featureType,

View file

@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff
Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>" Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
WENN die Aufgabe eines oder mehrere dieser Merkmale hat: WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das") - Recherche im Internet oder aktuelle Informationen noetig
- E-Mail an Teilnehmer/Kontakte versenden - Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast
- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen - E-Mail versenden
- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.) - Dokument generieren oder in einer Datenquelle ablegen
- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.) - Mehrere Schritte oder Tool-Aufrufe noetig
- Daten aus externen Quellen abrufen
Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true.
Wenn needsAgent=true: Wenn needsAgent=true:
- Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig). - Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).

View file

@ -7,6 +7,9 @@ from .mainBackgroundJobService import (
startJob, startJob,
getJobStatus, getJobStatus,
listJobs, listJobs,
cancelJob,
cancelJobsByConnection,
isTerminalStatus,
JobProgressCallback, JobProgressCallback,
) )
@ -15,5 +18,8 @@ __all__ = [
"startJob", "startJob",
"getJobStatus", "getJobStatus",
"listJobs", "listJobs",
"cancelJob",
"cancelJobsByConnection",
"isTerminalStatus",
"JobProgressCallback", "JobProgressCallback",
] ]

View file

@ -30,6 +30,7 @@ clear message. No silent zombies.
import asyncio import asyncio
import logging import logging
import time
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Awaitable, Callable, Dict, List, Optional from typing import Any, Awaitable, Callable, Dict, List, Optional
@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app")
registerDatabase(JOBS_DATABASE) registerDatabase(JOBS_DATABASE)
JobProgressCallback = Callable[[int, Optional[str]], None] _CANCEL_CHECK_INTERVAL_S = 3.0
class JobProgressCallback:
"""Callable progress reporter with cooperative cancel-check for long-running walkers."""
def __init__(self, jobId: str):
self._jobId = jobId
self._cancelledCache: Optional[bool] = None
self._lastCheckedAt: float = 0.0
def __call__(self, progress: int, message: Optional[str] = None) -> None:
try:
clamped = max(0, min(100, int(progress)))
fields: Dict[str, Any] = {"progress": clamped}
if message is not None:
fields["progressMessage"] = message[:500]
_updateJob(self._jobId, fields)
except Exception as ex:
logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
def isCancelled(self) -> bool:
"""Check if this job was cancelled. Reads DB at most every 3s to limit load."""
now = time.time()
if self._cancelledCache is True:
return True
if now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S:
return self._cancelledCache or False
self._lastCheckedAt = now
try:
job = _loadJob(self._jobId)
if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value:
self._cancelledCache = True
return True
except Exception:
pass
self._cancelledCache = False
return False
JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]] JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
@ -155,16 +195,7 @@ def _markError(jobId: str, errorMessage: str) -> None:
def _makeProgressCallback(jobId: str) -> JobProgressCallback: def _makeProgressCallback(jobId: str) -> JobProgressCallback:
def _cb(progress: int, message: Optional[str] = None) -> None: return JobProgressCallback(jobId)
try:
clamped = max(0, min(100, int(progress)))
fields: Dict[str, Any] = {"progress": clamped}
if message is not None:
fields["progressMessage"] = message[:500]
_updateJob(jobId, fields)
except Exception as ex:
logger.warning("Progress update failed for job %s: %s", jobId, ex)
return _cb
async def _runJob(jobId: str) -> None: async def _runJob(jobId: str) -> None:
@ -220,12 +251,51 @@ def isTerminalStatus(status: str) -> bool:
return status in {s.value for s in TERMINAL_JOB_STATUSES} return status in {s.value for s in TERMINAL_JOB_STATUSES}
def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool:
"""Mark a job as CANCELLED. Walkers detect this via JobProgressCallback.isCancelled().
Returns False if the job is already in a terminal state or does not exist.
"""
job = _loadJob(jobId)
if not job:
return False
if isTerminalStatus(job.get("status", "")):
return False
_updateJob(jobId, {
"status": BackgroundJobStatusEnum.CANCELLED.value,
"errorMessage": f"cancelled: {reason}"[:1000],
"finishedAt": datetime.now(timezone.utc).timestamp(),
})
logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason)
return True
def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int:
"""Cancel all RUNNING/PENDING jobs whose payload.connectionId matches.
Returns count of jobs marked as cancelled.
"""
db = _getDb()
rows = db.getRecordset(BackgroundJob, recordFilter={"jobType": jobType})
count = 0
for row in rows:
status = row.get("status", "")
if status not in (BackgroundJobStatusEnum.PENDING.value, BackgroundJobStatusEnum.RUNNING.value):
continue
payload = row.get("payload") or {}
if payload.get("connectionId") == connectionId:
if cancelJob(row["id"], reason=f"connection_stop:{connectionId[:8]}"):
count += 1
return count
def recoverInterruptedJobs() -> int: def recoverInterruptedJobs() -> int:
"""Flip any RUNNING jobs to ERROR (called at worker boot). """Flip any RUNNING jobs to ERROR and re-queue bootstrap jobs (called at worker boot).
A RUNNING job in the DB after process restart means the previous worker A RUNNING job in the DB after process restart means the previous worker
died mid-execution; the asyncio task is gone and the job will never died mid-execution; the asyncio task is gone and the job will never
finish on its own. finish on its own. For connection.bootstrap jobs, a fresh job is
automatically re-queued so the user doesn't have to manually retry.
""" """
db = _getDb() db = _getDb()
try: try:
@ -234,12 +304,34 @@ def recoverInterruptedJobs() -> int:
logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex) logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex)
return 0 return 0
count = 0 count = 0
requeued = 0
for row in rows: for row in rows:
try: try:
_markError(row["id"], "Interrupted by worker restart") _markError(row["id"], "Interrupted by worker restart")
count += 1 count += 1
except Exception as ex: except Exception as ex:
logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex) logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex)
continue
if row.get("jobType") == "connection.bootstrap":
payload = row.get("payload") or {}
if payload.get("connectionId"):
try:
newJob = BackgroundJob(
jobType="connection.bootstrap",
payload=payload,
triggeredBy="recovery.requeue",
)
record = db.recordCreate(BackgroundJob, _serialiseDatetimes(newJob.model_dump()))
asyncio.create_task(_runJob(record["id"]))
requeued += 1
logger.info(
"recoverInterruptedJobs: re-queued bootstrap for connectionId=%s (new jobId=%s)",
payload["connectionId"], record["id"],
)
except Exception as reqEx:
logger.warning("recoverInterruptedJobs: re-queue failed for %s: %s", row.get("id"), reqEx)
if count: if count:
logger.warning("Recovered %d interrupted background job(s) after restart", count) logger.warning("Recovered %d interrupted background job(s) after restart (re-queued %d)", count, requeued)
return count return count

View file

@ -77,6 +77,7 @@ class ContainerExtractor(Extractor):
"""Extract by recursively unpacking the container.""" """Extract by recursively unpacking the container."""
fileName = context.get("fileName", "archive") fileName = context.get("fileName", "archive")
mimeType = context.get("mimeType", "application/octet-stream") mimeType = context.get("mimeType", "application/octet-stream")
cascadeDepth = context.get("_cascadeDepth", 0)
rootId = makeId() rootId = makeId()
parts: List[ContentPart] = [ parts: List[ContentPart] = [
@ -97,7 +98,7 @@ class ContainerExtractor(Extractor):
parts.extend(lazy) parts.extend(lazy)
return parts return parts
state = {"totalSize": 0, "fileCount": 0} state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth}
try: try:
childParts = _resolveContainerRecursive( childParts = _resolveContainerRecursive(
fileBytes, mimeType, fileName, rootId, "", 0, state fileBytes, mimeType, fileName, rootId, "", 0, state
@ -209,7 +210,12 @@ def _addFilePart(
if extractor and not isinstance(extractor, ContainerExtractor): if extractor and not isinstance(extractor, ContainerExtractor):
try: try:
childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime}) cascadeDepth = state.get("cascadeDepth", 0)
childParts = extractor.extract(data, {
"fileName": fileName,
"mimeType": detectedMime,
"_cascadeDepth": cascadeDepth + 1,
})
for part in childParts: for part in childParts:
part.parentId = parentId part.parentId = parentId
if not part.metadata: if not part.metadata:

View file

@ -53,12 +53,13 @@ class EmailExtractor(Extractor):
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]: def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
fileName = context.get("fileName", "email") fileName = context.get("fileName", "email")
lower = (fileName or "").lower() lower = (fileName or "").lower()
depth = context.get("_cascadeDepth", 0)
if lower.endswith(".msg"): if lower.endswith(".msg"):
return self._extractMsg(fileBytes, fileName) return self._extractMsg(fileBytes, fileName, depth)
return self._extractEml(fileBytes, fileName) return self._extractEml(fileBytes, fileName, depth)
def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]: def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
"""Parse standard EML (RFC 822) using stdlib email.""" """Parse standard EML (RFC 822) using stdlib email."""
rootId = makeId() rootId = makeId()
parts: List[ContentPart] = [] parts: List[ContentPart] = []
@ -91,7 +92,7 @@ class EmailExtractor(Extractor):
attachName = part.get_filename() or "attachment" attachName = part.get_filename() or "attachment"
attachData = part.get_payload(decode=True) attachData = part.get_payload(decode=True)
if attachData: if attachData:
parts.extend(_delegateAttachment(attachData, attachName, rootId)) parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
continue continue
if contentType == "text/plain": if contentType == "text/plain":
@ -113,7 +114,7 @@ class EmailExtractor(Extractor):
return parts return parts
def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]: def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
"""Parse Outlook MSG files using extract-msg (optional).""" """Parse Outlook MSG files using extract-msg (optional)."""
rootId = makeId() rootId = makeId()
parts: List[ContentPart] = [] parts: List[ContentPart] = []
@ -179,7 +180,7 @@ class EmailExtractor(Extractor):
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment" attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
attachData = getattr(attachment, "data", None) attachData = getattr(attachment, "data", None)
if attachData: if attachData:
parts.extend(_delegateAttachment(attachData, attachName, rootId)) parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
try: try:
msgFile.close() msgFile.close()
@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str:
return "\n".join(lines) return "\n".join(lines)
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]: _MAX_CASCADE_DEPTH = 10
"""Delegate an attachment to the appropriate type-specific extractor."""
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]:
"""Delegate an attachment to the appropriate type-specific extractor.
Passes ``_cascadeDepth`` through the context so nested EmailContainerEmail
chains share a global depth counter and don't recurse infinitely.
"""
if depth >= _MAX_CASCADE_DEPTH:
logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction")
import base64
encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
return [ContentPart(
id=makeId(), parentId=parentId, label=attachName,
typeGroup="binary", mimeType="application/octet-stream",
data=encodedData,
metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True},
)]
guessedMime, _ = mimetypes.guess_type(attachName) guessedMime, _ = mimetypes.guess_type(attachName)
detectedMime = guessedMime or "application/octet-stream" detectedMime = guessedMime or "application/octet-stream"
from ..subRegistry import ExtractorRegistry from ..subRegistry import getExtractorRegistry
registry = ExtractorRegistry() registry = getExtractorRegistry()
extractor = registry.resolve(detectedMime, attachName) extractor = registry.resolve(detectedMime, attachName)
if extractor and not isinstance(extractor, EmailExtractor): if extractor:
try: try:
childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime}) childParts = extractor.extract(attachData, {
"fileName": attachName,
"mimeType": detectedMime,
"_cascadeDepth": depth + 1,
})
for part in childParts: for part in childParts:
part.parentId = parentId part.parentId = parentId
if not part.metadata: if not part.metadata:

View file

@ -33,6 +33,7 @@ class ExtractionService:
self._interfaceDbComponent = getComponentInterface( self._interfaceDbComponent = getComponentInterface(
context.user, context.user,
mandateId=context.mandate_id, mandateId=context.mandate_id,
featureInstanceId=context.feature_instance_id,
) )
self._extractorRegistry = getExtractorRegistry() self._extractorRegistry = getExtractorRegistry()
if ExtractionService._sharedChunkerRegistry is None: if ExtractionService._sharedChunkerRegistry is None:

View file

@ -122,21 +122,54 @@ def _onConnectionRevoked(
) )
_SOURCE_TYPE_MAP = {
"msft": {
"sharepoint": ("sharepointFolder", "onedriveFolder"),
"outlook": ("outlookFolder", "calendarFolder", "contactFolder"),
},
"google": {
"drive": ("googleDriveFolder",),
"gmail": ("gmailFolder",),
},
"clickup": {
"clickup": ("clickupList",),
},
"infomaniak": {
"kdrive": ("kdriveFolder",),
},
}
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
"""Load DataSource rows with ragIndexEnabled=true for a connection.
If dataSourceIds is provided (mini-bootstrap), filter to only those IDs.
"""
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.datamodels.datamodelDataSource import DataSource
rootIf = getRootInterface()
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
if dataSourceIds:
return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")]
return [ds for ds in allDs if ds.get("ragIndexEnabled")]
async def _bootstrapJobHandler( async def _bootstrapJobHandler(
job: Dict[str, Any], job: Dict[str, Any],
progressCb, progressCb,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps.""" """Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources."""
payload = job.get("payload") or {} payload = job.get("payload") or {}
connectionId = payload.get("connectionId") connectionId = payload.get("connectionId")
authority = (payload.get("authority") or "").lower() authority = (payload.get("authority") or "").lower()
dataSourceIds = payload.get("dataSourceIds")
if not connectionId: if not connectionId:
raise ValueError("connection.bootstrap requires payload.connectionId") raise ValueError("connection.bootstrap requires payload.connectionId")
progressCb(5, f"resolving {authority} connection") progressCb(5, f"resolving {authority} connection")
# Defensive consent check: if the connection has since disabled knowledge ingestion # Defensive consent check
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
try: try:
from modules.interfaces.interfaceDbApp import getRootInterface from modules.interfaces.interfaceDbApp import getRootInterface
_root = getRootInterface() _root = getRootInterface()
@ -156,6 +189,21 @@ async def _bootstrapJobHandler(
except Exception as _guardErr: except Exception as _guardErr:
logger.debug("Could not load connection for consent guard: %s", _guardErr) logger.debug("Could not load connection for consent guard: %s", _guardErr)
# Load only ragIndexEnabled DataSources for this connection
dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds)
if not dataSources:
logger.info(
"ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connectionId,
"authority": authority,
"reason": "no_data_sources",
},
)
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"}
def _normalize(res: Any, label: str) -> Dict[str, Any]: def _normalize(res: Any, label: str) -> Dict[str, Any]:
if isinstance(res, Exception): if isinstance(res, Exception):
logger.error( logger.error(
@ -165,6 +213,10 @@ async def _bootstrapJobHandler(
return {"error": str(res)} return {"error": str(res)}
return res or {} return res or {}
def _filterDs(walkerKey: str) -> list:
sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ())
return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes]
if authority == "msft": if authority == "msft":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import ( from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint, bootstrapSharepoint,
@ -174,9 +226,14 @@ async def _bootstrapJobHandler(
) )
progressCb(10, "sharepoint + outlook") progressCb(10, "sharepoint + outlook")
spDs = _filterDs("sharepoint")
olDs = _filterDs("outlook")
async def _noopResult():
return {"skipped": True, "reason": "no_datasources"}
spResult, olResult = await asyncio.gather( spResult, olResult = await asyncio.gather(
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb), bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb), bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else _noopResult(),
return_exceptions=True, return_exceptions=True,
) )
return { return {
@ -195,9 +252,14 @@ async def _bootstrapJobHandler(
) )
progressCb(10, "drive + gmail") progressCb(10, "drive + gmail")
gdDs = _filterDs("drive")
gmDs = _filterDs("gmail")
async def _noopResult():
return {"skipped": True, "reason": "no_datasources"}
gdResult, gmResult = await asyncio.gather( gdResult, gmResult = await asyncio.gather(
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb), bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb), bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(),
return_exceptions=True, return_exceptions=True,
) )
return { return {
@ -213,7 +275,8 @@ async def _bootstrapJobHandler(
) )
progressCb(10, "clickup tasks") progressCb(10, "clickup tasks")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb) cuDs = _filterDs("clickup")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
return { return {
"connectionId": connectionId, "connectionId": connectionId,
"authority": authority, "authority": authority,

View file

@ -9,7 +9,7 @@ is None).
from __future__ import annotations from __future__ import annotations
import logging import logging
from dataclasses import dataclass, field from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description"
@dataclass @dataclass
class ConnectionIngestionPrefs: class ConnectionIngestionPrefs:
"""Parsed per-connection preferences for knowledge ingestion walkers.""" """Parsed per-connection preferences for knowledge ingestion walkers.
# PII Neutralization is now controlled per DataSource.neutralize (not here).
neutralizeBeforeEmbed: bool = False Surface toggles are obsolete walker iterates only over ragIndexEnabled DataSources.
"""
# Mail (Outlook + Gmail) # Mail (Outlook + Gmail)
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full" mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
@ -32,18 +33,11 @@ class ConnectionIngestionPrefs:
# Files (Drive / SharePoint / OneDrive) # Files (Drive / SharePoint / OneDrive)
filesIndexBinaries: bool = True filesIndexBinaries: bool = True
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
# ClickUp # ClickUp
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments" clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
clickupIndexAttachments: bool = False clickupIndexAttachments: bool = False
# Per-authority surface toggles (default everything on)
gmailEnabled: bool = True
driveEnabled: bool = True
sharepointEnabled: bool = True
outlookEnabled: bool = True
# Time window # Time window
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
v = raw.get(key) v = raw.get(key)
return int(v) if isinstance(v, int) else default return int(v) if isinstance(v, int) else default
surface = raw.get("surfaceToggles") or {}
google_surf = surface.get("google") or {}
msft_surf = surface.get("msft") or {}
return ConnectionIngestionPrefs( return ConnectionIngestionPrefs(
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH), mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
mailIndexAttachments=_bool("mailIndexAttachments", False), mailIndexAttachments=_bool("mailIndexAttachments", False),
filesIndexBinaries=_bool("filesIndexBinaries", True), filesIndexBinaries=_bool("filesIndexBinaries", True),
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE), clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
clickupIndexAttachments=_bool("clickupIndexAttachments", False), clickupIndexAttachments=_bool("clickupIndexAttachments", False),
gmailEnabled=bool(google_surf.get("gmail", True)),
driveEnabled=bool(google_surf.get("drive", True)),
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
outlookEnabled=bool(msft_surf.get("outlook", True)),
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS), maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
) )
except Exception as exc: except Exception as exc:

View file

@ -23,7 +23,7 @@ import logging
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -150,8 +150,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
"data": description, "data": description,
"contextRef": {"part": "description"}, "contextRef": {"part": "description"},
}) })
# text_content is ClickUp's rendered-markdown version; include if it adds
# something beyond the plain description (common for bullet lists, checklists).
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars) textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
if textContent and textContent != description: if textContent and textContent != description:
parts.append({ parts.append({
@ -166,33 +164,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
async def bootstrapClickup( async def bootstrapClickup(
connectionId: str, connectionId: str,
*, *,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None, dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None, adapter: Any = None,
connection: Any = None, connection: Any = None,
knowledgeService: Any = None, knowledgeService: Any = None,
limits: Optional[ClickupBootstrapLimits] = None, limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc.""" """Walk workspaces → lists → tasks and ingest each task as a virtual doc.
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId) Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its subtree.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits: if not limits:
limits = ClickupBootstrapLimits( limits = ClickupBootstrapLimits()
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
clickupScope=prefs.clickupScope,
)
startMs = time.time() startMs = time.time()
result = ClickupBootstrapResult(connectionId=connectionId) result = ClickupBootstrapResult(connectionId=connectionId)
logger.info( logger.info(
"ingestion.connection.bootstrap.started part=clickup connectionId=%s", "ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d",
connectionId, connectionId, len(dataSources),
extra={ extra={
"event": "ingestion.connection.bootstrap.started", "event": "ingestion.connection.bootstrap.started",
"part": "clickup", "part": "clickup",
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceCount": len(dataSources),
}, },
) )
@ -215,30 +215,56 @@ async def bootstrapClickup(
return _finalizeResult(connectionId, result, startMs) return _finalizeResult(connectionId, result, startMs)
teams = (teamsResp or {}).get("teams") or [] teams = (teamsResp or {}).get("teams") or []
for team in teams[: limits.maxWorkspaces]:
cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxTasks: if result.indexed + result.skippedDuplicate >= limits.maxTasks:
break break
teamId = str(team.get("id", "") or "") if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
if not teamId: cancelled = True
continue break
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
return _finalizeResult(connectionId, result, startMs) dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = ClickupBootstrapLimits(
maxTasks=limits.maxTasks,
maxWorkspaces=limits.maxWorkspaces,
maxListsPerWorkspace=limits.maxListsPerWorkspace,
maxDescriptionChars=limits.maxDescriptionChars,
maxAgeDays=limits.maxAgeDays,
includeClosed=limits.includeClosed,
neutralize=dsNeutralize,
clickupScope=limits.clickupScope,
)
for team in teams[:dsLimits.maxWorkspaces]:
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str): async def _resolveDependencies(connectionId: str):
@ -280,8 +306,12 @@ async def _walkTeam(
team: Dict[str, Any], team: Dict[str, Any],
limits: ClickupBootstrapLimits, limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult, result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
teamId = str(team.get("id", "") or "") teamId = str(team.get("id", "") or "")
spacesResp = await svc.getSpaces(teamId) spacesResp = await svc.getSpaces(teamId)
spaces = (spacesResp or {}).get("spaces") or [] spaces = (spacesResp or {}).get("spaces") or []
@ -294,14 +324,12 @@ async def _walkTeam(
if not spaceId: if not spaceId:
continue continue
# Folderless lists directly under the space
folderless = await svc.getFolderlessLists(spaceId) folderless = await svc.getFolderlessLists(spaceId)
for lst in (folderless or {}).get("lists") or []: for lst in (folderless or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace: if len(listsCollected) >= limits.maxListsPerWorkspace:
break break
listsCollected.append({**lst, "_space": space}) listsCollected.append({**lst, "_space": space})
# Lists inside folders
foldersResp = await svc.getFolders(spaceId) foldersResp = await svc.getFolders(spaceId)
for folder in (foldersResp or {}).get("folders") or []: for folder in (foldersResp or {}).get("folders") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace: if len(listsCollected) >= limits.maxListsPerWorkspace:
@ -318,6 +346,8 @@ async def _walkTeam(
for lst in listsCollected: for lst in listsCollected:
if result.indexed + result.skippedDuplicate >= limits.maxTasks: if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
result.lists += 1 result.lists += 1
await _walkList( await _walkList(
svc=svc, svc=svc,
@ -330,6 +360,7 @@ async def _walkTeam(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
@ -344,13 +375,16 @@ async def _walkList(
lst: Dict[str, Any], lst: Dict[str, Any],
limits: ClickupBootstrapLimits, limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult, result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
listId = str(lst.get("id", "") or "") listId = str(lst.get("id", "") or "")
if not listId: if not listId:
return return
page = 0 page = 0
while result.indexed + result.skippedDuplicate < limits.maxTasks: while result.indexed + result.skippedDuplicate < limits.maxTasks:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
resp = await svc.getTasksInList( resp = await svc.getTasksInList(
listId, listId,
page=page, page=page,
@ -371,7 +405,6 @@ async def _walkList(
if not _isRecent(task.get("date_updated"), limits.maxAgeDays): if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
result.skippedPolicy += 1 result.skippedPolicy += 1
continue continue
# Inject the list/folder/space metadata we already loaded.
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")} task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
task["folder"] = task.get("folder") or lst.get("_folder") or {} task["folder"] = task.get("folder") or lst.get("_folder") or {}
task["space"] = task.get("space") or lst.get("_space") or {} task["space"] = task.get("space") or lst.get("_space") or {}
@ -385,9 +418,10 @@ async def _walkList(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page if len(tasks) < 100:
return return
page += 1 page += 1
@ -402,7 +436,8 @@ async def _ingestTask(
task: Dict[str, Any], task: Dict[str, Any],
limits: ClickupBootstrapLimits, limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult, result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -431,6 +466,7 @@ async def _ingestTask(
neutralize=limits.neutralize, neutralize=limits.neutralize,
provenance={ provenance={
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "clickup", "authority": "clickup",
"service": "clickup", "service": "clickup",
"externalItemId": taskId, "externalItemId": taskId,
@ -456,8 +492,10 @@ async def _ingestTask(
else: else:
result.failed += 1 result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: processed = result.indexed + result.skippedDuplicate
processed = result.indexed + result.skippedDuplicate if progressCb is not None and processed % 50 == 0:
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try: try:
progressCb( progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))), min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),

View file

@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through
from __future__ import annotations from __future__ import annotations
import asyncio
import hashlib import hashlib
import logging import logging
import time import time
@ -30,7 +31,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4 MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365 MAX_AGE_DAYS_DEFAULT = 365
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder" FOLDER_MIME = "application/vnd.google-apps.folder"
@ -41,12 +41,8 @@ class GdriveBootstrapLimits:
maxFileSize: int = MAX_FILE_SIZE_DEFAULT maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT maxDepth: int = MAX_DEPTH_DEFAULT
# Only ingest files modified within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False neutralize: bool = False
# Whether to skip binary/non-text files
filesIndexBinaries: bool = True
@dataclass @dataclass
@ -95,10 +91,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays: if not maxAgeDays:
return True return True
if not modifiedIso: if not modifiedIso:
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
return True return True
try: try:
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00")) ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
except Exception: except Exception:
return True return True
@ -111,34 +105,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
async def bootstrapGdrive( async def bootstrapGdrive(
connectionId: str, connectionId: str,
*, *,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None, dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None, adapter: Any = None,
connection: Any = None, connection: Any = None,
knowledgeService: Any = None, knowledgeService: Any = None,
limits: Optional[GdriveBootstrapLimits] = None, limits: Optional[GdriveBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None, runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Walk My Drive starting from the virtual root folder.""" """Walk My Drive starting from the virtual root folder.
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId) Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the root path + neutralize policy for its subtree.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits: if not limits:
limits = GdriveBootstrapLimits( limits = GdriveBootstrapLimits()
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
filesIndexBinaries=prefs.filesIndexBinaries,
)
startMs = time.time() startMs = time.time()
result = GdriveBootstrapResult(connectionId=connectionId) result = GdriveBootstrapResult(connectionId=connectionId)
logger.info( logger.info(
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s", "ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d",
connectionId, connectionId, len(dataSources),
extra={ extra={
"event": "ingestion.connection.bootstrap.started", "event": "ingestion.connection.bootstrap.started",
"part": "gdrive", "part": "gdrive",
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceCount": len(dataSources),
}, },
) )
@ -158,25 +154,51 @@ async def bootstrapGdrive(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try: cancelled = False
await _walkFolder( for ds in dataSources:
adapter=adapter, if result.indexed + result.skippedDuplicate >= limits.maxItems:
knowledgeService=knowledgeService, break
runExtractionFn=runExtractionFn, if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
connectionId=connectionId, cancelled = True
mandateId=mandateId, break
userId=userId,
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"walk: {exc}")
return _finalizeResult(connectionId, result, startMs) dsPath = ds.get("path", "/")
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
dsLimits = GdriveBootstrapLimits(
maxItems=limits.maxItems,
maxBytes=limits.maxBytes,
maxFileSize=limits.maxFileSize,
skipMimePrefixes=limits.skipMimePrefixes,
maxDepth=limits.maxDepth,
maxAgeDays=dsMaxAgeDays,
neutralize=dsNeutralize,
)
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=dsPath,
depth=0,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({dsPath}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str): async def _resolveDependencies(connectionId: str):
@ -220,10 +242,13 @@ async def _walkFolder(
depth: int, depth: int,
limits: GdriveBootstrapLimits, limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult, result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
if depth > limits.maxDepth: if depth > limits.maxDepth:
return return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try: try:
entries = await adapter.browse(folderPath) entries = await adapter.browse(folderPath)
except Exception as exc: except Exception as exc:
@ -236,6 +261,8 @@ async def _walkFolder(
return return
if result.bytesProcessed >= limits.maxBytes: if result.bytesProcessed >= limits.maxBytes:
return return
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
entryPath = getattr(entry, "path", "") or "" entryPath = getattr(entry, "path", "") or ""
metadata = getattr(entry, "metadata", {}) or {} metadata = getattr(entry, "metadata", {}) or {}
@ -254,6 +281,7 @@ async def _walkFolder(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
continue continue
@ -288,6 +316,7 @@ async def _walkFolder(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
@ -306,7 +335,8 @@ async def _ingestOne(
revision: Optional[str], revision: Optional[str],
limits: GdriveBootstrapLimits, limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult, result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -321,14 +351,13 @@ async def _ingestOne(
result.errors.append(f"download({entryPath}): {exc}") result.errors.append(f"download({entryPath}): {exc}")
return return
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
fileBytes: bytes fileBytes: bytes
if isinstance(downloaded, (bytes, bytearray)): if isinstance(downloaded, (bytes, bytearray)):
fileBytes = bytes(downloaded) fileBytes = bytes(downloaded)
else: else:
fileBytes = bytes(getattr(downloaded, "data", b"") or b"") fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
if getattr(downloaded, "mimeType", None): if getattr(downloaded, "mimeType", None):
mimeType = downloaded.mimeType # export may have changed the type mimeType = downloaded.mimeType
if not fileBytes: if not fileBytes:
result.failed += 1 result.failed += 1
return return
@ -354,6 +383,15 @@ async def _ingestOne(
result.skippedPolicy += 1 result.skippedPolicy += 1
return return
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
}
try: try:
handle = await knowledgeService.requestIngestion( handle = await knowledgeService.requestIngestion(
IngestionJob( IngestionJob(
@ -366,14 +404,7 @@ async def _ingestOne(
contentObjects=contentObjects, contentObjects=contentObjects,
contentVersion=revision, contentVersion=revision,
neutralize=limits.neutralize, neutralize=limits.neutralize,
provenance={ provenance=provenance,
"connectionId": connectionId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
},
) )
) )
except Exception as exc: except Exception as exc:
@ -388,6 +419,8 @@ async def _ingestOne(
result.indexed += 1 result.indexed += 1
else: else:
result.failed += 1 result.failed += 1
if handle.error:
result.errors.append(f"ingest({entryPath}): {handle.error}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate processed = result.indexed + result.skippedDuplicate
@ -411,6 +444,8 @@ async def _ingestOne(
}, },
) )
await asyncio.sleep(0)
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]: def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000) durationMs = int((time.time() - startMs) * 1000)

View file

@ -175,35 +175,36 @@ def _buildContentObjects(
async def bootstrapGmail( async def bootstrapGmail(
connectionId: str, connectionId: str,
*, *,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None, dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None, adapter: Any = None,
connection: Any = None, connection: Any = None,
knowledgeService: Any = None, knowledgeService: Any = None,
limits: Optional[GmailBootstrapLimits] = None, limits: Optional[GmailBootstrapLimits] = None,
googleGetFn: Optional[Callable[..., Any]] = None, googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages.""" """Enumerate Gmail labels (INBOX + SENT default) and ingest messages.
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId) Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its scope.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits: if not limits:
limits = GmailBootstrapLimits( limits = GmailBootstrapLimits()
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
startMs = time.time() startMs = time.time()
result = GmailBootstrapResult(connectionId=connectionId) result = GmailBootstrapResult(connectionId=connectionId)
logger.info( logger.info(
"ingestion.connection.bootstrap.started part=gmail connectionId=%s", "ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d",
connectionId, connectionId, len(dataSources),
extra={ extra={
"event": "ingestion.connection.bootstrap.started", "event": "ingestion.connection.bootstrap.started",
"part": "gmail", "part": "gmail",
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceCount": len(dataSources),
}, },
) )
@ -221,26 +222,51 @@ async def bootstrapGmail(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
for labelId in limits.labels: cancelled = False
for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxMessages: if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break break
try: if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
await _ingestLabel( cancelled = True
googleGetFn=googleGetFn, break
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
return _finalizeResult(connectionId, result, startMs) dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = GmailBootstrapLimits(
maxMessages=limits.maxMessages,
labels=limits.labels,
maxBodyChars=limits.maxBodyChars,
includeAttachments=limits.includeAttachments,
maxAttachmentBytes=limits.maxAttachmentBytes,
maxAgeDays=limits.maxAgeDays,
mailContentDepth=limits.mailContentDepth,
neutralize=dsNeutralize,
)
for labelId in dsLimits.labels:
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str): async def _resolveDependencies(connectionId: str):
@ -282,7 +308,8 @@ async def _ingestLabel(
labelId: str, labelId: str,
limits: GmailBootstrapLimits, limits: GmailBootstrapLimits,
result: GmailBootstrapResult, result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate) remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0: if remaining <= 0:
@ -316,6 +343,8 @@ async def _ingestLabel(
for stub in messageStubs: for stub in messageStubs:
if result.indexed + result.skippedDuplicate >= limits.maxMessages: if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break break
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
msgId = stub.get("id") msgId = stub.get("id")
if not msgId: if not msgId:
continue continue
@ -337,6 +366,7 @@ async def _ingestLabel(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
nextPageToken = page.get("nextPageToken") nextPageToken = page.get("nextPageToken")
@ -355,7 +385,8 @@ async def _ingestMessage(
message: Dict[str, Any], message: Dict[str, Any],
limits: GmailBootstrapLimits, limits: GmailBootstrapLimits,
result: GmailBootstrapResult, result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -386,6 +417,7 @@ async def _ingestMessage(
neutralize=limits.neutralize, neutralize=limits.neutralize,
provenance={ provenance={
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google", "authority": "google",
"service": "gmail", "service": "gmail",
"externalItemId": messageId, "externalItemId": messageId,
@ -420,6 +452,7 @@ async def _ingestMessage(
parentSyntheticId=syntheticId, parentSyntheticId=syntheticId,
limits=limits, limits=limits,
result=result, result=result,
dataSourceId=dataSourceId,
) )
except Exception as exc: except Exception as exc:
logger.warning("gmail attachments %s failed: %s", messageId, exc) logger.warning("gmail attachments %s failed: %s", messageId, exc)
@ -461,6 +494,7 @@ async def _ingestAttachments(
parentSyntheticId: str, parentSyntheticId: str,
limits: GmailBootstrapLimits, limits: GmailBootstrapLimits,
result: GmailBootstrapResult, result: GmailBootstrapResult,
dataSourceId: str = "",
) -> None: ) -> None:
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs).""" """Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -561,6 +595,7 @@ async def _ingestAttachments(
contentObjects=contentObjects, contentObjects=contentObjects,
provenance={ provenance={
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "google", "authority": "google",
"service": "gmail", "service": "gmail",
"parentId": parentSyntheticId, "parentId": parentSyntheticId,

View file

@ -18,7 +18,7 @@ import hashlib
import logging import logging
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional from typing import Any, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
@ -139,34 +139,35 @@ def _buildContentObjects(
async def bootstrapOutlook( async def bootstrapOutlook(
connectionId: str, connectionId: str,
*, *,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None, dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None, adapter: Any = None,
connection: Any = None, connection: Any = None,
knowledgeService: Any = None, knowledgeService: Any = None,
limits: Optional[OutlookBootstrapLimits] = None, limits: Optional[OutlookBootstrapLimits] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages.""" """Enumerate Outlook folders (inbox + sent by default) and ingest messages.
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId) Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
Each DataSource defines the neutralize policy for its messages.
"""
if not dataSources:
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits: if not limits:
limits = OutlookBootstrapLimits( limits = OutlookBootstrapLimits()
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
startMs = time.time() startMs = time.time()
result = OutlookBootstrapResult(connectionId=connectionId) result = OutlookBootstrapResult(connectionId=connectionId)
logger.info( logger.info(
"ingestion.connection.bootstrap.started part=outlook connectionId=%s", "ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d",
connectionId, connectionId, len(dataSources),
extra={ extra={
"event": "ingestion.connection.bootstrap.started", "event": "ingestion.connection.bootstrap.started",
"part": "outlook", "part": "outlook",
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceCount": len(dataSources),
}, },
) )
@ -176,27 +177,52 @@ async def bootstrapOutlook(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
folderIds = await _selectFolderIds(adapter, limits) cancelled = False
for folderId in folderIds: for ds in dataSources:
if result.indexed + result.skippedDuplicate >= limits.maxMessages: if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break break
try: if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
await _ingestFolder( cancelled = True
adapter=adapter, break
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
return _finalizeResult(connectionId, result, startMs) dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = OutlookBootstrapLimits(
maxMessages=limits.maxMessages,
maxFolders=limits.maxFolders,
maxBodyChars=limits.maxBodyChars,
includeAttachments=limits.includeAttachments,
maxAttachmentBytes=limits.maxAttachmentBytes,
maxAgeDays=limits.maxAgeDays,
mailContentDepth=limits.mailContentDepth,
neutralize=dsNeutralize,
)
folderIds = await _selectFolderIds(adapter, dsLimits)
for folderId in folderIds:
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=dsLimits,
result=result,
progressCb=progressCb,
dataSourceId=dsId,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str): async def _resolveDependencies(connectionId: str):
@ -266,8 +292,12 @@ async def _ingestFolder(
folderId: str, folderId: str,
limits: OutlookBootstrapLimits, limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult, result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate) remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0: if remaining <= 0:
return return
@ -307,6 +337,8 @@ async def _ingestFolder(
for message in page.get("value", []) or []: for message in page.get("value", []) or []:
if result.indexed + result.skippedDuplicate >= limits.maxMessages: if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break break
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
await _ingestMessage( await _ingestMessage(
adapter=adapter, adapter=adapter,
knowledgeService=knowledgeService, knowledgeService=knowledgeService,
@ -317,6 +349,7 @@ async def _ingestFolder(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
nextLink = page.get("@odata.nextLink") nextLink = page.get("@odata.nextLink")
@ -338,7 +371,8 @@ async def _ingestMessage(
message: Dict[str, Any], message: Dict[str, Any],
limits: OutlookBootstrapLimits, limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult, result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -369,6 +403,7 @@ async def _ingestMessage(
neutralize=limits.neutralize, neutralize=limits.neutralize,
provenance={ provenance={
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft", "authority": "msft",
"service": "outlook", "service": "outlook",
"externalItemId": messageId, "externalItemId": messageId,
@ -402,6 +437,7 @@ async def _ingestMessage(
parentSyntheticId=syntheticId, parentSyntheticId=syntheticId,
limits=limits, limits=limits,
result=result, result=result,
dataSourceId=dataSourceId,
) )
except Exception as exc: except Exception as exc:
logger.warning("outlook attachments %s failed: %s", messageId, exc) logger.warning("outlook attachments %s failed: %s", messageId, exc)
@ -443,6 +479,7 @@ async def _ingestAttachments(
parentSyntheticId: str, parentSyntheticId: str,
limits: OutlookBootstrapLimits, limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult, result: OutlookBootstrapResult,
dataSourceId: str = "",
) -> None: ) -> None:
"""Child ingestion jobs for file attachments (skip inline & oversized).""" """Child ingestion jobs for file attachments (skip inline & oversized)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -531,6 +568,7 @@ async def _ingestAttachments(
neutralize=limits.neutralize, neutralize=limits.neutralize,
provenance={ provenance={
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft", "authority": "msft",
"service": "outlook", "service": "outlook",
"parentId": parentSyntheticId, "parentId": parentSyntheticId,

View file

@ -94,35 +94,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
async def bootstrapSharepoint( async def bootstrapSharepoint(
connectionId: str, connectionId: str,
*, *,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None, dataSources: Optional[List[Dict[str, Any]]] = None,
progressCb: Optional[Any] = None,
adapter: Any = None, adapter: Any = None,
connection: Any = None, connection: Any = None,
knowledgeService: Any = None, knowledgeService: Any = None,
limits: Optional[SharepointBootstrapLimits] = None, limits: Optional[SharepointBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None, runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Enumerate SharePoint drives and ingest every reachable file via the façade. """Enumerate SharePoint drives and ingest files via the facade.
Parameters allow injection for tests; production callers pass only Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
`connectionId` (and optionally a progressCb) and everything else is Each DataSource defines the root path + neutralize policy for its subtree.
resolved against the registered services.
""" """
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs if not dataSources:
prefs = loadConnectionPrefs(connectionId) return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
if not limits: if not limits:
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed) limits = SharepointBootstrapLimits()
startMs = time.time() startMs = time.time()
result = SharepointBootstrapResult(connectionId=connectionId) result = SharepointBootstrapResult(connectionId=connectionId)
logger.info( logger.info(
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s", "ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d",
connectionId, connectionId, len(dataSources),
extra={ extra={
"event": "ingestion.connection.bootstrap.started", "event": "ingestion.connection.bootstrap.started",
"part": "sharepoint", "part": "sharepoint",
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceCount": len(dataSources),
}, },
) )
@ -142,17 +143,27 @@ async def bootstrapSharepoint(
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try: cancelled = False
sites = await adapter.browse("/", limit=limits.maxSites) for ds in dataSources:
except Exception as exc:
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"site_discovery: {exc}")
return _finalizeResult(connectionId, result, startMs)
for site in sites[: limits.maxSites]:
if result.indexed + result.skippedDuplicate >= limits.maxItems: if result.indexed + result.skippedDuplicate >= limits.maxItems:
break break
sitePath = getattr(site, "path", "") or "" if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
cancelled = True
break
dsPath = ds.get("path", "")
dsId = ds.get("id", "")
dsNeutralize = ds.get("neutralize", False)
dsLimits = SharepointBootstrapLimits(
maxItems=limits.maxItems,
maxBytes=limits.maxBytes,
maxFileSize=limits.maxFileSize,
skipMimePrefixes=limits.skipMimePrefixes,
maxDepth=limits.maxDepth,
maxSites=limits.maxSites,
neutralize=dsNeutralize,
)
try: try:
await _walkFolder( await _walkFolder(
adapter=adapter, adapter=adapter,
@ -161,17 +172,21 @@ async def bootstrapSharepoint(
connectionId=connectionId, connectionId=connectionId,
mandateId=mandateId, mandateId=mandateId,
userId=userId, userId=userId,
folderPath=sitePath, folderPath=dsPath,
depth=0, depth=0,
limits=limits, limits=dsLimits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dsId,
) )
except Exception as exc: except Exception as exc:
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True) logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
result.errors.append(f"walk({sitePath}): {exc}") result.errors.append(f"walk({dsPath}): {exc}")
return _finalizeResult(connectionId, result, startMs) finalResult = _finalizeResult(connectionId, result, startMs)
if cancelled:
finalResult["cancelled"] = True
return finalResult
async def _resolveDependencies(connectionId: str): async def _resolveDependencies(connectionId: str):
@ -221,10 +236,13 @@ async def _walkFolder(
depth: int, depth: int,
limits: SharepointBootstrapLimits, limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult, result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
if depth > limits.maxDepth: if depth > limits.maxDepth:
return return
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
return
try: try:
entries = await adapter.browse(folderPath) entries = await adapter.browse(folderPath)
except Exception as exc: except Exception as exc:
@ -237,6 +255,8 @@ async def _walkFolder(
return return
if result.bytesProcessed >= limits.maxBytes: if result.bytesProcessed >= limits.maxBytes:
return return
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
return
entryPath = getattr(entry, "path", "") or "" entryPath = getattr(entry, "path", "") or ""
if getattr(entry, "isFolder", False): if getattr(entry, "isFolder", False):
@ -252,6 +272,7 @@ async def _walkFolder(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
continue continue
@ -283,6 +304,7 @@ async def _walkFolder(
limits=limits, limits=limits,
result=result, result=result,
progressCb=progressCb, progressCb=progressCb,
dataSourceId=dataSourceId,
) )
@ -301,7 +323,8 @@ async def _ingestOne(
revision: Optional[str], revision: Optional[str],
limits: SharepointBootstrapLimits, limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult, result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]], progressCb: Optional[Any],
dataSourceId: str = "",
) -> None: ) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@ -339,6 +362,7 @@ async def _ingestOne(
provenance: Dict[str, Any] = { provenance: Dict[str, Any] = {
"connectionId": connectionId, "connectionId": connectionId,
"dataSourceId": dataSourceId,
"authority": "msft", "authority": "msft",
"service": "sharepoint", "service": "sharepoint",
"externalItemId": externalItemId, "externalItemId": externalItemId,

View file

@ -0,0 +1,78 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
If no ancestor has a value, the default (False) is used.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def resolveEffectiveNeutralize(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Compute the effective neutralize policy for *ds* by walking up the path tree.

    An explicit value on the DataSource itself always wins; otherwise the
    nearest ancestor DataSource (longest path prefix) with an explicit value
    decides. A DataSource at /sites/HR/Documents inherits from /sites/HR if
    that ancestor has neutralize=True and the child has no explicit override.

    Args:
        ds: The DataSource dict whose policy is being resolved.
        allDataSources: All DataSource dicts that may contain ancestors.

    Returns:
        True when content from this DataSource must be neutralized before
        embedding, False otherwise (default when nothing is inherited).
    """
    ownValue = ds.get("neutralize")
    # Bug fix: the previous check (`is not None and is not False`) treated any
    # falsy-but-not-False value (0, "", []) as an explicit True. Coerce
    # explicit values to bool instead, mirroring the sibling resolver.
    if ownValue is not None:
        return bool(ownValue)
    return _findAncestorPolicy(ds, allDataSources, "neutralize")
def resolveEffectiveRagIndexEnabled(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Compute the effective ragIndexEnabled policy by walking up the path tree.

    An explicit boolean on the DataSource itself short-circuits the lookup;
    any other value (typically None) defers to the nearest ancestor that
    carries an explicit value.
    """
    explicit = ds.get("ragIndexEnabled")
    # Only the exact booleans True/False count as an explicit override.
    if isinstance(explicit, bool):
        return explicit
    return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
def _findAncestorPolicy(
ds: Dict[str, Any],
allDataSources: List[Dict[str, Any]],
field: str,
) -> bool:
"""Walk ancestors (longest-prefix match) to find an inherited policy value."""
dsPath = ds.get("path", "")
connectionId = ds.get("connectionId", "")
if not dsPath:
return False
ancestors = []
for candidate in allDataSources:
if candidate.get("id") == ds.get("id"):
continue
if candidate.get("connectionId") != connectionId:
continue
candidatePath = candidate.get("path", "")
if not candidatePath:
continue
if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath):
ancestors.append(candidate)
ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
for ancestor in ancestors:
val = ancestor.get(field)
if val is True:
return True
if val is False:
return False
return False

View file

@ -98,7 +98,8 @@ class WebService:
searchUrls = [] searchUrls = []
searchResultsWithContent = [] searchResultsWithContent = []
if needsSearch and (not allUrls or len(allUrls) < maxNumberPages): if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content") if operationId:
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
try: try:
searchUrls, searchResultsWithContent = await self._performWebSearch( searchUrls, searchResultsWithContent = await self._performWebSearch(
@ -113,16 +114,14 @@ class WebService:
searchUrls = [] searchUrls = []
searchResultsWithContent = [] searchResultsWithContent = []
# Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant)
if searchUrls: if searchUrls:
# Prepend Tavily URLs to the list (they're more relevant)
allUrls = searchUrls + allUrls allUrls = searchUrls + allUrls
logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total") logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
else: else:
# If Tavily search failed, use AI-extracted URLs
logger.warning("Tavily search returned no URLs, using AI-extracted URLs only") logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs") if operationId:
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
# If we have search results (even without content), use them directly instead of crawling # If we have search results (even without content), use them directly instead of crawling
# Tavily search results are more relevant than generic AI-extracted URLs # Tavily search results are more relevant than generic AI-extracted URLs

View file

@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [
"path": "/automations", "path": "/automations",
"order": 30, "order": 30,
}, },
{
"id": "rag-inventory",
"objectKey": "ui.system.ragInventory",
"label": t("RAG-Inventar"),
"icon": "FaDatabase",
"path": "/rag-inventory",
"order": 35,
},
{ {
"id": "store", "id": "store",
"objectKey": "ui.system.store", "objectKey": "ui.system.store",

View file

@ -110,6 +110,9 @@ asyncpg==0.30.0
## Stripe payments ## Stripe payments
stripe>=11.0.0 stripe>=11.0.0
## Outlook MSG file extraction
extract-msg>=0.55.0
## Geospatial libraries for STAC connector ## Geospatial libraries for STAC connector
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326) pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
shapely>=2.0.0 # For geometric operations (intersections, area calculations) shapely>=2.0.0 # For geometric operations (intersections, area calculations)

View file

@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.
This is a one-off migration for the RAG consent & control unification.
Safe to run multiple times (checks column existence before acting).
Usage:
python script_db_migrate_datasource_rag.py [--dry-run]
"""
import os
import sys
import argparse
import logging
from pathlib import Path
# Make the gateway root importable and the CWD, so `modules.*` imports and
# relative config paths resolve when this script is run from scripts/.
scriptPath = Path(__file__).resolve()
gatewayPath = scriptPath.parent.parent
sys.path.insert(0, str(gatewayPath))
os.chdir(str(gatewayPath))
# force=True overrides any logging config pulled in by imported app modules.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
logger = logging.getLogger(__name__)
# Imported after the sys.path tweak above so APP_CONFIG resolves.
import psycopg2
from modules.shared.configuration import APP_CONFIG
def _getConnection():
    """Open a psycopg2 connection using the application's DB settings."""
    dbParams = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    return psycopg2.connect(**dbParams)
def _columnExists(cur, table: str, column: str) -> bool:
cur.execute(
"""SELECT 1 FROM information_schema.columns
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
(table, column),
)
return cur.fetchone() is not None
def migrate(dryRun: bool = False):
    """Rename DataSource.autoSync -> ragIndexEnabled and lastSynced -> lastIndexed.

    Idempotent: each rename is skipped when the target column already exists
    or the source column is missing. With dryRun=True the SQL is only logged
    and the transaction is rolled back instead of committed.

    Args:
        dryRun: When True, log what would run without executing the ALTERs.
    """
    renames = [
        ("DataSource", "autoSync", "ragIndexEnabled"),
        ("DataSource", "lastSynced", "lastIndexed"),
    ]
    conn = _getConnection()
    conn.autocommit = False
    try:
        cur = conn.cursor()
        try:
            executed = []
            for table, oldCol, newCol in renames:
                if _columnExists(cur, table, oldCol) and not _columnExists(cur, table, newCol):
                    sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";'
                    logger.info("EXEC: %s", sql)
                    if not dryRun:
                        cur.execute(sql)
                    executed.append(sql)
                elif _columnExists(cur, table, newCol):
                    logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol)
                else:
                    # Neither old nor new column present — schema differs.
                    logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol)
            if not dryRun and executed:
                conn.commit()
                logger.info("Migration committed (%d statements)", len(executed))
            elif dryRun and executed:
                conn.rollback()
                logger.info("DRY RUN — would execute %d statements", len(executed))
            else:
                logger.info("Nothing to do — schema already up to date")
        finally:
            cur.close()
    except Exception:
        # Bug fix: previously a failing ALTER left the connection open with a
        # dangling transaction. Roll back so a failed run leaves the schema
        # untouched, then re-raise for the caller/CLI to see.
        conn.rollback()
        raise
    finally:
        conn.close()
if __name__ == "__main__":
    # CLI entry point: pass --dry-run to log the SQL without executing it.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
    args = parser.parse_args()
    migrate(dryRun=args.dry_run)

View file

@ -0,0 +1,23 @@
# Copyright (c) 2025 Patrick Motsch
"""Unit tests for Google STT helper config (no API calls)."""
from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields
def test_buildPrimaryStt_lightweight_stripsHeavyFeatures():
    """Lightweight mode keeps punctuation but strips the heavy STT features."""
    fields = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
    assert fields["model"] == "latest_short"
    assert fields["max_alternatives"] == 1
    assert fields["enable_automatic_punctuation"] is True
    for key in ("enable_word_time_offsets", "enable_word_confidence", "use_enhanced"):
        assert fields[key] is False
def test_buildPrimaryStt_full_matchesLegacyDefaults():
    """Full (non-lightweight) mode keeps the legacy heavy-feature defaults."""
    fields = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
    assert fields["model"] == "latest_long"
    assert fields["max_alternatives"] == 3
    for key in ("enable_word_time_offsets", "enable_word_confidence", "use_enhanced"):
        assert fields[key] is True

View file

@ -100,6 +100,9 @@ def _adapter(svc):
return SimpleNamespace(_svc=svc) return SimpleNamespace(_svc=svc)
# Minimal dataSources payload for tests that don't exercise policy handling.
_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}]
def test_bootstrap_walks_team_space_lists_and_tasks(): def test_bootstrap_walks_team_space_lists_and_tasks():
svc = _FakeClickupService(taskCount=2) svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService() knowledge = _FakeKnowledgeService()
@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
async def _run(): async def _run():
return await bootstrapClickup( return await bootstrapClickup(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc), adapter=_adapter(svc),
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
assert job.mimeType == "application/vnd.clickup.task+json" assert job.mimeType == "application/vnd.clickup.task+json"
assert job.mandateId == "m1" assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1" assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds-1"
assert job.provenance["authority"] == "clickup" assert job.provenance["authority"] == "clickup"
assert job.provenance["teamId"] == "team-1" assert job.provenance["teamId"] == "team-1"
assert job.contentVersion # numeric millisecond string assert job.contentVersion # numeric millisecond string
# At least the header content-object is present.
ids = [co["contentObjectId"] for co in job.contentObjects] ids = [co["contentObjectId"] for co in job.contentObjects]
assert "header" in ids assert "header" in ids
@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
async def _run(): async def _run():
return await bootstrapClickup( return await bootstrapClickup(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc), adapter=_adapter(svc),
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays():
async def _run(): async def _run():
return await bootstrapClickup( return await bootstrapClickup(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc), adapter=_adapter(svc),
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion():
async def _run(): async def _run():
return await bootstrapClickup( return await bootstrapClickup(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=_adapter(svc), adapter=_adapter(svc),
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion():
assert result["indexed"] == 3 assert result["indexed"] == 3
def test_bootstrap_skips_when_no_datasources():
    """Without any dataSources the ClickUp bootstrap must bail out early."""
    result = asyncio.run(bootstrapClickup(connectionId="c1"))
    assert result["skipped"] is True
    assert result["reason"] == "no_datasources"
def test_bootstrap_honours_datasource_neutralize():
    """A dataSource with neutralize=True must propagate into every ingestion job."""
    svc = _FakeClickupService(taskCount=1)
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")
    coro = bootstrapClickup(
        connectionId="c1",
        dataSources=[{"id": "ds-n", "neutralize": True}],
        adapter=_adapter(svc),
        connection=connection,
        knowledgeService=knowledge,
        limits=ClickupBootstrapLimits(maxAgeDays=None),
    )
    asyncio.run(coro)
    for job in knowledge.calls:
        assert job.neutralize is True
        assert job.provenance["dataSourceId"] == "ds-n"
if __name__ == "__main__": if __name__ == "__main__":
test_bootstrap_walks_team_space_lists_and_tasks() test_bootstrap_walks_team_space_lists_and_tasks()
test_bootstrap_reports_duplicates_on_second_run() test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_tasks_older_than_maxAgeDays() test_bootstrap_skips_tasks_older_than_maxAgeDays()
test_bootstrap_maxTasks_caps_ingestion() test_bootstrap_maxTasks_caps_ingestion()
test_bootstrap_skips_when_no_datasources()
test_bootstrap_honours_datasource_neutralize()
print("OK — bootstrapClickup tests passed") print("OK — bootstrapClickup tests passed")

View file

@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options):
) )
# Root-path dataSource shared by tests that don't exercise policy handling.
_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}]
def test_bootstrap_walks_drive_and_subfolders(): def test_bootstrap_walks_drive_and_subfolders():
adapter = _FakeDriveAdapter() adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService() knowledge = _FakeKnowledgeService()
@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders():
async def _run(): async def _run():
return await bootstrapGdrive( return await bootstrapGdrive(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
async def _run(): async def _run():
return await bootstrapGdrive( return await bootstrapGdrive(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays():
async def _run(): async def _run():
return await bootstrapGdrive( return await bootstrapGdrive(
connectionId="c1", connectionId="c1",
dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}],
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction, runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=180),
) )
result = asyncio.run(_run()) result = asyncio.run(_run())
@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance():
async def _run(): async def _run():
return await bootstrapGdrive( return await bootstrapGdrive(
connectionId="c1", connectionId="c1",
dataSources=_DEFAULT_DS,
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance():
assert job.sourceKind == "gdrive_item" assert job.sourceKind == "gdrive_item"
assert job.mandateId == "m1" assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1" assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds1"
assert job.provenance["authority"] == "google" assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "drive" assert job.provenance["service"] == "drive"
assert job.contentVersion # modifiedTime ISO string assert job.contentVersion # modifiedTime ISO string
def test_bootstrap_skips_when_no_datasources():
    """Without any dataSources the GDrive bootstrap must bail out early."""
    result = asyncio.run(bootstrapGdrive(connectionId="c1"))
    assert result["skipped"] is True
    assert result["reason"] == "no_datasources"
if __name__ == "__main__": if __name__ == "__main__":
test_bootstrap_walks_drive_and_subfolders() test_bootstrap_walks_drive_and_subfolders()
test_bootstrap_reports_duplicates_on_second_run() test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_files_older_than_maxAgeDays() test_bootstrap_skips_files_older_than_maxAgeDays()
test_bootstrap_passes_connection_provenance() test_bootstrap_passes_connection_provenance()
test_bootstrap_skips_when_no_datasources()
print("OK — bootstrapGdrive tests passed") print("OK — bootstrapGdrive tests passed")

View file

@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
async def _run(): async def _run():
return await bootstrapOutlook( return await bootstrapOutlook(
connectionId="c1", connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
assert job.sourceKind == "outlook_message" assert job.sourceKind == "outlook_message"
assert job.mimeType == "message/rfc822" assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1" assert job.provenance["connectionId"] == "c1"
assert job.provenance["dataSourceId"] == "ds1"
assert job.provenance["service"] == "outlook" assert job.provenance["service"] == "outlook"
assert job.contentVersion == "ck1" assert job.contentVersion == "ck1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects) assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination():
async def _run(): async def _run():
return await bootstrapOutlook( return await bootstrapOutlook(
connectionId="c1", connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,
@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates():
async def _run(): async def _run():
return await bootstrapOutlook( return await bootstrapOutlook(
connectionId="c1", connectionId="c1",
dataSources=[{"id": "ds1", "neutralize": False}],
adapter=adapter, adapter=adapter,
connection=connection, connection=connection,
knowledgeService=knowledge, knowledgeService=knowledge,