Merge pull request #161 from valueonag/feat/demo-system-readieness
Feat/demo system readieness
This commit is contained in:
commit
ab43b42aa9
56 changed files with 2180 additions and 523 deletions
3
app.py
3
app.py
|
|
@ -604,6 +604,9 @@ app.include_router(promptRouter)
|
||||||
from modules.routes.routeDataConnections import router as connectionsRouter
|
from modules.routes.routeDataConnections import router as connectionsRouter
|
||||||
app.include_router(connectionsRouter)
|
app.include_router(connectionsRouter)
|
||||||
|
|
||||||
|
from modules.routes.routeRagInventory import router as ragInventoryRouter
|
||||||
|
app.include_router(ragInventoryRouter)
|
||||||
|
|
||||||
from modules.routes.routeTableViews import router as tableViewsRouter
|
from modules.routes.routeTableViews import router as tableViewsRouter
|
||||||
app.include_router(tableViewsRouter)
|
app.include_router(tableViewsRouter)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z
|
||||||
APP_TOKEN_EXPIRY=300
|
APP_TOKEN_EXPIRY=300
|
||||||
|
|
||||||
# CORS Configuration
|
# CORS Configuration
|
||||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss
|
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||||
|
|
@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
||||||
APP_LOGGING_BACKUP_COUNT = 5
|
APP_LOGGING_BACKUP_COUNT = 5
|
||||||
|
|
||||||
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
|
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
|
||||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
|
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
|
||||||
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
|
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
|
||||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
|
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
|
||||||
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
|
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
|
||||||
|
|
||||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
|
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
|
||||||
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
|
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
|
||||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
|
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
|
||||||
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
|
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
|
||||||
|
|
||||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||||
|
|
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
|
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
|
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
|
||||||
|
|
||||||
# Manadate Pre-Processing Servers
|
|
||||||
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
|
|
||||||
|
|
||||||
# Preprocessor API Configuration
|
|
||||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
|
||||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
|
||||||
|
|
||||||
# Azure Communication Services Email Configuration
|
# Azure Communication Services Email Configuration
|
||||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ
|
||||||
APP_TOKEN_EXPIRY=300
|
APP_TOKEN_EXPIRY=300
|
||||||
|
|
||||||
# CORS Configuration
|
# CORS Configuration
|
||||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||||
|
|
@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
||||||
APP_LOGGING_BACKUP_COUNT = 5
|
APP_LOGGING_BACKUP_COUNT = 5
|
||||||
|
|
||||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
|
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
|
||||||
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
|
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
|
||||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
|
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
|
||||||
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
|
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
|
||||||
|
|
||||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
|
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
|
||||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
|
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
|
||||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
|
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
|
||||||
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
|
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
|
||||||
|
|
||||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||||
|
|
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||||
|
|
||||||
# Manadate Pre-Processing Servers
|
|
||||||
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
|
|
||||||
|
|
||||||
# Preprocessor API Configuration
|
|
||||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
|
||||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
|
||||||
|
|
||||||
# Azure Communication Services Email Configuration
|
# Azure Communication Services Email Configuration
|
||||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||||
|
|
|
||||||
|
|
@ -32,19 +32,19 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
||||||
APP_LOGGING_BACKUP_COUNT = 5
|
APP_LOGGING_BACKUP_COUNT = 5
|
||||||
|
|
||||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY=
|
||||||
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
|
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
|
||||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk=
|
||||||
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
|
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
|
||||||
|
|
||||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
|
||||||
Service_GOOGLE_AUTH_REDIRECT_URI =
|
Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback
|
||||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
|
||||||
Service_GOOGLE_DATA_REDIRECT_URI =
|
Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback
|
||||||
|
|
||||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||||
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
||||||
|
|
@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||||
|
|
||||||
# Manadate Pre-Processing Servers
|
|
||||||
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
|
|
||||||
|
|
||||||
# Preprocessor API Configuration
|
|
||||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
|
||||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
|
||||||
|
|
||||||
# Azure Communication Services Email Configuration
|
# Azure Communication Services Email Configuration
|
||||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl
|
||||||
APP_TOKEN_EXPIRY=300
|
APP_TOKEN_EXPIRY=300
|
||||||
|
|
||||||
# CORS Configuration
|
# CORS Configuration
|
||||||
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||||
|
|
@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
|
||||||
APP_LOGGING_BACKUP_COUNT = 5
|
APP_LOGGING_BACKUP_COUNT = 5
|
||||||
|
|
||||||
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
|
||||||
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
|
||||||
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
|
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
|
||||||
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
|
||||||
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
|
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
|
||||||
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
|
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
|
||||||
|
|
||||||
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
|
||||||
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
|
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
|
||||||
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
|
||||||
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
|
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
|
||||||
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
|
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
|
||||||
|
|
||||||
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
|
||||||
|
|
@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
|
||||||
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
|
||||||
|
|
||||||
# Manadate Pre-Processing Servers
|
|
||||||
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
|
|
||||||
|
|
||||||
# Preprocessor API Configuration
|
|
||||||
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
|
|
||||||
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
|
|
||||||
|
|
||||||
# Azure Communication Services Email Configuration
|
# Azure Communication Services Email Configuration
|
||||||
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
|
||||||
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _buildPrimarySttRecognitionFields(
|
||||||
|
*,
|
||||||
|
model: str,
|
||||||
|
lightweight: bool,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Shared fields for batch + streaming primary RecognitionConfig."""
|
||||||
|
base: Dict[str, Any] = {
|
||||||
|
"enable_automatic_punctuation": True,
|
||||||
|
"model": model,
|
||||||
|
}
|
||||||
|
if lightweight:
|
||||||
|
base["enable_word_time_offsets"] = False
|
||||||
|
base["enable_word_confidence"] = False
|
||||||
|
base["max_alternatives"] = 1
|
||||||
|
base["use_enhanced"] = False
|
||||||
|
else:
|
||||||
|
base["enable_word_time_offsets"] = True
|
||||||
|
base["enable_word_confidence"] = True
|
||||||
|
base["max_alternatives"] = 3
|
||||||
|
base["use_enhanced"] = True
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
# Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
|
# Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
|
||||||
# SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
|
# SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
|
||||||
_GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
|
_GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
|
||||||
|
|
@ -73,7 +97,10 @@ class ConnectorGoogleSpeech:
|
||||||
sampleRate: int = None, channels: int = None,
|
sampleRate: int = None, channels: int = None,
|
||||||
skipFallbacks: bool = False,
|
skipFallbacks: bool = False,
|
||||||
phraseHints: Optional[list] = None,
|
phraseHints: Optional[list] = None,
|
||||||
alternativeLanguages: Optional[list] = None) -> Dict:
|
alternativeLanguages: Optional[list] = None,
|
||||||
|
model: str = "latest_long",
|
||||||
|
lightweight: bool = False,
|
||||||
|
audioFormat: Optional[str] = None) -> Dict:
|
||||||
"""
|
"""
|
||||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||||
|
|
||||||
|
|
@ -82,6 +109,9 @@ class ConnectorGoogleSpeech:
|
||||||
language: Language code (e.g., 'de-DE', 'en-US')
|
language: Language code (e.g., 'de-DE', 'en-US')
|
||||||
sample_rate: Audio sample rate (auto-detected if None)
|
sample_rate: Audio sample rate (auto-detected if None)
|
||||||
channels: Number of audio channels (auto-detected if None)
|
channels: Number of audio channels (auto-detected if None)
|
||||||
|
model: Google recognition model (e.g. latest_long, latest_short)
|
||||||
|
lightweight: If True, omit word timings/confidence, single alternative, no enhanced model
|
||||||
|
audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict containing transcribed text, confidence, and metadata
|
Dict containing transcribed text, confidence, and metadata
|
||||||
|
|
@ -92,8 +122,24 @@ class ConnectorGoogleSpeech:
|
||||||
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
|
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
|
||||||
sampleRate = None
|
sampleRate = None
|
||||||
|
|
||||||
# Auto-detect audio format if not provided
|
explicitFormat = (audioFormat or "").strip().lower() or None
|
||||||
if sampleRate is None or channels is None:
|
if explicitFormat:
|
||||||
|
if channels is None:
|
||||||
|
channels = 1
|
||||||
|
if sampleRate is None:
|
||||||
|
if explicitFormat == "webm_opus":
|
||||||
|
sampleRate = 48000
|
||||||
|
elif explicitFormat == "linear16":
|
||||||
|
sampleRate = 16000
|
||||||
|
elif explicitFormat in ("mp3", "flac"):
|
||||||
|
sampleRate = 44100
|
||||||
|
elif explicitFormat == "wav":
|
||||||
|
sampleRate = 16000
|
||||||
|
else:
|
||||||
|
sampleRate = 16000
|
||||||
|
audioFormat = explicitFormat
|
||||||
|
logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch")
|
||||||
|
elif sampleRate is None or channels is None:
|
||||||
validation = self.validateAudioFormat(audioContent)
|
validation = self.validateAudioFormat(audioContent)
|
||||||
if not validation["valid"]:
|
if not validation["valid"]:
|
||||||
return {
|
return {
|
||||||
|
|
@ -156,12 +202,7 @@ class ConnectorGoogleSpeech:
|
||||||
"encoding": encoding,
|
"encoding": encoding,
|
||||||
"audio_channel_count": channels,
|
"audio_channel_count": channels,
|
||||||
"language_code": language,
|
"language_code": language,
|
||||||
"enable_automatic_punctuation": True,
|
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
|
||||||
"model": "latest_long",
|
|
||||||
"enable_word_time_offsets": True,
|
|
||||||
"enable_word_confidence": True,
|
|
||||||
"max_alternatives": 3,
|
|
||||||
"use_enhanced": True,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if phraseHints:
|
if phraseHints:
|
||||||
|
|
@ -205,8 +246,7 @@ class ConnectorGoogleSpeech:
|
||||||
sample_rate_hertz=16000,
|
sample_rate_hertz=16000,
|
||||||
audio_channel_count=1,
|
audio_channel_count=1,
|
||||||
language_code=language,
|
language_code=language,
|
||||||
enable_automatic_punctuation=True,
|
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
|
||||||
model="latest_long"
|
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
response = await asyncio.to_thread(
|
response = await asyncio.to_thread(
|
||||||
|
|
@ -343,7 +383,7 @@ class ConnectorGoogleSpeech:
|
||||||
"error": "No recognition results (silence or unclear audio)"
|
"error": "No recognition results (silence or unclear audio)"
|
||||||
}
|
}
|
||||||
|
|
||||||
models = ["latest_long", "phone_call", "latest_short"]
|
models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"]))
|
||||||
|
|
||||||
for fallback_config in fallback_configs:
|
for fallback_config in fallback_configs:
|
||||||
for model in models:
|
for model in models:
|
||||||
|
|
@ -419,6 +459,9 @@ class ConnectorGoogleSpeech:
|
||||||
audioQueue: asyncio.Queue,
|
audioQueue: asyncio.Queue,
|
||||||
language: str = "de-DE",
|
language: str = "de-DE",
|
||||||
phraseHints: Optional[list] = None,
|
phraseHints: Optional[list] = None,
|
||||||
|
model: str = "latest_long",
|
||||||
|
lightweight: bool = False,
|
||||||
|
singleUtterance: bool = False,
|
||||||
) -> AsyncGenerator[Dict[str, Any], None]:
|
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||||
"""
|
"""
|
||||||
Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
|
Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
|
||||||
|
|
@ -429,9 +472,13 @@ class ConnectorGoogleSpeech:
|
||||||
Send (b"", True) to signal end of stream.
|
Send (b"", True) to signal end of stream.
|
||||||
language: Language code
|
language: Language code
|
||||||
phraseHints: Optional boost phrases
|
phraseHints: Optional boost phrases
|
||||||
|
model: Google recognition model (e.g. latest_long, latest_short)
|
||||||
|
lightweight: If True, use non-enhanced primary config (lower latency)
|
||||||
|
singleUtterance: If True, end stream after first utterance (client should reconnect)
|
||||||
|
|
||||||
Yields:
|
Yields:
|
||||||
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec
|
Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec;
|
||||||
|
optionally endOfSingleUtterance, reconnectRequired
|
||||||
"""
|
"""
|
||||||
STREAM_LIMIT_SEC = 290
|
STREAM_LIMIT_SEC = 290
|
||||||
streamStartTs = time.time()
|
streamStartTs = time.time()
|
||||||
|
|
@ -442,9 +489,7 @@ class ConnectorGoogleSpeech:
|
||||||
"sample_rate_hertz": 48000,
|
"sample_rate_hertz": 48000,
|
||||||
"audio_channel_count": 1,
|
"audio_channel_count": 1,
|
||||||
"language_code": language,
|
"language_code": language,
|
||||||
"enable_automatic_punctuation": True,
|
**_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
|
||||||
"model": "latest_long",
|
|
||||||
"use_enhanced": True,
|
|
||||||
}
|
}
|
||||||
if phraseHints:
|
if phraseHints:
|
||||||
configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
|
configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
|
||||||
|
|
@ -453,7 +498,7 @@ class ConnectorGoogleSpeech:
|
||||||
streamingConfig = speech.StreamingRecognitionConfig(
|
streamingConfig = speech.StreamingRecognitionConfig(
|
||||||
config=recognitionConfig,
|
config=recognitionConfig,
|
||||||
interim_results=True,
|
interim_results=True,
|
||||||
single_utterance=False,
|
single_utterance=singleUtterance,
|
||||||
)
|
)
|
||||||
|
|
||||||
import queue as threadQueue
|
import queue as threadQueue
|
||||||
|
|
@ -490,7 +535,22 @@ class ConnectorGoogleSpeech:
|
||||||
)
|
)
|
||||||
for response in responseStream:
|
for response in responseStream:
|
||||||
elapsed = time.time() - streamStartTs
|
elapsed = time.time() - streamStartTs
|
||||||
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0
|
|
||||||
|
durationFromResults = 0.0
|
||||||
|
for result in response.results:
|
||||||
|
rt = getattr(result, "result_end_time", None)
|
||||||
|
if rt is None:
|
||||||
|
continue
|
||||||
|
if hasattr(rt, "total_seconds"):
|
||||||
|
durationFromResults = max(durationFromResults, float(rt.total_seconds()))
|
||||||
|
else:
|
||||||
|
durationFromResults = max(
|
||||||
|
durationFromResults,
|
||||||
|
float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9,
|
||||||
|
)
|
||||||
|
estimatedDurationSec = durationFromResults if durationFromResults > 0 else (
|
||||||
|
totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0
|
||||||
|
)
|
||||||
|
|
||||||
finalTexts = []
|
finalTexts = []
|
||||||
interimTexts = []
|
interimTexts = []
|
||||||
|
|
@ -524,6 +584,13 @@ class ConnectorGoogleSpeech:
|
||||||
"stabilityScore": 0.0,
|
"stabilityScore": 0.0,
|
||||||
"audioDurationSec": estimatedDurationSec,
|
"audioDurationSec": estimatedDurationSec,
|
||||||
}), loop)
|
}), loop)
|
||||||
|
|
||||||
|
speechEvt = getattr(response, "speech_event_type", None)
|
||||||
|
if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt):
|
||||||
|
asyncio.run_coroutine_threadsafe(resultOutQ.put({
|
||||||
|
"endOfSingleUtterance": True,
|
||||||
|
"audioDurationSec": estimatedDurationSec,
|
||||||
|
}), loop)
|
||||||
if elapsed >= STREAM_LIMIT_SEC:
|
if elapsed >= STREAM_LIMIT_SEC:
|
||||||
logger.info("Streaming STT approaching 5-min limit, client should reconnect")
|
logger.info("Streaming STT approaching 5-min limit, client should reconnect")
|
||||||
asyncio.run_coroutine_threadsafe(resultOutQ.put({
|
asyncio.run_coroutine_threadsafe(resultOutQ.put({
|
||||||
|
|
|
||||||
|
|
@ -62,15 +62,15 @@ class DataSource(PowerOnModel):
|
||||||
description="Owner user ID",
|
description="Owner user ID",
|
||||||
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
|
json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
|
||||||
)
|
)
|
||||||
autoSync: bool = Field(
|
ragIndexEnabled: bool = Field(
|
||||||
default=False,
|
default=False,
|
||||||
description="Automatically sync on schedule",
|
description="When true this tree element is indexed into the RAG knowledge store",
|
||||||
json_schema_extra={"label": "Auto-Sync"},
|
json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||||
)
|
)
|
||||||
lastSynced: Optional[float] = Field(
|
lastIndexed: Optional[float] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Last sync timestamp",
|
description="Timestamp of last successful RAG indexing run",
|
||||||
json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"},
|
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
||||||
)
|
)
|
||||||
scope: str = Field(
|
scope: str = Field(
|
||||||
default="personal",
|
default="personal",
|
||||||
|
|
|
||||||
|
|
@ -484,10 +484,10 @@ class UserConnection(PowerOnModel):
|
||||||
default=None,
|
default=None,
|
||||||
description=(
|
description=(
|
||||||
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
|
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
|
||||||
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
|
"mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), "
|
||||||
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
|
"filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), "
|
||||||
"clickupScope (titles|title_description|with_comments), "
|
"clickupIndexAttachments (bool), maxAgeDays (int). "
|
||||||
"surfaceToggles (dict per authority), maxAgeDays (int)."
|
"Neutralization is controlled per DataSource.neutralize (not here)."
|
||||||
),
|
),
|
||||||
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
|
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1080,6 +1080,8 @@ class CommcoachService:
|
||||||
audioContent=audioContent,
|
audioContent=audioContent,
|
||||||
language=language,
|
language=language,
|
||||||
skipFallbacks=True,
|
skipFallbacks=True,
|
||||||
|
model="latest_short",
|
||||||
|
lightweight=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
transcribedText = ""
|
transcribedText = ""
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,8 @@ class BrowserBotConnector:
|
||||||
botAccountPassword: Optional[str] = None,
|
botAccountPassword: Optional[str] = None,
|
||||||
transferMode: str = "auto",
|
transferMode: str = "auto",
|
||||||
debugMode: bool = False,
|
debugMode: bool = False,
|
||||||
|
avatarMediaData: Optional[str] = None,
|
||||||
|
avatarMediaType: Optional[str] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Send join command to the Browser Bot service.
|
Send join command to the Browser Bot service.
|
||||||
|
|
@ -79,12 +81,16 @@ class BrowserBotConnector:
|
||||||
"debugMode": debugMode,
|
"debugMode": debugMode,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add authenticated join credentials if configured
|
|
||||||
if botAccountEmail and botAccountPassword:
|
if botAccountEmail and botAccountPassword:
|
||||||
payload["botAccountEmail"] = botAccountEmail
|
payload["botAccountEmail"] = botAccountEmail
|
||||||
payload["botAccountPassword"] = botAccountPassword
|
payload["botAccountPassword"] = botAccountPassword
|
||||||
logger.info(f"Bot will join authenticated as {botAccountEmail}")
|
logger.info(f"Bot will join authenticated as {botAccountEmail}")
|
||||||
|
|
||||||
|
if avatarMediaData and avatarMediaType:
|
||||||
|
payload["avatarMediaData"] = avatarMediaData
|
||||||
|
payload["avatarMediaType"] = avatarMediaType
|
||||||
|
logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
|
async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
|
||||||
async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:
|
async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,18 @@ class TeamsbotMeetingModule(PowerOnModel):
|
||||||
defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
|
defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
|
||||||
goals: Optional[str] = Field(default=None, description="Free-text goals")
|
goals: Optional[str] = Field(default=None, description="Free-text goals")
|
||||||
kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
|
kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
|
||||||
|
defaultMeetingLink: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Default Teams meeting URL for new sessions in this module (user can override)",
|
||||||
|
)
|
||||||
|
defaultBotName: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Default display name for the bot when starting a session from this module",
|
||||||
|
)
|
||||||
|
defaultAvatarFileId: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="FileItem ID for the default avatar image/video shown in the meeting",
|
||||||
|
)
|
||||||
status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
|
status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -217,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel):
|
||||||
triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
|
triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
|
||||||
contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
|
contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
|
||||||
debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
|
debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
|
||||||
|
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override")
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -240,6 +253,7 @@ class TeamsbotConfig(BaseModel):
|
||||||
triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
|
triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
|
||||||
contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
|
contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
|
||||||
debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
|
debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
|
||||||
|
avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting")
|
||||||
|
|
||||||
def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
|
def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
|
||||||
"""Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
|
"""Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
|
||||||
|
|
@ -257,6 +271,7 @@ class TeamsbotStartSessionRequest(BaseModel):
|
||||||
"""Request to start a new Teams Bot session."""
|
"""Request to start a new Teams Bot session."""
|
||||||
meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
|
meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
|
||||||
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
|
botName: Optional[str] = Field(default=None, description="Override bot name for this session")
|
||||||
|
moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to")
|
||||||
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
|
||||||
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
|
joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
|
||||||
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
|
sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
|
||||||
|
|
@ -277,6 +292,9 @@ class CreateMeetingModuleRequest(BaseModel):
|
||||||
defaultDirectorPrompts: Optional[str] = None
|
defaultDirectorPrompts: Optional[str] = None
|
||||||
goals: Optional[str] = None
|
goals: Optional[str] = None
|
||||||
kpiTargets: Optional[str] = None
|
kpiTargets: Optional[str] = None
|
||||||
|
defaultMeetingLink: Optional[str] = None
|
||||||
|
defaultBotName: Optional[str] = None
|
||||||
|
defaultAvatarFileId: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class UpdateMeetingModuleRequest(BaseModel):
|
class UpdateMeetingModuleRequest(BaseModel):
|
||||||
|
|
@ -287,6 +305,9 @@ class UpdateMeetingModuleRequest(BaseModel):
|
||||||
defaultDirectorPrompts: Optional[str] = None
|
defaultDirectorPrompts: Optional[str] = None
|
||||||
goals: Optional[str] = None
|
goals: Optional[str] = None
|
||||||
kpiTargets: Optional[str] = None
|
kpiTargets: Optional[str] = None
|
||||||
|
defaultMeetingLink: Optional[str] = None
|
||||||
|
defaultBotName: Optional[str] = None
|
||||||
|
defaultAvatarFileId: Optional[str] = None
|
||||||
status: Optional[TeamsbotModuleStatus] = None
|
status: Optional[TeamsbotModuleStatus] = None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -304,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
|
||||||
triggerCooldownSeconds: Optional[int] = None
|
triggerCooldownSeconds: Optional[int] = None
|
||||||
contextWindowSegments: Optional[int] = None
|
contextWindowSegments: Optional[int] = None
|
||||||
debugMode: Optional[bool] = None
|
debugMode: Optional[bool] = None
|
||||||
|
avatarFileId: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
|
||||||
TeamsbotDirectorPromptStatus,
|
TeamsbotDirectorPromptStatus,
|
||||||
TeamsbotDirectorPromptMode,
|
TeamsbotDirectorPromptMode,
|
||||||
TeamsbotMeetingModule,
|
TeamsbotMeetingModule,
|
||||||
|
TeamsbotModuleStatus,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -338,6 +339,8 @@ class TeamsbotObjects:
|
||||||
def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
|
def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
|
||||||
"""Get all meeting modules for a feature instance."""
|
"""Get all meeting modules for a feature instance."""
|
||||||
records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
|
records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
|
||||||
|
for r in records:
|
||||||
|
r.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
|
||||||
records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True)
|
records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True)
|
||||||
return records
|
return records
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -290,6 +290,19 @@ def _runMigrations():
|
||||||
|
|
||||||
migrated = False
|
migrated = False
|
||||||
|
|
||||||
|
# M2: MeetingModule default meeting link / bot name (additive columns)
|
||||||
|
if _tableExists("TeamsbotMeetingModule"):
|
||||||
|
for col, sqlType in (
|
||||||
|
("defaultMeetingLink", "TEXT"),
|
||||||
|
("defaultBotName", "TEXT"),
|
||||||
|
):
|
||||||
|
if not _columnExists("TeamsbotMeetingModule", col):
|
||||||
|
cur.execute(
|
||||||
|
f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL',
|
||||||
|
)
|
||||||
|
logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}")
|
||||||
|
migrated = True
|
||||||
|
|
||||||
# M1: Create default Adhoc modules for orphaned sessions
|
# M1: Create default Adhoc modules for orphaned sessions
|
||||||
# (only runs if TeamsbotSession table exists with moduleId column
|
# (only runs if TeamsbotSession table exists with moduleId column
|
||||||
# and there are sessions without a moduleId)
|
# and there are sessions without a moduleId)
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ from .datamodelTeamsbot import (
|
||||||
TeamsbotDirectorPromptMode,
|
TeamsbotDirectorPromptMode,
|
||||||
TeamsbotDirectorPromptStatus,
|
TeamsbotDirectorPromptStatus,
|
||||||
TeamsbotMeetingModule,
|
TeamsbotMeetingModule,
|
||||||
|
TeamsbotModuleStatus,
|
||||||
CreateMeetingModuleRequest,
|
CreateMeetingModuleRequest,
|
||||||
UpdateMeetingModuleRequest,
|
UpdateMeetingModuleRequest,
|
||||||
DIRECTOR_PROMPT_FILE_LIMIT,
|
DIRECTOR_PROMPT_FILE_LIMIT,
|
||||||
|
|
@ -203,6 +204,7 @@ async def createModule(
|
||||||
data["instanceId"] = instanceId
|
data["instanceId"] = instanceId
|
||||||
data["mandateId"] = mandateId
|
data["mandateId"] = mandateId
|
||||||
data["ownerUserId"] = str(context.user.id)
|
data["ownerUserId"] = str(context.user.id)
|
||||||
|
data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
|
||||||
module = interface.createModule(data)
|
module = interface.createModule(data)
|
||||||
return {"module": module}
|
return {"module": module}
|
||||||
|
|
||||||
|
|
@ -281,6 +283,11 @@ async def startSession(
|
||||||
interface = _getInterface(context, instanceId)
|
interface = _getInterface(context, instanceId)
|
||||||
config = _getInstanceConfig(instanceId)
|
config = _getInstanceConfig(instanceId)
|
||||||
|
|
||||||
|
if body.moduleId:
|
||||||
|
mod = interface.getModule(body.moduleId)
|
||||||
|
if not mod or str(mod.get("instanceId") or "") != str(instanceId):
|
||||||
|
raise HTTPException(status_code=400, detail="Invalid moduleId for this instance")
|
||||||
|
|
||||||
# Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
|
# Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
|
||||||
cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
|
cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
|
||||||
|
|
||||||
|
|
@ -288,6 +295,7 @@ async def startSession(
|
||||||
sessionData = TeamsbotSession(
|
sessionData = TeamsbotSession(
|
||||||
instanceId=instanceId,
|
instanceId=instanceId,
|
||||||
mandateId=mandateId,
|
mandateId=mandateId,
|
||||||
|
moduleId=body.moduleId,
|
||||||
meetingLink=cleanMeetingUrl,
|
meetingLink=cleanMeetingUrl,
|
||||||
botName=body.botName or config.botName,
|
botName=body.botName or config.botName,
|
||||||
sessionContext=body.sessionContext,
|
sessionContext=body.sessionContext,
|
||||||
|
|
@ -426,6 +434,54 @@ async def listSessions(
|
||||||
return {"sessions": sessions}
|
return {"sessions": sessions}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{instanceId}/dashboard/stream")
|
||||||
|
@limiter.limit("60/minute")
|
||||||
|
async def streamDashboard(
|
||||||
|
request: Request,
|
||||||
|
instanceId: str,
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
SSE channel for the Teamsbot dashboard: repeated snapshots of sessions and meeting modules.
|
||||||
|
Push interval: 3s while any own session is pending/joining/active, otherwise 20s.
|
||||||
|
Same session visibility rules as GET /sessions (own sessions unless platform admin).
|
||||||
|
"""
|
||||||
|
_validateInstanceAccess(instanceId, context)
|
||||||
|
interface = _getInterface(context, instanceId)
|
||||||
|
userId = None if context.isPlatformAdmin else str(context.user.id)
|
||||||
|
activeStatuses = {
|
||||||
|
TeamsbotSessionStatus.PENDING.value,
|
||||||
|
TeamsbotSessionStatus.JOINING.value,
|
||||||
|
TeamsbotSessionStatus.ACTIVE.value,
|
||||||
|
}
|
||||||
|
|
||||||
|
async def eventGenerator():
|
||||||
|
while True:
|
||||||
|
sessionRows = []
|
||||||
|
try:
|
||||||
|
sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId)
|
||||||
|
moduleRows = interface.getModules(instanceId)
|
||||||
|
payload = {"type": "dashboardState", "sessions": sessionRows, "modules": moduleRows}
|
||||||
|
yield f"data: {json.dumps(payload, default=str)}\n\n"
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
raise
|
||||||
|
except Exception as ex:
|
||||||
|
logger.warning("dashboard stream tick failed: %s", ex)
|
||||||
|
yield f"data: {json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})}\n\n"
|
||||||
|
hasActive = any((s.get("status") in activeStatuses) for s in sessionRows)
|
||||||
|
await asyncio.sleep(3.0 if hasActive else 20.0)
|
||||||
|
|
||||||
|
return StreamingResponse(
|
||||||
|
eventGenerator(),
|
||||||
|
media_type="text/event-stream",
|
||||||
|
headers={
|
||||||
|
"Cache-Control": "no-cache",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"X-Accel-Buffering": "no",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/sessions/{sessionId}")
|
@router.get("/{instanceId}/sessions/{sessionId}")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def getSession(
|
async def getSession(
|
||||||
|
|
@ -634,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf
|
||||||
if not userSettings:
|
if not userSettings:
|
||||||
return baseConfig
|
return baseConfig
|
||||||
|
|
||||||
# Merge: user settings override instance defaults (only non-None values)
|
# Merge: user settings override instance defaults (only non-None values).
|
||||||
|
# Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically.
|
||||||
overrides = {}
|
overrides = {}
|
||||||
for field in ["botName", "aiSystemPrompt", "responseMode",
|
for field in TeamsbotConfig.model_fields:
|
||||||
"responseChannel", "transferMode", "language", "voiceId",
|
|
||||||
"triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments",
|
|
||||||
"debugMode"]:
|
|
||||||
value = userSettings.get(field)
|
value = userSettings.get(field)
|
||||||
if value is not None:
|
if value is not None:
|
||||||
overrides[field] = value
|
overrides[field] = value
|
||||||
|
|
|
||||||
|
|
@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
|
||||||
),
|
),
|
||||||
"agentRound": (
|
"agentRound": (
|
||||||
"One short sentence (max ~14 words) the assistant says BETWEEN rounds "
|
"One short sentence (max ~14 words) the assistant says BETWEEN rounds "
|
||||||
"of a longer agent task to signal that work is still in progress. "
|
"of a longer agent task to update the audience on what it is doing. "
|
||||||
"Include the placeholder tokens '{round}' and '{maxRounds}' so the "
|
"Include the placeholder token '{activity}' which will be filled with "
|
||||||
"caller can substitute the actual numbers — e.g. 'Step {round} of "
|
"the current activity — e.g. 'I am {activity}, one moment...' or "
|
||||||
"{maxRounds}, still working.'"
|
"'Currently {activity}, almost there...'. Do NOT include step numbers."
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -602,6 +602,13 @@ class TeamsbotService:
|
||||||
self._lastTranscriptText: Optional[str] = None
|
self._lastTranscriptText: Optional[str] = None
|
||||||
self._lastTranscriptId: Optional[str] = None
|
self._lastTranscriptId: Optional[str] = None
|
||||||
self._lastSttTime: float = 0.0
|
self._lastSttTime: float = 0.0
|
||||||
|
|
||||||
|
# Audio chunk aggregation: collect chunks and send to STT only
|
||||||
|
# after a speech pause or when the buffer reaches a target duration.
|
||||||
|
self._audioBuffer: bytes = b""
|
||||||
|
self._audioBufferStartTime: float = 0.0
|
||||||
|
self._audioBufferLastChunkTime: float = 0.0
|
||||||
|
self._audioBufferSampleRate: int = 16000
|
||||||
self._lastBotResponseText: Optional[str] = None
|
self._lastBotResponseText: Optional[str] = None
|
||||||
self._lastBotResponseTs: float = 0.0
|
self._lastBotResponseTs: float = 0.0
|
||||||
|
|
||||||
|
|
@ -732,6 +739,12 @@ class TeamsbotService:
|
||||||
hasAuth = bool(botAccountEmail and botAccountPassword)
|
hasAuth = bool(botAccountEmail and botAccountPassword)
|
||||||
logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
|
logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
|
||||||
|
|
||||||
|
avatarMediaData = None
|
||||||
|
avatarMediaType = None
|
||||||
|
avatarFileId = self._resolveAvatarFileId(session, interface)
|
||||||
|
if avatarFileId:
|
||||||
|
avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
|
||||||
|
|
||||||
result = await self.browserBotConnector.joinMeeting(
|
result = await self.browserBotConnector.joinMeeting(
|
||||||
sessionId=sessionId,
|
sessionId=sessionId,
|
||||||
meetingUrl=meetingLink,
|
meetingUrl=meetingLink,
|
||||||
|
|
@ -743,6 +756,8 @@ class TeamsbotService:
|
||||||
botAccountPassword=botAccountPassword,
|
botAccountPassword=botAccountPassword,
|
||||||
transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
|
transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
|
||||||
debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
|
debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
|
||||||
|
avatarMediaData=avatarMediaData,
|
||||||
|
avatarMediaType=avatarMediaType,
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.get("success"):
|
if result.get("success"):
|
||||||
|
|
@ -767,6 +782,37 @@ class TeamsbotService:
|
||||||
})
|
})
|
||||||
await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
|
await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
|
||||||
|
|
||||||
|
def _resolveAvatarFileId(self, session, interface):
|
||||||
|
"""Resolve avatarFileId: module override > config default."""
|
||||||
|
moduleId = session.get("moduleId")
|
||||||
|
if moduleId:
|
||||||
|
module = interface.getModule(moduleId)
|
||||||
|
if module and module.get("defaultAvatarFileId"):
|
||||||
|
return module["defaultAvatarFileId"]
|
||||||
|
return getattr(self.config, "avatarFileId", None)
|
||||||
|
|
||||||
|
def _loadAvatarFileData(self, fileId, _teamsbotInterface):
|
||||||
|
"""Load avatar file as base64 data + mime type. Returns (data, mimeType) or (None, None)."""
|
||||||
|
import base64
|
||||||
|
from modules.interfaces import interfaceDbManagement
|
||||||
|
try:
|
||||||
|
mgmt = interfaceDbManagement.getInterface(self.currentUser, self.mandateId)
|
||||||
|
fileRecord = mgmt.getFile(fileId)
|
||||||
|
if not fileRecord:
|
||||||
|
logger.warning(f"Avatar file {fileId} not found")
|
||||||
|
return None, None
|
||||||
|
mimeType = getattr(fileRecord, "mimeType", None) or "image/png"
|
||||||
|
rawBytes = mgmt.getFileData(fileId)
|
||||||
|
if not rawBytes:
|
||||||
|
logger.warning(f"Avatar file {fileId} has no data")
|
||||||
|
return None, None
|
||||||
|
b64 = base64.b64encode(rawBytes).decode("ascii")
|
||||||
|
logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64")
|
||||||
|
return b64, mimeType
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to load avatar file {fileId}: {e}")
|
||||||
|
return None, None
|
||||||
|
|
||||||
async def leaveMeeting(self, sessionId: str):
|
async def leaveMeeting(self, sessionId: str):
|
||||||
"""Send leave command to the Browser Bot service."""
|
"""Send leave command to the Browser Bot service."""
|
||||||
from . import interfaceFeatureTeamsbot as interfaceDb
|
from . import interfaceFeatureTeamsbot as interfaceDb
|
||||||
|
|
@ -1164,6 +1210,14 @@ class TeamsbotService:
|
||||||
interface.updateSession(sessionId, updates)
|
interface.updateSession(sessionId, updates)
|
||||||
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
|
await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
|
||||||
|
|
||||||
|
# Flush remaining audio buffer before generating summary
|
||||||
|
if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
|
||||||
|
if self._audioBuffer:
|
||||||
|
logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)")
|
||||||
|
self._audioBuffer = b""
|
||||||
|
self._audioBufferStartTime = 0.0
|
||||||
|
self._audioBufferLastChunkTime = 0.0
|
||||||
|
|
||||||
# Generate summary when session ends
|
# Generate summary when session ends
|
||||||
if dbStatus == TeamsbotSessionStatus.ENDED.value:
|
if dbStatus == TeamsbotSessionStatus.ENDED.value:
|
||||||
asyncio.create_task(self._generateMeetingSummary(sessionId))
|
asyncio.create_task(self._generateMeetingSummary(sessionId))
|
||||||
|
|
@ -1178,11 +1232,18 @@ class TeamsbotService:
|
||||||
voiceInterface,
|
voiceInterface,
|
||||||
websocket: WebSocket,
|
websocket: WebSocket,
|
||||||
):
|
):
|
||||||
"""Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline."""
|
"""Process an audio chunk from WebRTC capture. The bot-side VAD
|
||||||
|
(AudioWorklet / ScriptProcessor) already segments speech into 1-8s
|
||||||
|
voiced chunks. Here we apply a minimum-duration safety net: very short
|
||||||
|
chunks (<1s) are buffered until they reach 1s; everything else goes
|
||||||
|
straight to STT. A wall-clock timeout flushes stale buffers."""
|
||||||
import base64
|
import base64
|
||||||
|
_MIN_CHUNK_SEC = 1.0
|
||||||
|
_STALE_TIMEOUT_SEC = 3.0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audioBytes = base64.b64decode(audioBase64)
|
audioBytes = base64.b64decode(audioBase64)
|
||||||
if len(audioBytes) < 1000:
|
if len(audioBytes) < 500:
|
||||||
return
|
return
|
||||||
|
|
||||||
if captureDiagnostics:
|
if captureDiagnostics:
|
||||||
|
|
@ -1195,14 +1256,12 @@ class TeamsbotService:
|
||||||
f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
|
f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Use RMS from capture diagnostics to skip real silence.
|
isSilent = False
|
||||||
# Byte-variation heuristics produced false positives and dropped valid speech.
|
|
||||||
if captureDiagnostics and captureDiagnostics.get("rms") is not None:
|
if captureDiagnostics and captureDiagnostics.get("rms") is not None:
|
||||||
try:
|
try:
|
||||||
rmsVal = float(captureDiagnostics.get("rms"))
|
rmsVal = float(captureDiagnostics.get("rms"))
|
||||||
if rmsVal < 0.0003:
|
if rmsVal < 0.0003:
|
||||||
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})")
|
isSilent = True
|
||||||
return
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -1210,21 +1269,51 @@ class TeamsbotService:
|
||||||
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
|
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Treat sampleRate=0 as unknown (triggers auto-detection)
|
now = time.time()
|
||||||
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
|
effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000
|
||||||
|
|
||||||
|
if not isSilent:
|
||||||
|
if not self._audioBuffer:
|
||||||
|
self._audioBufferStartTime = now
|
||||||
|
self._audioBuffer += audioBytes
|
||||||
|
self._audioBufferLastChunkTime = now
|
||||||
|
self._audioBufferSampleRate = effectiveRate
|
||||||
|
|
||||||
|
bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0
|
||||||
|
bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0
|
||||||
|
|
||||||
|
shouldFlush = (
|
||||||
|
self._audioBuffer
|
||||||
|
and (
|
||||||
|
bufferDuration >= _MIN_CHUNK_SEC
|
||||||
|
or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if not shouldFlush:
|
||||||
|
return
|
||||||
|
|
||||||
|
flushBytes = self._audioBuffer
|
||||||
|
flushRate = self._audioBufferSampleRate
|
||||||
|
self._audioBuffer = b""
|
||||||
|
self._audioBufferStartTime = 0.0
|
||||||
|
self._audioBufferLastChunkTime = 0.0
|
||||||
|
|
||||||
|
flushDuration = len(flushBytes) / (flushRate * 2)
|
||||||
|
logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")
|
||||||
|
|
||||||
phraseHints = list(self._knownSpeakers)
|
phraseHints = list(self._knownSpeakers)
|
||||||
if self.config.botName:
|
if self.config.botName:
|
||||||
phraseHints.append(self.config.botName)
|
phraseHints.append(self.config.botName)
|
||||||
|
|
||||||
sttResult = await voiceInterface.speechToText(
|
sttResult = await voiceInterface.speechToText(
|
||||||
audioContent=audioBytes,
|
audioContent=flushBytes,
|
||||||
language=self.config.language or "de-DE",
|
language=self.config.language or "de-DE",
|
||||||
sampleRate=effectiveSampleRate,
|
sampleRate=flushRate,
|
||||||
channels=1,
|
channels=1,
|
||||||
skipFallbacks=True,
|
skipFallbacks=True,
|
||||||
phraseHints=phraseHints if phraseHints else None,
|
phraseHints=phraseHints if phraseHints else None,
|
||||||
alternativeLanguages=["en-US"],
|
audioFormat="linear16",
|
||||||
)
|
)
|
||||||
|
|
||||||
if sttResult and sttResult.get("success") and sttResult.get("text"):
|
if sttResult and sttResult.get("success") and sttResult.get("text"):
|
||||||
|
|
@ -1252,19 +1341,18 @@ class TeamsbotService:
|
||||||
|
|
||||||
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
|
def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
|
||||||
"""Track current speaker from captions for STT attribution.
|
"""Track current speaker from captions for STT attribution.
|
||||||
When the first non-bot caption arrives, retroactively attributes
|
Retroactively attributes any unattributed STT segments whenever a
|
||||||
any STT segments that were created before a speaker was known."""
|
new non-bot caption speaker arrives (not just the first time)."""
|
||||||
if not speaker:
|
if not speaker:
|
||||||
return
|
return
|
||||||
normalizedSpeaker = speaker.strip()
|
normalizedSpeaker = speaker.strip()
|
||||||
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
|
if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
|
||||||
return
|
return
|
||||||
|
|
||||||
prevSpeaker = self._lastCaptionSpeaker
|
|
||||||
self._lastCaptionSpeaker = normalizedSpeaker
|
self._lastCaptionSpeaker = normalizedSpeaker
|
||||||
self._knownSpeakers.add(normalizedSpeaker)
|
self._knownSpeakers.add(normalizedSpeaker)
|
||||||
|
|
||||||
if prevSpeaker is None and self._unattributedTranscriptIds:
|
if self._unattributedTranscriptIds:
|
||||||
from . import interfaceFeatureTeamsbot as interfaceDb
|
from . import interfaceFeatureTeamsbot as interfaceDb
|
||||||
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
|
||||||
for tid in self._unattributedTranscriptIds:
|
for tid in self._unattributedTranscriptIds:
|
||||||
|
|
@ -3243,17 +3331,53 @@ class TeamsbotService:
|
||||||
return await self._pickEphemeralPhrase("agentBusy")
|
return await self._pickEphemeralPhrase("agentBusy")
|
||||||
|
|
||||||
async def _interimAgentRoundMessage(
|
async def _interimAgentRoundMessage(
|
||||||
self, roundNum: int, maxRounds: int
|
self, lastToolLabel: Optional[str] = None
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Per-round progress notice for long agent runs (meeting voice /
|
"""Per-round progress notice for long agent runs (meeting voice /
|
||||||
chat, ephemeral). Phrasing is AI-localised once per session;
|
chat, ephemeral). Generates a single short phrase in the bot's
|
||||||
``{round}`` and ``{maxRounds}`` placeholders are substituted at
|
configured language that describes the current activity. Unlike
|
||||||
render time. Returns ``None`` if generation failed."""
|
the cached ephemeral phrases, this is a per-call AI generation
|
||||||
return await self._pickEphemeralPhrase(
|
to avoid mixing English displayLabels into non-English speech."""
|
||||||
"agentRound",
|
targetLang = (self.config.language or "").strip() or "en-US"
|
||||||
substitutions={"round": roundNum, "maxRounds": maxRounds},
|
botName = (self.config.botName or "the assistant").strip()
|
||||||
|
activityHint = lastToolLabel or "working on the task"
|
||||||
|
|
||||||
|
prompt = (
|
||||||
|
f"You are a meeting assistant named '{botName}'.\n"
|
||||||
|
f"Target spoken language (BCP-47): {targetLang}\n\n"
|
||||||
|
f"The assistant is currently busy with: {activityHint}\n\n"
|
||||||
|
f"Generate ONE short sentence (max 12 words) in {targetLang} "
|
||||||
|
f"that tells the audience what the assistant is doing right now. "
|
||||||
|
f"Natural, spoken style. No step numbers. No quotes around the output.\n"
|
||||||
|
f"Output ONLY the sentence, nothing else."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
aiService = createAiService(
|
||||||
|
self.currentUser, self.mandateId, self.instanceId
|
||||||
|
)
|
||||||
|
await aiService.ensureAiObjectsInitialized()
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=prompt,
|
||||||
|
context="",
|
||||||
|
options=AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||||
|
priority=PriorityEnum.SPEED,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
response = await aiService.callAi(request)
|
||||||
|
except Exception as aiErr:
|
||||||
|
logger.debug(f"Agent round phrase generation failed: {aiErr}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not response or response.errorCount != 0 or not response.content:
|
||||||
|
return None
|
||||||
|
|
||||||
|
result = response.content.strip().strip('"').strip("'")
|
||||||
|
if len(result) > 200:
|
||||||
|
result = result[:200]
|
||||||
|
return result
|
||||||
|
|
||||||
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
|
async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
|
||||||
"""Deliver a short line to the meeting (TTS + chat per config) without
|
"""Deliver a short line to the meeting (TTS + chat per config) without
|
||||||
persisting botResponses/transcripts, so the main agent answer stays the
|
persisting botResponses/transcripts, so the main agent answer stays the
|
||||||
|
|
@ -3370,6 +3494,7 @@ class TeamsbotService:
|
||||||
|
|
||||||
finalText: str = ""
|
finalText: str = ""
|
||||||
rounds = 0
|
rounds = 0
|
||||||
|
lastToolLabel: Optional[str] = None
|
||||||
try:
|
try:
|
||||||
async for event in agentService.runAgent(
|
async for event in agentService.runAgent(
|
||||||
prompt=taskText,
|
prompt=taskText,
|
||||||
|
|
@ -3390,11 +3515,9 @@ class TeamsbotService:
|
||||||
"round": roundNum,
|
"round": roundNum,
|
||||||
"maxRounds": maxR,
|
"maxRounds": maxR,
|
||||||
})
|
})
|
||||||
# Runde 1: schon allgemeiner Start-Hinweis; ab Runde 2 ins Meeting melden.
|
|
||||||
# Director prompts bleiben still — keine Zwischen-Updates ins Meeting.
|
|
||||||
if roundNum >= 2 and not directorPromptMode:
|
if roundNum >= 2 and not directorPromptMode:
|
||||||
try:
|
try:
|
||||||
roundText = await self._interimAgentRoundMessage(roundNum, maxR)
|
roundText = await self._interimAgentRoundMessage(lastToolLabel)
|
||||||
if roundText:
|
if roundText:
|
||||||
await self._notifyMeetingEphemeral(sessionId, roundText)
|
await self._notifyMeetingEphemeral(sessionId, roundText)
|
||||||
except Exception as roundNoticeErr:
|
except Exception as roundNoticeErr:
|
||||||
|
|
@ -3402,12 +3525,26 @@ class TeamsbotService:
|
||||||
f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
|
f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
|
||||||
)
|
)
|
||||||
elif event.type == AgentEventTypeEnum.TOOL_CALL:
|
elif event.type == AgentEventTypeEnum.TOOL_CALL:
|
||||||
toolName = (event.data or {}).get("toolName") if event.data else None
|
evtData = event.data or {}
|
||||||
|
toolName = evtData.get("toolName")
|
||||||
|
lastToolLabel = evtData.get("displayLabel")
|
||||||
await _emitSessionEvent(sessionId, "agentRun", {
|
await _emitSessionEvent(sessionId, "agentRun", {
|
||||||
"source": sourceLabel,
|
"source": sourceLabel,
|
||||||
"promptId": promptId,
|
"promptId": promptId,
|
||||||
"status": "toolCall",
|
"status": "toolCall",
|
||||||
"toolName": toolName,
|
"toolName": toolName,
|
||||||
|
"displayLabel": lastToolLabel,
|
||||||
|
})
|
||||||
|
elif event.type == AgentEventTypeEnum.TOOL_RESULT:
|
||||||
|
evtData = event.data or {}
|
||||||
|
resultSnippet = (evtData.get("data") or "")[:200]
|
||||||
|
await _emitSessionEvent(sessionId, "agentRun", {
|
||||||
|
"source": sourceLabel,
|
||||||
|
"promptId": promptId,
|
||||||
|
"status": "toolResult",
|
||||||
|
"toolName": evtData.get("toolName", ""),
|
||||||
|
"success": evtData.get("success", True),
|
||||||
|
"summary": resultSnippet,
|
||||||
})
|
})
|
||||||
elif event.type == AgentEventTypeEnum.FILE_CREATED:
|
elif event.type == AgentEventTypeEnum.FILE_CREATED:
|
||||||
await _emitSessionEvent(sessionId, "documentCreated", event.data or {})
|
await _emitSessionEvent(sessionId, "documentCreated", event.data or {})
|
||||||
|
|
|
||||||
|
|
@ -33,11 +33,6 @@ UI_OBJECTS = [
|
||||||
"label": t("Einstellungen", context="UI"),
|
"label": t("Einstellungen", context="UI"),
|
||||||
"meta": {"area": "settings"}
|
"meta": {"area": "settings"}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"objectKey": "ui.feature.workspace.rag-insights",
|
|
||||||
"label": t("Wissens-Insights", context="UI"),
|
|
||||||
"meta": {"area": "rag-insights"},
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
RESOURCE_OBJECTS = [
|
RESOURCE_OBJECTS = [
|
||||||
|
|
@ -86,7 +81,6 @@ TEMPLATE_ROLES = [
|
||||||
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
||||||
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
|
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
|
||||||
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
||||||
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
|
|
||||||
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
|
{"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -97,7 +91,6 @@ TEMPLATE_ROLES = [
|
||||||
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
{"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
|
||||||
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
|
{"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
|
||||||
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
{"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
|
||||||
{"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
|
|
||||||
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
|
{"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
|
||||||
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
|
{"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
|
||||||
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},
|
{"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},
|
||||||
|
|
|
||||||
|
|
@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings(
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
||||||
# =========================================================================
|
|
||||||
|
|
||||||
def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]:
|
|
||||||
"""All FileItem ids for this feature instance (any user). Knowledge rows are often stored
|
|
||||||
without featureInstanceId; we correlate by file id from the Management DB."""
|
|
||||||
from modules.datamodels.datamodelFiles import FileItem
|
|
||||||
from modules.interfaces.interfaceDbManagement import ComponentObjects
|
|
||||||
|
|
||||||
co = ComponentObjects()
|
|
||||||
rows = co.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId})
|
|
||||||
out: List[str] = []
|
|
||||||
m = str(mandateId) if mandateId else ""
|
|
||||||
for r in rows or []:
|
|
||||||
rid = r.get("id") if isinstance(r, dict) else getattr(r, "id", None)
|
|
||||||
if not rid:
|
|
||||||
continue
|
|
||||||
if m:
|
|
||||||
mid = r.get("mandateId") if isinstance(r, dict) else getattr(r, "mandateId", "") or ""
|
|
||||||
if mid and mid != m:
|
|
||||||
continue
|
|
||||||
out.append(str(rid))
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{instanceId}/rag-statistics")
|
|
||||||
@limiter.limit("60/minute")
|
|
||||||
async def getRagStatistics(
|
|
||||||
request: Request,
|
|
||||||
instanceId: str = Path(...),
|
|
||||||
days: int = Query(90, ge=7, le=365, description="Timeline window in days"),
|
|
||||||
context: RequestContext = Depends(getRequestContext),
|
|
||||||
):
|
|
||||||
"""Aggregated, non-identifying knowledge-store metrics for this workspace instance."""
|
|
||||||
mandateId, _instanceConfig = _validateInstanceAccess(instanceId, context)
|
|
||||||
workspaceFileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId)
|
|
||||||
kdb = getKnowledgeInterface(context.user)
|
|
||||||
stats = kdb.getRagStatisticsForInstance(
|
|
||||||
featureInstanceId=instanceId,
|
|
||||||
mandateId=str(mandateId) if mandateId else "",
|
|
||||||
timelineDays=days,
|
|
||||||
workspaceFileIds=workspaceFileIds,
|
|
||||||
)
|
|
||||||
if isinstance(stats, dict):
|
|
||||||
stats.setdefault("scope", {})
|
|
||||||
stats["scope"]["workspaceFileIdsResolved"] = len(workspaceFileIds)
|
|
||||||
return JSONResponse(stats)
|
|
||||||
|
|
|
||||||
|
|
@ -133,6 +133,60 @@ class KnowledgeObjects:
|
||||||
|
|
||||||
return {"indexRows": indexCount, "chunks": chunkCount}
|
return {"indexRows": indexCount, "chunks": chunkCount}
|
||||||
|
|
||||||
|
def deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]:
|
||||||
|
"""Delete all FileContentIndex rows whose provenance.dataSourceId matches.
|
||||||
|
|
||||||
|
Used when a user disables ragIndexEnabled on a DataSource to purge
|
||||||
|
only those chunks that were ingested from that specific tree element.
|
||||||
|
"""
|
||||||
|
if not dataSourceId:
|
||||||
|
return {"indexRows": 0, "chunks": 0}
|
||||||
|
|
||||||
|
allRows = self.db.getRecordset(FileContentIndex)
|
||||||
|
matchedRows = []
|
||||||
|
for row in allRows:
|
||||||
|
prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
|
||||||
|
if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
|
||||||
|
matchedRows.append(row)
|
||||||
|
|
||||||
|
mandateIds: set = set()
|
||||||
|
chunkCount = 0
|
||||||
|
indexCount = 0
|
||||||
|
for row in matchedRows:
|
||||||
|
fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
|
||||||
|
mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
|
||||||
|
if not fid:
|
||||||
|
continue
|
||||||
|
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
|
||||||
|
for chunk in chunks:
|
||||||
|
if self.db.recordDelete(ContentChunk, chunk["id"]):
|
||||||
|
chunkCount += 1
|
||||||
|
if self.db.recordDelete(FileContentIndex, fid):
|
||||||
|
indexCount += 1
|
||||||
|
if mid:
|
||||||
|
mandateIds.add(str(mid))
|
||||||
|
|
||||||
|
for mid in mandateIds:
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbBilling import _getRootInterface
|
||||||
|
_getRootInterface().reconcileMandateStorageBilling(mid)
|
||||||
|
except Exception as ex:
|
||||||
|
logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex)
|
||||||
|
|
||||||
|
return {"indexRows": indexCount, "chunks": chunkCount}
|
||||||
|
|
||||||
|
def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]:
|
||||||
|
"""List all FileContentIndex rows whose provenance.dataSourceId matches."""
|
||||||
|
if not dataSourceId:
|
||||||
|
return []
|
||||||
|
allRows = self.db.getRecordset(FileContentIndex)
|
||||||
|
out = []
|
||||||
|
for row in allRows:
|
||||||
|
prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
|
||||||
|
if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
|
||||||
|
out.append(dict(row) if not isinstance(row, dict) else row)
|
||||||
|
return out
|
||||||
|
|
||||||
def deleteFileContentIndex(self, fileId: str) -> bool:
|
def deleteFileContentIndex(self, fileId: str) -> bool:
|
||||||
"""Delete a FileContentIndex and all associated ContentChunks."""
|
"""Delete a FileContentIndex and all associated ContentChunks."""
|
||||||
existing = self.getFileContentIndex(fileId)
|
existing = self.getFileContentIndex(fileId)
|
||||||
|
|
|
||||||
|
|
@ -1274,17 +1274,20 @@ class ComponentObjects:
|
||||||
if getattr(permissions, "update", None) != AccessLevel.ALL:
|
if getattr(permissions, "update", None) != AccessLevel.ALL:
|
||||||
raise PermissionError("Setting global scope requires ALL permission")
|
raise PermissionError("Setting global scope requires ALL permission")
|
||||||
|
|
||||||
self.db.recordModify(FileFolder, folderId, {"scope": scope})
|
allFolderIds = self._collectChildFolderIds(folderId)
|
||||||
|
for fid in allFolderIds:
|
||||||
|
self.db.recordModify(FileFolder, fid, {"scope": scope})
|
||||||
|
|
||||||
filesUpdated = 0
|
filesUpdated = 0
|
||||||
if cascadeToFiles:
|
if cascadeToFiles:
|
||||||
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
|
for fid in allFolderIds:
|
||||||
for item in items:
|
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
|
||||||
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
for item in items:
|
||||||
if owner == self.userId:
|
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
||||||
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
if owner == self.userId:
|
||||||
self.db.recordModify(FileItem, iid, {"scope": scope})
|
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
||||||
filesUpdated += 1
|
self.db.recordModify(FileItem, iid, {"scope": scope})
|
||||||
|
filesUpdated += 1
|
||||||
|
|
||||||
return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
|
return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
|
||||||
|
|
||||||
|
|
@ -1294,16 +1297,19 @@ class ComponentObjects:
|
||||||
raise FileNotFoundError(f"Folder {folderId} not found")
|
raise FileNotFoundError(f"Folder {folderId} not found")
|
||||||
self._requireFolderWriteAccess(folder, folderId, "update")
|
self._requireFolderWriteAccess(folder, folderId, "update")
|
||||||
|
|
||||||
self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize})
|
allFolderIds = self._collectChildFolderIds(folderId)
|
||||||
|
for fid in allFolderIds:
|
||||||
|
self.db.recordModify(FileFolder, fid, {"neutralize": neutralize})
|
||||||
|
|
||||||
items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
|
|
||||||
filesUpdated = 0
|
filesUpdated = 0
|
||||||
for item in items:
|
for fid in allFolderIds:
|
||||||
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
|
||||||
if owner == self.userId:
|
for item in items:
|
||||||
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
|
||||||
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
|
if owner == self.userId:
|
||||||
filesUpdated += 1
|
iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
|
||||||
|
self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
|
||||||
|
filesUpdated += 1
|
||||||
|
|
||||||
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}
|
return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,10 @@ class VoiceObjects:
|
||||||
sampleRate: int = None, channels: int = None,
|
sampleRate: int = None, channels: int = None,
|
||||||
skipFallbacks: bool = False,
|
skipFallbacks: bool = False,
|
||||||
phraseHints: list = None,
|
phraseHints: list = None,
|
||||||
alternativeLanguages: list = None) -> Dict[str, Any]:
|
alternativeLanguages: list = None,
|
||||||
|
model: str = "latest_long",
|
||||||
|
lightweight: bool = False,
|
||||||
|
audioFormat: Optional[str] = None) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||||
|
|
||||||
|
|
@ -81,6 +84,9 @@ class VoiceObjects:
|
||||||
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
|
skipFallbacks: If True, skip fallback attempts (use when audio format is known)
|
||||||
phraseHints: Optional list of phrases to boost recognition (names, terms)
|
phraseHints: Optional list of phrases to boost recognition (names, terms)
|
||||||
alternativeLanguages: Optional list of additional language codes for multi-language
|
alternativeLanguages: Optional list of additional language codes for multi-language
|
||||||
|
model: Google STT model (e.g. latest_long, latest_short)
|
||||||
|
lightweight: If True, omit word-level features and enhanced model
|
||||||
|
audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict containing transcribed text, confidence, and metadata
|
Dict containing transcribed text, confidence, and metadata
|
||||||
|
|
@ -97,6 +103,9 @@ class VoiceObjects:
|
||||||
skipFallbacks=skipFallbacks,
|
skipFallbacks=skipFallbacks,
|
||||||
phraseHints=phraseHints,
|
phraseHints=phraseHints,
|
||||||
alternativeLanguages=alternativeLanguages,
|
alternativeLanguages=alternativeLanguages,
|
||||||
|
model=model,
|
||||||
|
lightweight=lightweight,
|
||||||
|
audioFormat=audioFormat,
|
||||||
)
|
)
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
|
|
@ -120,13 +129,23 @@ class VoiceObjects:
|
||||||
audioQueue: asyncio.Queue,
|
audioQueue: asyncio.Queue,
|
||||||
language: str = "de-DE",
|
language: str = "de-DE",
|
||||||
phraseHints: Optional[list] = None,
|
phraseHints: Optional[list] = None,
|
||||||
|
model: str = "latest_long",
|
||||||
|
lightweight: bool = False,
|
||||||
|
singleUtterance: bool = False,
|
||||||
) -> AsyncGenerator[Dict[str, Any], None]:
|
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||||
"""
|
"""
|
||||||
Stream audio to Google Streaming STT and yield interim/final results.
|
Stream audio to Google Streaming STT and yield interim/final results.
|
||||||
Billing is recorded for each final result.
|
Billing is recorded for each final result.
|
||||||
"""
|
"""
|
||||||
connector = self._getGoogleSpeechConnector()
|
connector = self._getGoogleSpeechConnector()
|
||||||
async for event in connector.streamingRecognize(audioQueue, language, phraseHints):
|
async for event in connector.streamingRecognize(
|
||||||
|
audioQueue,
|
||||||
|
language,
|
||||||
|
phraseHints,
|
||||||
|
model=model,
|
||||||
|
lightweight=lightweight,
|
||||||
|
singleUtterance=singleUtterance,
|
||||||
|
):
|
||||||
if event.get("isFinal") and self.billingCallback:
|
if event.get("isFinal") and self.billingCallback:
|
||||||
durationSec = event.get("audioDurationSec", 0)
|
durationSec = event.get("audioDurationSec", 0)
|
||||||
priceCHF = connector.calculateSttCostCHF(durationSec)
|
priceCHF = connector.calculateSttCostCHF(durationSec)
|
||||||
|
|
|
||||||
|
|
@ -1986,10 +1986,10 @@ def getUserViewTransactions(
|
||||||
if not pagination:
|
if not pagination:
|
||||||
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
|
raise HTTPException(status_code=400, detail="pagination required for groupSummary")
|
||||||
import json as _json
|
import json as _json
|
||||||
from collections import defaultdict
|
|
||||||
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
|
from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
|
||||||
from modules.routes.routeHelpers import (
|
from modules.routes.routeHelpers import (
|
||||||
applyViewToParams,
|
applyViewToParams,
|
||||||
|
build_group_summary_groups,
|
||||||
effective_group_by_levels,
|
effective_group_by_levels,
|
||||||
resolveView,
|
resolveView,
|
||||||
)
|
)
|
||||||
|
|
@ -2018,28 +2018,7 @@ def getUserViewTransactions(
|
||||||
summary_params,
|
summary_params,
|
||||||
ctx.user,
|
ctx.user,
|
||||||
)
|
)
|
||||||
counts: Dict[str, int] = defaultdict(int)
|
groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels)
|
||||||
labels: Dict[str, str] = {}
|
|
||||||
null_key = "\x00NULL"
|
|
||||||
for item in all_rows:
|
|
||||||
raw = item.get(field)
|
|
||||||
if raw is None or raw == "":
|
|
||||||
nk = null_key
|
|
||||||
labels[nk] = null_label
|
|
||||||
else:
|
|
||||||
nk = str(raw)
|
|
||||||
if nk not in labels:
|
|
||||||
labels[nk] = nk
|
|
||||||
counts[nk] += 1
|
|
||||||
groups_out: List[Dict[str, Any]] = []
|
|
||||||
for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())):
|
|
||||||
groups_out.append(
|
|
||||||
{
|
|
||||||
"value": None if nk == null_key else nk,
|
|
||||||
"label": labels.get(nk, nk),
|
|
||||||
"totalCount": counts[nk],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return JSONResponse(content={"groups": groups_out})
|
return JSONResponse(content={"groups": groups_out})
|
||||||
|
|
||||||
paginationParams = None
|
paginationParams = None
|
||||||
|
|
|
||||||
|
|
@ -130,7 +130,7 @@ def get_auth_authority_options(
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
@router.get("/")
|
@router.get("/")
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("60/minute")
|
||||||
async def get_connections(
|
async def get_connections(
|
||||||
request: Request,
|
request: Request,
|
||||||
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
|
pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
|
||||||
|
|
@ -197,7 +197,9 @@ async def get_connections(
|
||||||
"lastChecked": connection.lastChecked,
|
"lastChecked": connection.lastChecked,
|
||||||
"expiresAt": connection.expiresAt,
|
"expiresAt": connection.expiresAt,
|
||||||
"tokenStatus": tokenStatus,
|
"tokenStatus": tokenStatus,
|
||||||
"tokenExpiresAt": tokenExpiresAt
|
"tokenExpiresAt": tokenExpiresAt,
|
||||||
|
"knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False),
|
||||||
|
"knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {},
|
||||||
})
|
})
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
@ -264,7 +266,7 @@ async def get_connections(
|
||||||
})
|
})
|
||||||
enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
|
enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
|
||||||
filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
|
filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
|
||||||
groups_out = build_group_summary_groups(filtered, field, null_label)
|
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||||
return JSONResponse(content={"groups": groups_out})
|
return JSONResponse(content={"groups": groups_out})
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -725,3 +727,171 @@ def delete_connection(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to delete connection: {str(e)}"
|
detail=f"Failed to delete connection: {str(e)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Knowledge Consent & Control Endpoints
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _findOwnConnection(interface, userId: str, connectionId: str):
|
||||||
|
"""Find a connection owned by the user. Returns None if not found."""
|
||||||
|
connections = interface.getUserConnections(userId)
|
||||||
|
for conn in connections:
|
||||||
|
if conn.id == connectionId:
|
||||||
|
return conn
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/{connectionId}/knowledge-consent")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
def _updateKnowledgeConsent(
|
||||||
|
request: Request,
|
||||||
|
connectionId: str = Path(..., description="Connection ID"),
|
||||||
|
enabled: bool = Body(..., embed=True),
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Master switch: can PowerOn ingest data from this connection into the RAG knowledge store?
|
||||||
|
|
||||||
|
enabled=False: purge ALL chunks for this connection + cancel running jobs.
|
||||||
|
enabled=True: set flag; enqueue bootstrap only if rag-enabled DataSources exist.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
interface = getInterface(currentUser)
|
||||||
|
connection = _findOwnConnection(interface, currentUser.id, connectionId)
|
||||||
|
if not connection:
|
||||||
|
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
|
||||||
|
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled})
|
||||||
|
|
||||||
|
purged = None
|
||||||
|
cancelled = 0
|
||||||
|
bootstrapEnqueued = False
|
||||||
|
|
||||||
|
if not enabled:
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||||
|
purged = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
|
||||||
|
cancelled = cancelJobsByConnection(connectionId)
|
||||||
|
else:
|
||||||
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
|
||||||
|
if dataSources:
|
||||||
|
import asyncio
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||||
|
authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
|
||||||
|
|
||||||
|
async def _enqueue():
|
||||||
|
await startJob(
|
||||||
|
"connection.bootstrap",
|
||||||
|
{"connectionId": connectionId, "authority": authority.lower()},
|
||||||
|
triggeredBy=str(currentUser.id),
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
if loop.is_running():
|
||||||
|
loop.create_task(_enqueue())
|
||||||
|
else:
|
||||||
|
loop.run_until_complete(_enqueue())
|
||||||
|
except RuntimeError:
|
||||||
|
asyncio.run(_enqueue())
|
||||||
|
bootstrapEnqueued = True
|
||||||
|
|
||||||
|
import json as _json
|
||||||
|
from modules.shared.auditLogger import audit_logger
|
||||||
|
from modules.datamodels.datamodelAudit import AuditCategory
|
||||||
|
audit_logger.logEvent(
|
||||||
|
userId=str(currentUser.id),
|
||||||
|
mandateId=str(getattr(connection, "mandateId", "") or ""),
|
||||||
|
category=AuditCategory.PERMISSION.value,
|
||||||
|
action="knowledge_consent_changed",
|
||||||
|
details=_json.dumps({"connectionId": connectionId, "enabled": enabled}),
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("Knowledge consent %s for connection %s by user %s",
|
||||||
|
"enabled" if enabled else "disabled", connectionId, currentUser.id)
|
||||||
|
return {
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"knowledgeIngestionEnabled": enabled,
|
||||||
|
"purged": purged,
|
||||||
|
"cancelledJobs": cancelled,
|
||||||
|
"bootstrapEnqueued": bootstrapEnqueued,
|
||||||
|
}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error updating knowledge consent: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/{connectionId}/knowledge-preferences")
|
||||||
|
@limiter.limit("20/minute")
|
||||||
|
def _updateKnowledgePreferences(
|
||||||
|
request: Request,
|
||||||
|
connectionId: str = Path(..., description="Connection ID"),
|
||||||
|
preferences: Dict[str, Any] = Body(..., embed=True),
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Update per-connection knowledge ingestion preferences (mail depth, attachments, etc.)."""
|
||||||
|
_ALLOWED_KEYS = {"mailContentDepth", "mailIndexAttachments", "filesIndexBinaries",
|
||||||
|
"clickupScope", "clickupIndexAttachments", "maxAgeDays"}
|
||||||
|
try:
|
||||||
|
interface = getInterface(currentUser)
|
||||||
|
connection = _findOwnConnection(interface, currentUser.id, connectionId)
|
||||||
|
if not connection:
|
||||||
|
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
|
||||||
|
|
||||||
|
existing = getattr(connection, "knowledgePreferences", None) or {}
|
||||||
|
cleaned = {k: v for k, v in preferences.items() if k in _ALLOWED_KEYS}
|
||||||
|
merged = {**existing, **cleaned, "schemaVersion": 1}
|
||||||
|
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
getRootInterface().db.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged})
|
||||||
|
|
||||||
|
logger.info("Knowledge preferences updated for connection %s", connectionId)
|
||||||
|
return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error updating knowledge preferences: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{connectionId}/knowledge-stop")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
def _stopKnowledgeJobs(
|
||||||
|
request: Request,
|
||||||
|
connectionId: str = Path(..., description="Connection ID"),
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Cancel all running/pending bootstrap jobs for this connection."""
|
||||||
|
try:
|
||||||
|
interface = getInterface(currentUser)
|
||||||
|
connection = _findOwnConnection(interface, currentUser.id, connectionId)
|
||||||
|
if not connection:
|
||||||
|
raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
|
||||||
|
cancelled = cancelJobsByConnection(connectionId)
|
||||||
|
|
||||||
|
import json as _json
|
||||||
|
from modules.shared.auditLogger import audit_logger
|
||||||
|
from modules.datamodels.datamodelAudit import AuditCategory
|
||||||
|
audit_logger.logEvent(
|
||||||
|
userId=str(currentUser.id),
|
||||||
|
mandateId=str(getattr(connection, "mandateId", "") or ""),
|
||||||
|
category=AuditCategory.PERMISSION.value,
|
||||||
|
action="knowledge_jobs_stopped",
|
||||||
|
details=_json.dumps({"connectionId": connectionId, "cancelledCount": cancelled}),
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("Stopped %d knowledge jobs for connection %s", cancelled, connectionId)
|
||||||
|
return {"connectionId": connectionId, "cancelled": cancelled}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error stopping knowledge jobs: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
@ -413,7 +413,7 @@ def patch_folder_scope(
|
||||||
scope = body.get("scope")
|
scope = body.get("scope")
|
||||||
if not scope:
|
if not scope:
|
||||||
raise HTTPException(status_code=400, detail="scope is required")
|
raise HTTPException(status_code=400, detail="scope is required")
|
||||||
cascadeToFiles = body.get("cascadeToFiles", False)
|
cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
|
||||||
managementInterface = interfaceDbManagement.getInterface(
|
managementInterface = interfaceDbManagement.getInterface(
|
||||||
currentUser,
|
currentUser,
|
||||||
mandateId=str(context.mandateId) if context.mandateId else None,
|
mandateId=str(context.mandateId) if context.mandateId else None,
|
||||||
|
|
@ -543,7 +543,7 @@ def get_files(
|
||||||
FileItem,
|
FileItem,
|
||||||
)
|
)
|
||||||
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
||||||
groups_out = build_group_summary_groups(filtered, field, null_label)
|
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||||
return JSONResponse(content={"groups": groups_out})
|
return JSONResponse(content={"groups": groups_out})
|
||||||
|
|
||||||
if mode == "filterValues":
|
if mode == "filterValues":
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ def get_prompts(
|
||||||
result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
|
result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
|
||||||
)
|
)
|
||||||
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
|
||||||
groups_out = build_group_summary_groups(filtered, field, null_label)
|
groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
|
||||||
return JSONResponse(content={"groups": groups_out})
|
return JSONResponse(content={"groups": groups_out})
|
||||||
|
|
||||||
if mode == "filterValues":
|
if mode == "filterValues":
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging."""
|
"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
@ -125,3 +125,75 @@ def _updateNeutralizeFields(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error updating neutralizeFields: %s", e)
|
logger.error("Error updating neutralizeFields: %s", e)
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/{sourceId}/rag-index")
|
||||||
|
@limiter.limit("30/minute")
|
||||||
|
def _updateDataSourceRagIndex(
|
||||||
|
request: Request,
|
||||||
|
sourceId: str = Path(..., description="ID of the DataSource"),
|
||||||
|
ragIndexEnabled: bool = Body(..., embed=True),
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Toggle RAG indexing for a DataSource.
|
||||||
|
|
||||||
|
true: sets flag + enqueues mini-bootstrap for this DataSource only.
|
||||||
|
false: sets flag + synchronously purges all chunks from this DataSource.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
rec = rootIf.db.getRecord(DataSource, sourceId)
|
||||||
|
if not rec:
|
||||||
|
raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
|
||||||
|
|
||||||
|
rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
|
||||||
|
logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)
|
||||||
|
|
||||||
|
if ragIndexEnabled:
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
connectionId = rec.get("connectionId") or rec.get("connection_id") or ""
|
||||||
|
conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
|
||||||
|
authority = ""
|
||||||
|
if conn:
|
||||||
|
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
|
||||||
|
|
||||||
|
async def _enqueue():
|
||||||
|
await startJob(
|
||||||
|
"connection.bootstrap",
|
||||||
|
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
|
||||||
|
triggeredBy=str(context.user.id),
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
if loop.is_running():
|
||||||
|
loop.create_task(_enqueue())
|
||||||
|
else:
|
||||||
|
loop.run_until_complete(_enqueue())
|
||||||
|
except RuntimeError:
|
||||||
|
asyncio.run(_enqueue())
|
||||||
|
else:
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||||
|
purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
|
||||||
|
logger.info("Purged %d index rows / %d chunks for DataSource %s",
|
||||||
|
purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
|
||||||
|
|
||||||
|
import json
|
||||||
|
from modules.shared.auditLogger import audit_logger
|
||||||
|
from modules.datamodels.datamodelAudit import AuditCategory
|
||||||
|
audit_logger.logEvent(
|
||||||
|
userId=str(context.user.id),
|
||||||
|
mandateId=context.mandateId,
|
||||||
|
category=AuditCategory.PERMISSION.value,
|
||||||
|
action="rag_index_toggled",
|
||||||
|
details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error updating datasource ragIndexEnabled: %s", e)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
|
||||||
|
|
@ -825,45 +825,106 @@ def build_group_summary_groups(
|
||||||
items: List[Dict[str, Any]],
|
items: List[Dict[str, Any]],
|
||||||
field: str,
|
field: str,
|
||||||
null_label: str = "—",
|
null_label: str = "—",
|
||||||
|
groupByLevels: List[Dict[str, Any]] | None = None,
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level).
|
Build {"value", "label", "totalCount"} summaries for mode=groupSummary.
|
||||||
|
|
||||||
|
When *groupByLevels* contains more than one level the function produces one
|
||||||
|
entry per unique combination of all level values (flat permutations).
|
||||||
|
``value`` becomes a ``///``-joined composite key and ``label`` the ``/``-joined
|
||||||
|
human-readable label so the frontend can split them back.
|
||||||
"""
|
"""
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
counts: Dict[str, int] = defaultdict(int)
|
fields: list[dict] = []
|
||||||
display_by_key: Dict[str, str] = {}
|
if groupByLevels and len(groupByLevels) > 1:
|
||||||
null_key = "\x00NULL"
|
for lvl in groupByLevels:
|
||||||
label_attr = f"{field}Label"
|
f = lvl.get("field", "")
|
||||||
|
nl = str(lvl.get("nullLabel") or null_label)
|
||||||
|
if f:
|
||||||
|
fields.append({"field": f, "nullLabel": nl})
|
||||||
|
if not fields:
|
||||||
|
fields = [{"field": field, "nullLabel": null_label}]
|
||||||
|
|
||||||
|
nullKey = "\x00NULL"
|
||||||
|
|
||||||
|
if len(fields) == 1:
|
||||||
|
f = fields[0]["field"]
|
||||||
|
nl = fields[0]["nullLabel"]
|
||||||
|
counts: Dict[str, int] = defaultdict(int)
|
||||||
|
displayByKey: Dict[str, str] = {}
|
||||||
|
labelAttr = f"{f}Label"
|
||||||
|
for item in items:
|
||||||
|
raw = item.get(f)
|
||||||
|
if raw is None or raw == "":
|
||||||
|
nk = nullKey
|
||||||
|
display = nl
|
||||||
|
else:
|
||||||
|
nk = str(raw)
|
||||||
|
display = None
|
||||||
|
lbl = item.get(labelAttr)
|
||||||
|
if lbl is not None and lbl != "":
|
||||||
|
display = str(lbl)
|
||||||
|
if display is None:
|
||||||
|
display = nk
|
||||||
|
counts[nk] += 1
|
||||||
|
if nk not in displayByKey:
|
||||||
|
displayByKey[nk] = display
|
||||||
|
orderedKeys = sorted(
|
||||||
|
counts.keys(),
|
||||||
|
key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()),
|
||||||
|
)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"value": None if nk == nullKey else nk,
|
||||||
|
"label": displayByKey.get(nk, nk),
|
||||||
|
"totalCount": counts[nk],
|
||||||
|
}
|
||||||
|
for nk in orderedKeys
|
||||||
|
]
|
||||||
|
|
||||||
|
counts = defaultdict(int)
|
||||||
|
displayByComposite: Dict[str, list] = {}
|
||||||
|
filtersByComposite: Dict[str, dict] = {}
|
||||||
for item in items:
|
for item in items:
|
||||||
raw = item.get(field)
|
parts: list[str] = []
|
||||||
if raw is None or raw == "":
|
labels: list[str] = []
|
||||||
nk = null_key
|
filterMap: dict = {}
|
||||||
display = null_label
|
for fd in fields:
|
||||||
else:
|
f = fd["field"]
|
||||||
nk = str(raw)
|
nl = fd["nullLabel"]
|
||||||
display = None
|
labelAttr = f"{f}Label"
|
||||||
lbl = item.get(label_attr)
|
raw = item.get(f)
|
||||||
if lbl is not None and lbl != "":
|
if raw is None or raw == "":
|
||||||
display = str(lbl)
|
parts.append(nullKey)
|
||||||
if display is None:
|
labels.append(nl)
|
||||||
display = nk
|
filterMap[f] = None
|
||||||
counts[nk] += 1
|
else:
|
||||||
if nk not in display_by_key:
|
parts.append(str(raw))
|
||||||
display_by_key[nk] = display
|
lbl = item.get(labelAttr)
|
||||||
|
labels.append(str(lbl) if lbl not in (None, "") else str(raw))
|
||||||
|
filterMap[f] = str(raw)
|
||||||
|
compositeKey = "///".join(parts)
|
||||||
|
counts[compositeKey] += 1
|
||||||
|
if compositeKey not in displayByComposite:
|
||||||
|
displayByComposite[compositeKey] = labels
|
||||||
|
filtersByComposite[compositeKey] = filterMap
|
||||||
|
|
||||||
ordered_keys = sorted(
|
orderedKeys = sorted(
|
||||||
counts.keys(),
|
counts.keys(),
|
||||||
key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()),
|
key=lambda x: tuple(
|
||||||
|
(seg == nullKey, seg.lower()) for seg in x.split("///")
|
||||||
|
),
|
||||||
)
|
)
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
"value": None if nk == null_key else nk,
|
"value": ck.replace(nullKey, "__null__") if nullKey in ck else ck,
|
||||||
"label": display_by_key.get(nk, nk),
|
"label": " / ".join(displayByComposite[ck]),
|
||||||
"totalCount": counts[nk],
|
"totalCount": counts[ck],
|
||||||
|
"filters": filtersByComposite[ck],
|
||||||
}
|
}
|
||||||
for nk in ordered_keys
|
for ck in orderedKeys
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
277
modules/routes/routeRagInventory.py
Normal file
277
modules/routes/routeRagInventory.py
Normal file
|
|
@ -0,0 +1,277 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""RAG Inventory API — global knowledge-store visibility for users, admins, platform."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException, Depends, Request
|
||||||
|
from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
|
||||||
|
from modules.datamodels.datamodelUam import User
|
||||||
|
from modules.shared.i18nRegistry import apiRouteContext
|
||||||
|
|
||||||
|
routeApiMsg = apiRouteContext("routeRagInventory")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/api/rag/inventory",
|
||||||
|
tags=["RAG Inventory"],
|
||||||
|
responses={
|
||||||
|
401: {"description": "Unauthorized"},
|
||||||
|
403: {"description": "Forbidden"},
|
||||||
|
500: {"description": "Internal server error"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
|
||||||
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
from modules.datamodels.datamodelKnowledge import FileContentIndex
|
||||||
|
|
||||||
|
out = []
|
||||||
|
for conn in connections:
|
||||||
|
connectionId = str(conn.id)
|
||||||
|
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
|
||||||
|
connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
|
||||||
|
connChunkTotal = len(connIndexRows)
|
||||||
|
|
||||||
|
chunksByDs: Dict[str, int] = {}
|
||||||
|
unassigned = 0
|
||||||
|
for idx in connIndexRows:
|
||||||
|
prov = (idx.get("provenance") if isinstance(idx, dict) else getattr(idx, "provenance", None)) or {}
|
||||||
|
dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
|
||||||
|
if dsIdRef:
|
||||||
|
chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
|
||||||
|
else:
|
||||||
|
unassigned += 1
|
||||||
|
|
||||||
|
dsItems = []
|
||||||
|
for ds in dataSources:
|
||||||
|
dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")
|
||||||
|
dsItems.append({
|
||||||
|
"id": dsId,
|
||||||
|
"label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""),
|
||||||
|
"path": ds.get("path") if isinstance(ds, dict) else getattr(ds, "path", ""),
|
||||||
|
"sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""),
|
||||||
|
"ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False),
|
||||||
|
"neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False),
|
||||||
|
"lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
|
||||||
|
"chunkCount": chunksByDs.get(dsId, 0),
|
||||||
|
})
|
||||||
|
|
||||||
|
if unassigned > 0 and len(dsItems) == 1:
|
||||||
|
dsItems[0]["chunkCount"] += unassigned
|
||||||
|
|
||||||
|
jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5)
|
||||||
|
connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]
|
||||||
|
runningJobs = [
|
||||||
|
{"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
|
||||||
|
for j in connJobs
|
||||||
|
if j.get("status") in ("PENDING", "RUNNING")
|
||||||
|
]
|
||||||
|
lastError = None
|
||||||
|
for j in connJobs:
|
||||||
|
if j.get("status") == "ERROR":
|
||||||
|
lastError = {"jobId": j["id"], "errorMessage": j.get("errorMessage", "")}
|
||||||
|
break
|
||||||
|
|
||||||
|
out.append({
|
||||||
|
"id": connectionId,
|
||||||
|
"authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority),
|
||||||
|
"externalEmail": getattr(conn, "externalEmail", ""),
|
||||||
|
"knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
|
||||||
|
"preferences": getattr(conn, "knowledgePreferences", None) or {},
|
||||||
|
"dataSources": dsItems,
|
||||||
|
"totalChunks": connChunkTotal,
|
||||||
|
"runningJobs": runningJobs,
|
||||||
|
"lastError": lastError,
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/me")
|
||||||
|
@limiter.limit("30/minute")
|
||||||
|
def _getInventoryMe(
|
||||||
|
request: Request,
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Personal RAG inventory: own connections + DataSources + chunk counts."""
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
knowledgeIf = getKnowledgeInterface(None)
|
||||||
|
connections = rootIf.getUserConnections(currentUser.id)
|
||||||
|
|
||||||
|
items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService)
|
||||||
|
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||||
|
|
||||||
|
return {"connections": items, "totals": {"chunks": totalChunks}}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/mandate")
|
||||||
|
@limiter.limit("20/minute")
|
||||||
|
def _getInventoryMandate(
|
||||||
|
request: Request,
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Mandate-level RAG aggregation (requires mandate membership)."""
|
||||||
|
if not context.mandateId:
|
||||||
|
raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required"))
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
knowledgeIf = getKnowledgeInterface(None)
|
||||||
|
mandateId = str(context.mandateId) if context.mandateId else ""
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelUam import UserConnection
|
||||||
|
allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
|
||||||
|
connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]
|
||||||
|
|
||||||
|
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
||||||
|
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||||
|
totalBytes = aggregateMandateRagTotalBytes(mandateId)
|
||||||
|
|
||||||
|
return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error in RAG inventory /mandate: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/platform")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
def _getInventoryPlatform(
|
||||||
|
request: Request,
|
||||||
|
context: RequestContext = Depends(getRequestContext),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Platform-wide RAG statistics (sysadmin only)."""
|
||||||
|
if not context.isSysAdmin:
|
||||||
|
raise HTTPException(status_code=403, detail=routeApiMsg("Platform admin required"))
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
|
||||||
|
from modules.datamodels.datamodelUam import UserConnection
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
knowledgeIf = getKnowledgeInterface(None)
|
||||||
|
allConnections = rootIf.db.getRecordset(UserConnection)
|
||||||
|
connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]
|
||||||
|
|
||||||
|
items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
|
||||||
|
totalChunks = sum(c.get("totalChunks", 0) for c in items)
|
||||||
|
|
||||||
|
return {"connections": items, "totals": {"chunks": totalChunks}}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error in RAG inventory /platform: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/reindex/{connectionId}")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
def _reindexConnection(
|
||||||
|
request: Request,
|
||||||
|
connectionId: str,
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Re-trigger bootstrap for a connection (re-index all ragIndexEnabled DataSources).
|
||||||
|
|
||||||
|
Submits a new connection.bootstrap job, regardless of previous failures.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import startJob
|
||||||
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
conn = rootIf.getUserConnectionById(connectionId)
|
||||||
|
if conn is None:
|
||||||
|
raise HTTPException(status_code=404, detail="Connection not found")
|
||||||
|
|
||||||
|
if str(conn.userId) != str(currentUser.id):
|
||||||
|
raise HTTPException(status_code=403, detail="Not your connection")
|
||||||
|
|
||||||
|
dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))]
|
||||||
|
if not ragDs:
|
||||||
|
return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
|
||||||
|
|
||||||
|
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
|
||||||
|
dsIds = [(ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")) for ds in ragDs]
|
||||||
|
|
||||||
|
async def _enqueue():
|
||||||
|
return await startJob(
|
||||||
|
"connection.bootstrap",
|
||||||
|
{"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds},
|
||||||
|
triggeredBy=str(currentUser.id),
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
if loop.is_running():
|
||||||
|
future = asyncio.ensure_future(_enqueue())
|
||||||
|
jobId = None
|
||||||
|
else:
|
||||||
|
jobId = loop.run_until_complete(_enqueue())
|
||||||
|
except RuntimeError:
|
||||||
|
jobId = asyncio.run(_enqueue())
|
||||||
|
|
||||||
|
logger.info("Reindex triggered for connection %s (%d DataSources)", connectionId, len(dsIds))
|
||||||
|
return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(dsIds), "jobId": jobId}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error triggering reindex: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs")
|
||||||
|
@limiter.limit("60/minute")
|
||||||
|
def _getActiveJobs(
|
||||||
|
request: Request,
|
||||||
|
currentUser: User = Depends(getCurrentUser),
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Active RAG jobs for the current user (used by header badge)."""
|
||||||
|
try:
|
||||||
|
from modules.serviceCenter.services.serviceBackgroundJobs import listJobs
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
connections = rootIf.getUserConnections(currentUser.id)
|
||||||
|
connectionMap = {str(c.id): c for c in connections}
|
||||||
|
connectionIds = set(connectionMap.keys())
|
||||||
|
|
||||||
|
jobs = listJobs(jobType="connection.bootstrap", limit=50)
|
||||||
|
active = []
|
||||||
|
for j in jobs:
|
||||||
|
if j.get("status") not in ("PENDING", "RUNNING"):
|
||||||
|
continue
|
||||||
|
payload = j.get("payload") or {}
|
||||||
|
connId = payload.get("connectionId")
|
||||||
|
if connId in connectionIds:
|
||||||
|
conn = connectionMap[connId]
|
||||||
|
active.append({
|
||||||
|
"jobId": j["id"],
|
||||||
|
"connectionId": connId,
|
||||||
|
"connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),
|
||||||
|
"jobType": j.get("jobType", "connection.bootstrap"),
|
||||||
|
"progress": j.get("progress", 0),
|
||||||
|
"progressMessage": j.get("progressMessage", ""),
|
||||||
|
})
|
||||||
|
return active
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error in RAG inventory /jobs: %s", e, exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
@ -155,12 +155,13 @@ async def sttStream(
|
||||||
|
|
||||||
Protocol:
|
Protocol:
|
||||||
Client sends JSON:
|
Client sends JSON:
|
||||||
{"type": "open", "language": "de-DE"}
|
{"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true}
|
||||||
{"type": "audio", "chunk": "<base64>"}
|
{"type": "audio", "chunk": "<base64>"}
|
||||||
{"type": "close"}
|
{"type": "close"}
|
||||||
Server sends JSON:
|
Server sends JSON:
|
||||||
{"type": "interim", "text": "..."}
|
{"type": "interim", "text": "..."}
|
||||||
{"type": "final", "text": "...", "confidence": 0.95}
|
{"type": "final", "text": "...", "confidence": 0.95}
|
||||||
|
{"type": "end_of_single_utterance", "audioDurationSec": 0.0}
|
||||||
{"type": "error", "message": "..."}
|
{"type": "error", "message": "..."}
|
||||||
{"type": "closed"}
|
{"type": "closed"}
|
||||||
"""
|
"""
|
||||||
|
|
@ -205,7 +206,12 @@ async def sttStream(
|
||||||
logger.warning(f"STT billing pre-flight skipped: {e}")
|
logger.warning(f"STT billing pre-flight skipped: {e}")
|
||||||
|
|
||||||
audioQueue: asyncio.Queue = asyncio.Queue()
|
audioQueue: asyncio.Queue = asyncio.Queue()
|
||||||
language = "de-DE"
|
sttOpenOptions: Dict[str, Any] = {
|
||||||
|
"language": "de-DE",
|
||||||
|
"model": "latest_long",
|
||||||
|
"lightweight": False,
|
||||||
|
"singleUtterance": False,
|
||||||
|
}
|
||||||
streamingTask: Optional[asyncio.Task] = None
|
streamingTask: Optional[asyncio.Task] = None
|
||||||
voiceInterface: Optional[VoiceObjects] = None
|
voiceInterface: Optional[VoiceObjects] = None
|
||||||
|
|
||||||
|
|
@ -233,10 +239,23 @@ async def sttStream(
|
||||||
voiceInterface.billingCallback = _billingCb
|
voiceInterface.billingCallback = _billingCb
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for event in voiceInterface.streamingSpeechToText(audioQueue, language):
|
async for event in voiceInterface.streamingSpeechToText(
|
||||||
|
audioQueue,
|
||||||
|
sttOpenOptions["language"],
|
||||||
|
phraseHints=None,
|
||||||
|
model=sttOpenOptions["model"],
|
||||||
|
lightweight=sttOpenOptions["lightweight"],
|
||||||
|
singleUtterance=sttOpenOptions["singleUtterance"],
|
||||||
|
):
|
||||||
if event.get("reconnectRequired"):
|
if event.get("reconnectRequired"):
|
||||||
await _sendJson({"type": "reconnect_required"})
|
await _sendJson({"type": "reconnect_required"})
|
||||||
return
|
return
|
||||||
|
if event.get("endOfSingleUtterance"):
|
||||||
|
await _sendJson({
|
||||||
|
"type": "end_of_single_utterance",
|
||||||
|
"audioDurationSec": event.get("audioDurationSec", 0.0),
|
||||||
|
})
|
||||||
|
continue
|
||||||
if event.get("isFinal"):
|
if event.get("isFinal"):
|
||||||
if event.get("transcript"):
|
if event.get("transcript"):
|
||||||
await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
|
await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
|
||||||
|
|
@ -258,7 +277,10 @@ async def sttStream(
|
||||||
msgType = (msg.get("type") or "").strip()
|
msgType = (msg.get("type") or "").strip()
|
||||||
|
|
||||||
if msgType == "open":
|
if msgType == "open":
|
||||||
language = msg.get("language") or "de-DE"
|
sttOpenOptions["language"] = msg.get("language") or "de-DE"
|
||||||
|
sttOpenOptions["model"] = msg.get("model") or "latest_long"
|
||||||
|
sttOpenOptions["lightweight"] = bool(msg.get("lightweight"))
|
||||||
|
sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance"))
|
||||||
if streamingTask and not streamingTask.done():
|
if streamingTask and not streamingTask.done():
|
||||||
await audioQueue.put((b"", True))
|
await audioQueue.put((b"", True))
|
||||||
streamingTask.cancel()
|
streamingTask.cancel()
|
||||||
|
|
|
||||||
|
|
@ -335,9 +335,14 @@ async def runAgentLoop(
|
||||||
|
|
||||||
# Execute tool calls
|
# Execute tool calls
|
||||||
for tc in toolCalls:
|
for tc in toolCalls:
|
||||||
|
toolDef = toolRegistry.getTool(tc.name)
|
||||||
yield AgentEvent(
|
yield AgentEvent(
|
||||||
type=AgentEventTypeEnum.TOOL_CALL,
|
type=AgentEventTypeEnum.TOOL_CALL,
|
||||||
data={"toolName": tc.name, "args": tc.args}
|
data={
|
||||||
|
"toolName": tc.name,
|
||||||
|
"displayLabel": toolDef.displayLabel if toolDef else None,
|
||||||
|
"args": tc.args,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
results = await _executeToolCalls(toolCalls, toolRegistry, {
|
||||||
|
|
|
||||||
|
|
@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services):
|
||||||
"required": ["connectionId", "to", "subject", "body"],
|
"required": ["connectionId", "to", "subject", "body"],
|
||||||
},
|
},
|
||||||
readOnly=False,
|
readOnly=False,
|
||||||
|
displayLabel="composing an email",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
readOnly=False,
|
readOnly=False,
|
||||||
|
displayLabel="creating a document",
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── textToSpeech tool ──────────────────────────────────────────────
|
# ── textToSpeech tool ──────────────────────────────────────────────
|
||||||
|
|
@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
"required": ["prompt"],
|
"required": ["prompt"],
|
||||||
},
|
},
|
||||||
readOnly=False,
|
readOnly=False,
|
||||||
|
displayLabel="generating an image",
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── createChart tool ─────────────────────────────────────────────────
|
# ── createChart tool ─────────────────────────────────────────────────
|
||||||
|
|
@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
"required": ["datasets"],
|
"required": ["datasets"],
|
||||||
},
|
},
|
||||||
readOnly=False,
|
readOnly=False,
|
||||||
|
displayLabel="creating a chart",
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
|
# ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
|
||||||
|
|
@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
},
|
},
|
||||||
"required": ["code"]
|
"required": ["code"]
|
||||||
},
|
},
|
||||||
readOnly=True
|
readOnly=True,
|
||||||
|
displayLabel="running calculations",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -310,11 +310,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
|
return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
|
||||||
fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
|
fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
|
||||||
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
|
fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
|
||||||
|
updateFields: Dict[str, Any] = {}
|
||||||
if fiId:
|
if fiId:
|
||||||
dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId})
|
updateFields["featureInstanceId"] = fiId
|
||||||
# File group tree removed — groupId arg and instance-group assignment no longer apply
|
if args.get("folderId"):
|
||||||
|
updateFields["folderId"] = args["folderId"]
|
||||||
if args.get("tags"):
|
if args.get("tags"):
|
||||||
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
|
updateFields["tags"] = args["tags"]
|
||||||
|
if updateFields:
|
||||||
|
dbMgmt.updateFile(fileItem.id, updateFields)
|
||||||
|
|
||||||
chatDocId = _attachFileAsChatDocument(
|
chatDocId = _attachFileAsChatDocument(
|
||||||
services, fileItem,
|
services, fileItem,
|
||||||
|
|
@ -359,7 +363,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
},
|
},
|
||||||
"required": ["fileId"]
|
"required": ["fileId"]
|
||||||
},
|
},
|
||||||
readOnly=True
|
readOnly=True,
|
||||||
|
displayLabel="reviewing a document",
|
||||||
)
|
)
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|
@ -406,7 +411,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
"properties": {"query": {"type": "string", "description": "Search query"}},
|
"properties": {"query": {"type": "string", "description": "Search query"}},
|
||||||
"required": ["query"]
|
"required": ["query"]
|
||||||
},
|
},
|
||||||
readOnly=True
|
readOnly=True,
|
||||||
|
displayLabel="researching on the web",
|
||||||
)
|
)
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|
@ -427,7 +433,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
"writeFile", _writeFile,
|
"writeFile", _writeFile,
|
||||||
description=(
|
description=(
|
||||||
"Create, append, or overwrite a file. Modes:\n"
|
"Create, append, or overwrite a file. Modes:\n"
|
||||||
"- create (default): create a new file (name required).\n"
|
"- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n"
|
||||||
"- append: append content to an existing file (fileId required). "
|
"- append: append content to an existing file (fileId required). "
|
||||||
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
|
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
|
||||||
"- overwrite: replace entire file content (fileId required).\n"
|
"- overwrite: replace entire file content (fileId required).\n"
|
||||||
|
|
@ -443,7 +449,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
"content": {"type": "string", "description": "Content to write/append"},
|
"content": {"type": "string", "description": "Content to write/append"},
|
||||||
"mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
|
"mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
|
||||||
"fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
|
"fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
|
||||||
"groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."},
|
"folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."},
|
||||||
"tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
|
"tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
|
||||||
},
|
},
|
||||||
"required": ["content"]
|
"required": ["content"]
|
||||||
|
|
@ -581,7 +587,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
},
|
},
|
||||||
"required": ["url"]
|
"required": ["url"]
|
||||||
},
|
},
|
||||||
readOnly=True
|
readOnly=True,
|
||||||
|
displayLabel="reading a webpage",
|
||||||
)
|
)
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|
@ -701,7 +708,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
|
||||||
readOnly=False
|
readOnly=False
|
||||||
)
|
)
|
||||||
|
|
||||||
# Group tree tools removed — file grouping now uses view-based display grouping (TableListView)
|
# ---- Folder management tools ----
|
||||||
|
|
||||||
|
async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
|
name = args.get("name", "")
|
||||||
|
parentId = args.get("parentId") or None
|
||||||
|
if not name:
|
||||||
|
return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")
|
||||||
|
try:
|
||||||
|
chatService = services.chat
|
||||||
|
dbMgmt = chatService.interfaceDbComponent
|
||||||
|
folder = dbMgmt.createFolder(name, parentId=parentId)
|
||||||
|
folderId = folder.get("id") if isinstance(folder, dict) else getattr(folder, "id", None)
|
||||||
|
folderName = folder.get("name") if isinstance(folder, dict) else getattr(folder, "name", name)
|
||||||
|
return ToolResult(
|
||||||
|
toolCallId="", toolName="createFolder", success=True,
|
||||||
|
data=f"Folder '{folderName}' created (id: {folderId})" + (f" inside parent {parentId}" if parentId else ""),
|
||||||
|
sideEvents=[{"type": "folderCreated", "data": {"folderId": folderId, "folderName": folderName, "parentId": parentId}}],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(e))
|
||||||
|
|
||||||
|
async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
|
try:
|
||||||
|
chatService = services.chat
|
||||||
|
dbMgmt = chatService.interfaceDbComponent
|
||||||
|
folders = dbMgmt.getOwnFolderTree()
|
||||||
|
if not folders:
|
||||||
|
return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.")
|
||||||
|
lines = []
|
||||||
|
folderMap: Dict[Optional[str], List] = {}
|
||||||
|
for f in folders:
|
||||||
|
pid = f.get("parentId") if isinstance(f, dict) else getattr(f, "parentId", None)
|
||||||
|
folderMap.setdefault(pid, []).append(f)
|
||||||
|
|
||||||
|
def _walk(parentId: Optional[str], indent: int):
|
||||||
|
for f in sorted(folderMap.get(parentId, []), key=lambda x: (x.get("name") if isinstance(x, dict) else getattr(x, "name", "")).lower()):
|
||||||
|
fId = f.get("id") if isinstance(f, dict) else getattr(f, "id", "")
|
||||||
|
fName = f.get("name") if isinstance(f, dict) else getattr(f, "name", "")
|
||||||
|
prefix = " " * indent
|
||||||
|
lines.append(f"{prefix}- {fName} (id: {fId})")
|
||||||
|
_walk(fId, indent + 1)
|
||||||
|
|
||||||
|
_walk(None, 0)
|
||||||
|
return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines))
|
||||||
|
except Exception as e:
|
||||||
|
return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e))
|
||||||
|
|
||||||
|
async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
|
fileId = args.get("fileId", "")
|
||||||
|
folderId = args.get("folderId")
|
||||||
|
if not fileId:
|
||||||
|
return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")
|
||||||
|
try:
|
||||||
|
chatService = services.chat
|
||||||
|
dbMgmt = chatService.interfaceDbComponent
|
||||||
|
file = dbMgmt.getFile(fileId)
|
||||||
|
if not file:
|
||||||
|
return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found")
|
||||||
|
dbMgmt.updateFile(fileId, {"folderId": folderId or None})
|
||||||
|
targetLabel = f"folder {folderId}" if folderId else "root"
|
||||||
|
return ToolResult(
|
||||||
|
toolCallId="", toolName="moveFile", success=True,
|
||||||
|
data=f"File '{file.fileName}' (id: {fileId}) moved to {targetLabel}",
|
||||||
|
sideEvents=[{"type": "fileUpdated", "data": {"fileId": fileId, "fileName": file.fileName}}],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(e))
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"createFolder", _createFolder,
|
||||||
|
description=(
|
||||||
|
"Create a new folder in the workspace file tree. "
|
||||||
|
"Use parentId to create nested folders. Returns the new folder ID."
|
||||||
|
),
|
||||||
|
parameters={
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string", "description": "Folder name"},
|
||||||
|
"parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."},
|
||||||
|
},
|
||||||
|
"required": ["name"]
|
||||||
|
},
|
||||||
|
readOnly=False
|
||||||
|
)
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"listFolders", _listFolders,
|
||||||
|
description=(
|
||||||
|
"List all folders in the workspace as an indented tree. "
|
||||||
|
"Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile."
|
||||||
|
),
|
||||||
|
parameters={"type": "object", "properties": {}},
|
||||||
|
readOnly=True
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
|
folderId = args.get("folderId", "")
|
||||||
|
newName = args.get("newName", "")
|
||||||
|
if not folderId or not newName:
|
||||||
|
return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
|
||||||
|
try:
|
||||||
|
chatService = services.chat
|
||||||
|
dbMgmt = chatService.interfaceDbComponent
|
||||||
|
folder = dbMgmt.renameFolder(folderId, newName)
|
||||||
|
return ToolResult(
|
||||||
|
toolCallId="", toolName="renameFolder", success=True,
|
||||||
|
data=f"Folder {folderId} renamed to '{newName}'",
|
||||||
|
sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e))
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"renameFolder", _renameFolder,
|
||||||
|
description="Rename an existing folder in the workspace file tree.",
|
||||||
|
parameters={
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"folderId": {"type": "string", "description": "The folder ID to rename"},
|
||||||
|
"newName": {"type": "string", "description": "New folder name"},
|
||||||
|
},
|
||||||
|
"required": ["folderId", "newName"]
|
||||||
|
},
|
||||||
|
readOnly=False
|
||||||
|
)
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"moveFile", _moveFile,
|
||||||
|
description=(
|
||||||
|
"Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level."
|
||||||
|
),
|
||||||
|
parameters={
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"fileId": {"type": "string", "description": "The file ID to move"},
|
||||||
|
"folderId": {"type": "string", "description": "Target folder ID. Omit or null to move to root."},
|
||||||
|
},
|
||||||
|
"required": ["fileId"]
|
||||||
|
},
|
||||||
|
readOnly=False
|
||||||
|
)
|
||||||
|
|
||||||
registry.register(
|
registry.register(
|
||||||
"replaceInFile", _replaceInFile,
|
"replaceInFile", _replaceInFile,
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,12 @@ class ToolDefinition(BaseModel):
|
||||||
"""Schema for a tool available to the agent."""
|
"""Schema for a tool available to the agent."""
|
||||||
name: str = Field(description="Unique tool name")
|
name: str = Field(description="Unique tool name")
|
||||||
description: str = Field(description="What this tool does")
|
description: str = Field(description="What this tool does")
|
||||||
|
displayLabel: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Short human-readable activity phrase (e.g. 'researching on the web'). "
|
||||||
|
"Used for live progress messages in meetings. English gerund phrase; "
|
||||||
|
"localised by the caller."
|
||||||
|
)
|
||||||
parameters: Dict[str, Any] = Field(
|
parameters: Dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
description="JSON Schema for tool parameters"
|
description="JSON Schema for tool parameters"
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ class ToolRegistry:
|
||||||
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
|
def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
|
||||||
description: str = "", parameters: Dict[str, Any] = None,
|
description: str = "", parameters: Dict[str, Any] = None,
|
||||||
readOnly: bool = False, featureType: str = None,
|
readOnly: bool = False, featureType: str = None,
|
||||||
toolSet: str = None):
|
toolSet: str = None, displayLabel: str = None):
|
||||||
"""Register a tool with its handler function."""
|
"""Register a tool with its handler function."""
|
||||||
if name in self._tools:
|
if name in self._tools:
|
||||||
logger.warning(f"Tool '{name}' already registered, overwriting")
|
logger.warning(f"Tool '{name}' already registered, overwriting")
|
||||||
|
|
@ -31,6 +31,7 @@ class ToolRegistry:
|
||||||
self._tools[name] = ToolDefinition(
|
self._tools[name] = ToolDefinition(
|
||||||
name=name,
|
name=name,
|
||||||
description=description,
|
description=description,
|
||||||
|
displayLabel=displayLabel,
|
||||||
parameters=parameters or {},
|
parameters=parameters or {},
|
||||||
readOnly=readOnly,
|
readOnly=readOnly,
|
||||||
featureType=featureType,
|
featureType=featureType,
|
||||||
|
|
|
||||||
|
|
@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff
|
||||||
|
|
||||||
Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
|
Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
|
||||||
WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
|
WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
|
||||||
- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das")
|
- Recherche im Internet oder aktuelle Informationen noetig
|
||||||
- E-Mail an Teilnehmer/Kontakte versenden
|
- Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast
|
||||||
- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen
|
- E-Mail versenden
|
||||||
- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.)
|
- Dokument generieren oder in einer Datenquelle ablegen
|
||||||
- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.)
|
- Mehrere Schritte oder Tool-Aufrufe noetig
|
||||||
|
- Daten aus externen Quellen abrufen
|
||||||
|
|
||||||
|
Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true.
|
||||||
|
|
||||||
Wenn needsAgent=true:
|
Wenn needsAgent=true:
|
||||||
- Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).
|
- Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,9 @@ from .mainBackgroundJobService import (
|
||||||
startJob,
|
startJob,
|
||||||
getJobStatus,
|
getJobStatus,
|
||||||
listJobs,
|
listJobs,
|
||||||
|
cancelJob,
|
||||||
|
cancelJobsByConnection,
|
||||||
|
isTerminalStatus,
|
||||||
JobProgressCallback,
|
JobProgressCallback,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -15,5 +18,8 @@ __all__ = [
|
||||||
"startJob",
|
"startJob",
|
||||||
"getJobStatus",
|
"getJobStatus",
|
||||||
"listJobs",
|
"listJobs",
|
||||||
|
"cancelJob",
|
||||||
|
"cancelJobsByConnection",
|
||||||
|
"isTerminalStatus",
|
||||||
"JobProgressCallback",
|
"JobProgressCallback",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ clear message. No silent zombies.
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
|
@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app")
|
||||||
registerDatabase(JOBS_DATABASE)
|
registerDatabase(JOBS_DATABASE)
|
||||||
|
|
||||||
|
|
||||||
JobProgressCallback = Callable[[int, Optional[str]], None]
|
_CANCEL_CHECK_INTERVAL_S = 3.0
|
||||||
|
|
||||||
|
|
||||||
|
class JobProgressCallback:
|
||||||
|
"""Callable progress reporter with cooperative cancel-check for long-running walkers."""
|
||||||
|
|
||||||
|
def __init__(self, jobId: str):
|
||||||
|
self._jobId = jobId
|
||||||
|
self._cancelledCache: Optional[bool] = None
|
||||||
|
self._lastCheckedAt: float = 0.0
|
||||||
|
|
||||||
|
def __call__(self, progress: int, message: Optional[str] = None) -> None:
|
||||||
|
try:
|
||||||
|
clamped = max(0, min(100, int(progress)))
|
||||||
|
fields: Dict[str, Any] = {"progress": clamped}
|
||||||
|
if message is not None:
|
||||||
|
fields["progressMessage"] = message[:500]
|
||||||
|
_updateJob(self._jobId, fields)
|
||||||
|
except Exception as ex:
|
||||||
|
logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
|
||||||
|
|
||||||
|
def isCancelled(self) -> bool:
|
||||||
|
"""Check if this job was cancelled. Reads DB at most every 3s to limit load."""
|
||||||
|
now = time.time()
|
||||||
|
if self._cancelledCache is True:
|
||||||
|
return True
|
||||||
|
if now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S:
|
||||||
|
return self._cancelledCache or False
|
||||||
|
self._lastCheckedAt = now
|
||||||
|
try:
|
||||||
|
job = _loadJob(self._jobId)
|
||||||
|
if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value:
|
||||||
|
self._cancelledCache = True
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._cancelledCache = False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
|
JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -155,16 +195,7 @@ def _markError(jobId: str, errorMessage: str) -> None:
|
||||||
|
|
||||||
|
|
||||||
def _makeProgressCallback(jobId: str) -> JobProgressCallback:
|
def _makeProgressCallback(jobId: str) -> JobProgressCallback:
|
||||||
def _cb(progress: int, message: Optional[str] = None) -> None:
|
return JobProgressCallback(jobId)
|
||||||
try:
|
|
||||||
clamped = max(0, min(100, int(progress)))
|
|
||||||
fields: Dict[str, Any] = {"progress": clamped}
|
|
||||||
if message is not None:
|
|
||||||
fields["progressMessage"] = message[:500]
|
|
||||||
_updateJob(jobId, fields)
|
|
||||||
except Exception as ex:
|
|
||||||
logger.warning("Progress update failed for job %s: %s", jobId, ex)
|
|
||||||
return _cb
|
|
||||||
|
|
||||||
|
|
||||||
async def _runJob(jobId: str) -> None:
|
async def _runJob(jobId: str) -> None:
|
||||||
|
|
@ -220,12 +251,51 @@ def isTerminalStatus(status: str) -> bool:
|
||||||
return status in {s.value for s in TERMINAL_JOB_STATUSES}
|
return status in {s.value for s in TERMINAL_JOB_STATUSES}
|
||||||
|
|
||||||
|
|
||||||
|
def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool:
|
||||||
|
"""Mark a job as CANCELLED. Walkers detect this via JobProgressCallback.isCancelled().
|
||||||
|
|
||||||
|
Returns False if the job is already in a terminal state or does not exist.
|
||||||
|
"""
|
||||||
|
job = _loadJob(jobId)
|
||||||
|
if not job:
|
||||||
|
return False
|
||||||
|
if isTerminalStatus(job.get("status", "")):
|
||||||
|
return False
|
||||||
|
_updateJob(jobId, {
|
||||||
|
"status": BackgroundJobStatusEnum.CANCELLED.value,
|
||||||
|
"errorMessage": f"cancelled: {reason}"[:1000],
|
||||||
|
"finishedAt": datetime.now(timezone.utc).timestamp(),
|
||||||
|
})
|
||||||
|
logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int:
|
||||||
|
"""Cancel all RUNNING/PENDING jobs whose payload.connectionId matches.
|
||||||
|
|
||||||
|
Returns count of jobs marked as cancelled.
|
||||||
|
"""
|
||||||
|
db = _getDb()
|
||||||
|
rows = db.getRecordset(BackgroundJob, recordFilter={"jobType": jobType})
|
||||||
|
count = 0
|
||||||
|
for row in rows:
|
||||||
|
status = row.get("status", "")
|
||||||
|
if status not in (BackgroundJobStatusEnum.PENDING.value, BackgroundJobStatusEnum.RUNNING.value):
|
||||||
|
continue
|
||||||
|
payload = row.get("payload") or {}
|
||||||
|
if payload.get("connectionId") == connectionId:
|
||||||
|
if cancelJob(row["id"], reason=f"connection_stop:{connectionId[:8]}"):
|
||||||
|
count += 1
|
||||||
|
return count
|
||||||
|
|
||||||
|
|
||||||
def recoverInterruptedJobs() -> int:
|
def recoverInterruptedJobs() -> int:
|
||||||
"""Flip any RUNNING jobs to ERROR (called at worker boot).
|
"""Flip any RUNNING jobs to ERROR and re-queue bootstrap jobs (called at worker boot).
|
||||||
|
|
||||||
A RUNNING job in the DB after process restart means the previous worker
|
A RUNNING job in the DB after process restart means the previous worker
|
||||||
died mid-execution; the asyncio task is gone and the job will never
|
died mid-execution; the asyncio task is gone and the job will never
|
||||||
finish on its own.
|
finish on its own. For connection.bootstrap jobs, a fresh job is
|
||||||
|
automatically re-queued so the user doesn't have to manually retry.
|
||||||
"""
|
"""
|
||||||
db = _getDb()
|
db = _getDb()
|
||||||
try:
|
try:
|
||||||
|
|
@ -234,12 +304,34 @@ def recoverInterruptedJobs() -> int:
|
||||||
logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex)
|
logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex)
|
||||||
return 0
|
return 0
|
||||||
count = 0
|
count = 0
|
||||||
|
requeued = 0
|
||||||
for row in rows:
|
for row in rows:
|
||||||
try:
|
try:
|
||||||
_markError(row["id"], "Interrupted by worker restart")
|
_markError(row["id"], "Interrupted by worker restart")
|
||||||
count += 1
|
count += 1
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex)
|
logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if row.get("jobType") == "connection.bootstrap":
|
||||||
|
payload = row.get("payload") or {}
|
||||||
|
if payload.get("connectionId"):
|
||||||
|
try:
|
||||||
|
newJob = BackgroundJob(
|
||||||
|
jobType="connection.bootstrap",
|
||||||
|
payload=payload,
|
||||||
|
triggeredBy="recovery.requeue",
|
||||||
|
)
|
||||||
|
record = db.recordCreate(BackgroundJob, _serialiseDatetimes(newJob.model_dump()))
|
||||||
|
asyncio.create_task(_runJob(record["id"]))
|
||||||
|
requeued += 1
|
||||||
|
logger.info(
|
||||||
|
"recoverInterruptedJobs: re-queued bootstrap for connectionId=%s (new jobId=%s)",
|
||||||
|
payload["connectionId"], record["id"],
|
||||||
|
)
|
||||||
|
except Exception as reqEx:
|
||||||
|
logger.warning("recoverInterruptedJobs: re-queue failed for %s: %s", row.get("id"), reqEx)
|
||||||
|
|
||||||
if count:
|
if count:
|
||||||
logger.warning("Recovered %d interrupted background job(s) after restart", count)
|
logger.warning("Recovered %d interrupted background job(s) after restart (re-queued %d)", count, requeued)
|
||||||
return count
|
return count
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,7 @@ class ContainerExtractor(Extractor):
|
||||||
"""Extract by recursively unpacking the container."""
|
"""Extract by recursively unpacking the container."""
|
||||||
fileName = context.get("fileName", "archive")
|
fileName = context.get("fileName", "archive")
|
||||||
mimeType = context.get("mimeType", "application/octet-stream")
|
mimeType = context.get("mimeType", "application/octet-stream")
|
||||||
|
cascadeDepth = context.get("_cascadeDepth", 0)
|
||||||
|
|
||||||
rootId = makeId()
|
rootId = makeId()
|
||||||
parts: List[ContentPart] = [
|
parts: List[ContentPart] = [
|
||||||
|
|
@ -97,7 +98,7 @@ class ContainerExtractor(Extractor):
|
||||||
parts.extend(lazy)
|
parts.extend(lazy)
|
||||||
return parts
|
return parts
|
||||||
|
|
||||||
state = {"totalSize": 0, "fileCount": 0}
|
state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth}
|
||||||
try:
|
try:
|
||||||
childParts = _resolveContainerRecursive(
|
childParts = _resolveContainerRecursive(
|
||||||
fileBytes, mimeType, fileName, rootId, "", 0, state
|
fileBytes, mimeType, fileName, rootId, "", 0, state
|
||||||
|
|
@ -209,7 +210,12 @@ def _addFilePart(
|
||||||
|
|
||||||
if extractor and not isinstance(extractor, ContainerExtractor):
|
if extractor and not isinstance(extractor, ContainerExtractor):
|
||||||
try:
|
try:
|
||||||
childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime})
|
cascadeDepth = state.get("cascadeDepth", 0)
|
||||||
|
childParts = extractor.extract(data, {
|
||||||
|
"fileName": fileName,
|
||||||
|
"mimeType": detectedMime,
|
||||||
|
"_cascadeDepth": cascadeDepth + 1,
|
||||||
|
})
|
||||||
for part in childParts:
|
for part in childParts:
|
||||||
part.parentId = parentId
|
part.parentId = parentId
|
||||||
if not part.metadata:
|
if not part.metadata:
|
||||||
|
|
|
||||||
|
|
@ -53,12 +53,13 @@ class EmailExtractor(Extractor):
|
||||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||||
fileName = context.get("fileName", "email")
|
fileName = context.get("fileName", "email")
|
||||||
lower = (fileName or "").lower()
|
lower = (fileName or "").lower()
|
||||||
|
depth = context.get("_cascadeDepth", 0)
|
||||||
|
|
||||||
if lower.endswith(".msg"):
|
if lower.endswith(".msg"):
|
||||||
return self._extractMsg(fileBytes, fileName)
|
return self._extractMsg(fileBytes, fileName, depth)
|
||||||
return self._extractEml(fileBytes, fileName)
|
return self._extractEml(fileBytes, fileName, depth)
|
||||||
|
|
||||||
def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
|
def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
|
||||||
"""Parse standard EML (RFC 822) using stdlib email."""
|
"""Parse standard EML (RFC 822) using stdlib email."""
|
||||||
rootId = makeId()
|
rootId = makeId()
|
||||||
parts: List[ContentPart] = []
|
parts: List[ContentPart] = []
|
||||||
|
|
@ -91,7 +92,7 @@ class EmailExtractor(Extractor):
|
||||||
attachName = part.get_filename() or "attachment"
|
attachName = part.get_filename() or "attachment"
|
||||||
attachData = part.get_payload(decode=True)
|
attachData = part.get_payload(decode=True)
|
||||||
if attachData:
|
if attachData:
|
||||||
parts.extend(_delegateAttachment(attachData, attachName, rootId))
|
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if contentType == "text/plain":
|
if contentType == "text/plain":
|
||||||
|
|
@ -113,7 +114,7 @@ class EmailExtractor(Extractor):
|
||||||
|
|
||||||
return parts
|
return parts
|
||||||
|
|
||||||
def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
|
def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
|
||||||
"""Parse Outlook MSG files using extract-msg (optional)."""
|
"""Parse Outlook MSG files using extract-msg (optional)."""
|
||||||
rootId = makeId()
|
rootId = makeId()
|
||||||
parts: List[ContentPart] = []
|
parts: List[ContentPart] = []
|
||||||
|
|
@ -179,7 +180,7 @@ class EmailExtractor(Extractor):
|
||||||
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
|
attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
|
||||||
attachData = getattr(attachment, "data", None)
|
attachData = getattr(attachment, "data", None)
|
||||||
if attachData:
|
if attachData:
|
||||||
parts.extend(_delegateAttachment(attachData, attachName, rootId))
|
parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
msgFile.close()
|
msgFile.close()
|
||||||
|
|
@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str:
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]:
|
_MAX_CASCADE_DEPTH = 10
|
||||||
"""Delegate an attachment to the appropriate type-specific extractor."""
|
|
||||||
|
def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]:
|
||||||
|
"""Delegate an attachment to the appropriate type-specific extractor.
|
||||||
|
|
||||||
|
Passes ``_cascadeDepth`` through the context so nested Email→Container→Email
|
||||||
|
chains share a global depth counter and don't recurse infinitely.
|
||||||
|
"""
|
||||||
|
if depth >= _MAX_CASCADE_DEPTH:
|
||||||
|
logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction")
|
||||||
|
import base64
|
||||||
|
encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
|
||||||
|
return [ContentPart(
|
||||||
|
id=makeId(), parentId=parentId, label=attachName,
|
||||||
|
typeGroup="binary", mimeType="application/octet-stream",
|
||||||
|
data=encodedData,
|
||||||
|
metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True},
|
||||||
|
)]
|
||||||
|
|
||||||
guessedMime, _ = mimetypes.guess_type(attachName)
|
guessedMime, _ = mimetypes.guess_type(attachName)
|
||||||
detectedMime = guessedMime or "application/octet-stream"
|
detectedMime = guessedMime or "application/octet-stream"
|
||||||
|
|
||||||
from ..subRegistry import ExtractorRegistry
|
from ..subRegistry import getExtractorRegistry
|
||||||
registry = ExtractorRegistry()
|
registry = getExtractorRegistry()
|
||||||
extractor = registry.resolve(detectedMime, attachName)
|
extractor = registry.resolve(detectedMime, attachName)
|
||||||
|
|
||||||
if extractor and not isinstance(extractor, EmailExtractor):
|
if extractor:
|
||||||
try:
|
try:
|
||||||
childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime})
|
childParts = extractor.extract(attachData, {
|
||||||
|
"fileName": attachName,
|
||||||
|
"mimeType": detectedMime,
|
||||||
|
"_cascadeDepth": depth + 1,
|
||||||
|
})
|
||||||
for part in childParts:
|
for part in childParts:
|
||||||
part.parentId = parentId
|
part.parentId = parentId
|
||||||
if not part.metadata:
|
if not part.metadata:
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ class ExtractionService:
|
||||||
self._interfaceDbComponent = getComponentInterface(
|
self._interfaceDbComponent = getComponentInterface(
|
||||||
context.user,
|
context.user,
|
||||||
mandateId=context.mandate_id,
|
mandateId=context.mandate_id,
|
||||||
|
featureInstanceId=context.feature_instance_id,
|
||||||
)
|
)
|
||||||
self._extractorRegistry = getExtractorRegistry()
|
self._extractorRegistry = getExtractorRegistry()
|
||||||
if ExtractionService._sharedChunkerRegistry is None:
|
if ExtractionService._sharedChunkerRegistry is None:
|
||||||
|
|
|
||||||
|
|
@ -122,21 +122,54 @@ def _onConnectionRevoked(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_SOURCE_TYPE_MAP = {
|
||||||
|
"msft": {
|
||||||
|
"sharepoint": ("sharepointFolder", "onedriveFolder"),
|
||||||
|
"outlook": ("outlookFolder", "calendarFolder", "contactFolder"),
|
||||||
|
},
|
||||||
|
"google": {
|
||||||
|
"drive": ("googleDriveFolder",),
|
||||||
|
"gmail": ("gmailFolder",),
|
||||||
|
},
|
||||||
|
"clickup": {
|
||||||
|
"clickup": ("clickupList",),
|
||||||
|
},
|
||||||
|
"infomaniak": {
|
||||||
|
"kdrive": ("kdriveFolder",),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
|
||||||
|
"""Load DataSource rows with ragIndexEnabled=true for a connection.
|
||||||
|
|
||||||
|
If dataSourceIds is provided (mini-bootstrap), filter to only those IDs.
|
||||||
|
"""
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
|
||||||
|
rootIf = getRootInterface()
|
||||||
|
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
||||||
|
if dataSourceIds:
|
||||||
|
return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")]
|
||||||
|
return [ds for ds in allDs if ds.get("ragIndexEnabled")]
|
||||||
|
|
||||||
|
|
||||||
async def _bootstrapJobHandler(
|
async def _bootstrapJobHandler(
|
||||||
job: Dict[str, Any],
|
job: Dict[str, Any],
|
||||||
progressCb,
|
progressCb,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
|
"""Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources."""
|
||||||
payload = job.get("payload") or {}
|
payload = job.get("payload") or {}
|
||||||
connectionId = payload.get("connectionId")
|
connectionId = payload.get("connectionId")
|
||||||
authority = (payload.get("authority") or "").lower()
|
authority = (payload.get("authority") or "").lower()
|
||||||
|
dataSourceIds = payload.get("dataSourceIds")
|
||||||
if not connectionId:
|
if not connectionId:
|
||||||
raise ValueError("connection.bootstrap requires payload.connectionId")
|
raise ValueError("connection.bootstrap requires payload.connectionId")
|
||||||
|
|
||||||
progressCb(5, f"resolving {authority} connection")
|
progressCb(5, f"resolving {authority} connection")
|
||||||
|
|
||||||
# Defensive consent check: if the connection has since disabled knowledge ingestion
|
# Defensive consent check
|
||||||
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
|
|
||||||
try:
|
try:
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
_root = getRootInterface()
|
_root = getRootInterface()
|
||||||
|
|
@ -156,6 +189,21 @@ async def _bootstrapJobHandler(
|
||||||
except Exception as _guardErr:
|
except Exception as _guardErr:
|
||||||
logger.debug("Could not load connection for consent guard: %s", _guardErr)
|
logger.debug("Could not load connection for consent guard: %s", _guardErr)
|
||||||
|
|
||||||
|
# Load only ragIndexEnabled DataSources for this connection
|
||||||
|
dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds)
|
||||||
|
if not dataSources:
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s",
|
||||||
|
connectionId,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.skipped",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"authority": authority,
|
||||||
|
"reason": "no_data_sources",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"}
|
||||||
|
|
||||||
def _normalize(res: Any, label: str) -> Dict[str, Any]:
|
def _normalize(res: Any, label: str) -> Dict[str, Any]:
|
||||||
if isinstance(res, Exception):
|
if isinstance(res, Exception):
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -165,6 +213,10 @@ async def _bootstrapJobHandler(
|
||||||
return {"error": str(res)}
|
return {"error": str(res)}
|
||||||
return res or {}
|
return res or {}
|
||||||
|
|
||||||
|
def _filterDs(walkerKey: str) -> list:
|
||||||
|
sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ())
|
||||||
|
return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes]
|
||||||
|
|
||||||
if authority == "msft":
|
if authority == "msft":
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
|
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
|
||||||
bootstrapSharepoint,
|
bootstrapSharepoint,
|
||||||
|
|
@ -174,9 +226,14 @@ async def _bootstrapJobHandler(
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(10, "sharepoint + outlook")
|
progressCb(10, "sharepoint + outlook")
|
||||||
|
spDs = _filterDs("sharepoint")
|
||||||
|
olDs = _filterDs("outlook")
|
||||||
|
async def _noopResult():
|
||||||
|
return {"skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
spResult, olResult = await asyncio.gather(
|
spResult, olResult = await asyncio.gather(
|
||||||
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
|
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(),
|
||||||
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
|
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else _noopResult(),
|
||||||
return_exceptions=True,
|
return_exceptions=True,
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
|
|
@ -195,9 +252,14 @@ async def _bootstrapJobHandler(
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(10, "drive + gmail")
|
progressCb(10, "drive + gmail")
|
||||||
|
gdDs = _filterDs("drive")
|
||||||
|
gmDs = _filterDs("gmail")
|
||||||
|
async def _noopResult():
|
||||||
|
return {"skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
gdResult, gmResult = await asyncio.gather(
|
gdResult, gmResult = await asyncio.gather(
|
||||||
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
|
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(),
|
||||||
bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
|
bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(),
|
||||||
return_exceptions=True,
|
return_exceptions=True,
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
|
|
@ -213,7 +275,8 @@ async def _bootstrapJobHandler(
|
||||||
)
|
)
|
||||||
|
|
||||||
progressCb(10, "clickup tasks")
|
progressCb(10, "clickup tasks")
|
||||||
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
|
cuDs = _filterDs("clickup")
|
||||||
|
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
|
||||||
return {
|
return {
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
"authority": authority,
|
"authority": authority,
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ is None).
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description"
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ConnectionIngestionPrefs:
|
class ConnectionIngestionPrefs:
|
||||||
"""Parsed per-connection preferences for knowledge ingestion walkers."""
|
"""Parsed per-connection preferences for knowledge ingestion walkers.
|
||||||
|
|
||||||
# PII
|
Neutralization is now controlled per DataSource.neutralize (not here).
|
||||||
neutralizeBeforeEmbed: bool = False
|
Surface toggles are obsolete — walker iterates only over ragIndexEnabled DataSources.
|
||||||
|
"""
|
||||||
|
|
||||||
# Mail (Outlook + Gmail)
|
# Mail (Outlook + Gmail)
|
||||||
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
|
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
|
||||||
|
|
@ -32,18 +33,11 @@ class ConnectionIngestionPrefs:
|
||||||
|
|
||||||
# Files (Drive / SharePoint / OneDrive)
|
# Files (Drive / SharePoint / OneDrive)
|
||||||
filesIndexBinaries: bool = True
|
filesIndexBinaries: bool = True
|
||||||
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
|
|
||||||
|
|
||||||
# ClickUp
|
# ClickUp
|
||||||
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
|
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
|
||||||
clickupIndexAttachments: bool = False
|
clickupIndexAttachments: bool = False
|
||||||
|
|
||||||
# Per-authority surface toggles (default everything on)
|
|
||||||
gmailEnabled: bool = True
|
|
||||||
driveEnabled: bool = True
|
|
||||||
sharepointEnabled: bool = True
|
|
||||||
outlookEnabled: bool = True
|
|
||||||
|
|
||||||
# Time window
|
# Time window
|
||||||
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
|
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
|
||||||
|
|
||||||
|
|
@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
|
||||||
v = raw.get(key)
|
v = raw.get(key)
|
||||||
return int(v) if isinstance(v, int) else default
|
return int(v) if isinstance(v, int) else default
|
||||||
|
|
||||||
surface = raw.get("surfaceToggles") or {}
|
|
||||||
google_surf = surface.get("google") or {}
|
|
||||||
msft_surf = surface.get("msft") or {}
|
|
||||||
|
|
||||||
return ConnectionIngestionPrefs(
|
return ConnectionIngestionPrefs(
|
||||||
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
|
|
||||||
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
|
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
|
||||||
mailIndexAttachments=_bool("mailIndexAttachments", False),
|
mailIndexAttachments=_bool("mailIndexAttachments", False),
|
||||||
filesIndexBinaries=_bool("filesIndexBinaries", True),
|
filesIndexBinaries=_bool("filesIndexBinaries", True),
|
||||||
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
|
|
||||||
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
|
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
|
||||||
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
|
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
|
||||||
gmailEnabled=bool(google_surf.get("gmail", True)),
|
|
||||||
driveEnabled=bool(google_surf.get("drive", True)),
|
|
||||||
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
|
|
||||||
outlookEnabled=bool(msft_surf.get("outlook", True)),
|
|
||||||
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
|
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ import logging
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from typing import Any, Callable, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -150,8 +150,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
|
||||||
"data": description,
|
"data": description,
|
||||||
"contextRef": {"part": "description"},
|
"contextRef": {"part": "description"},
|
||||||
})
|
})
|
||||||
# text_content is ClickUp's rendered-markdown version; include if it adds
|
|
||||||
# something beyond the plain description (common for bullet lists, checklists).
|
|
||||||
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
|
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
|
||||||
if textContent and textContent != description:
|
if textContent and textContent != description:
|
||||||
parts.append({
|
parts.append({
|
||||||
|
|
@ -166,33 +164,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
|
||||||
async def bootstrapClickup(
|
async def bootstrapClickup(
|
||||||
connectionId: str,
|
connectionId: str,
|
||||||
*,
|
*,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
progressCb: Optional[Any] = None,
|
||||||
adapter: Any = None,
|
adapter: Any = None,
|
||||||
connection: Any = None,
|
connection: Any = None,
|
||||||
knowledgeService: Any = None,
|
knowledgeService: Any = None,
|
||||||
limits: Optional[ClickupBootstrapLimits] = None,
|
limits: Optional[ClickupBootstrapLimits] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
|
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc.
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
|
||||||
prefs = loadConnectionPrefs(connectionId)
|
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||||
|
Each DataSource defines the neutralize policy for its subtree.
|
||||||
|
"""
|
||||||
|
if not dataSources:
|
||||||
|
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
if not limits:
|
if not limits:
|
||||||
limits = ClickupBootstrapLimits(
|
limits = ClickupBootstrapLimits()
|
||||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
|
||||||
neutralize=prefs.neutralizeBeforeEmbed,
|
|
||||||
clickupScope=prefs.clickupScope,
|
|
||||||
)
|
|
||||||
|
|
||||||
startMs = time.time()
|
startMs = time.time()
|
||||||
result = ClickupBootstrapResult(connectionId=connectionId)
|
result = ClickupBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.started part=clickup connectionId=%s",
|
"ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d",
|
||||||
connectionId,
|
connectionId, len(dataSources),
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.started",
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
"part": "clickup",
|
"part": "clickup",
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceCount": len(dataSources),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -215,30 +215,56 @@ async def bootstrapClickup(
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
return _finalizeResult(connectionId, result, startMs)
|
||||||
|
|
||||||
teams = (teamsResp or {}).get("teams") or []
|
teams = (teamsResp or {}).get("teams") or []
|
||||||
for team in teams[: limits.maxWorkspaces]:
|
|
||||||
|
cancelled = False
|
||||||
|
for ds in dataSources:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||||
break
|
break
|
||||||
teamId = str(team.get("id", "") or "")
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
if not teamId:
|
cancelled = True
|
||||||
continue
|
break
|
||||||
result.workspaces += 1
|
|
||||||
try:
|
|
||||||
await _walkTeam(
|
|
||||||
svc=svc,
|
|
||||||
knowledgeService=knowledgeService,
|
|
||||||
connectionId=connectionId,
|
|
||||||
mandateId=mandateId,
|
|
||||||
userId=userId,
|
|
||||||
team=team,
|
|
||||||
limits=limits,
|
|
||||||
result=result,
|
|
||||||
progressCb=progressCb,
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
|
|
||||||
result.errors.append(f"team({teamId}): {exc}")
|
|
||||||
|
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
dsId = ds.get("id", "")
|
||||||
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
dsLimits = ClickupBootstrapLimits(
|
||||||
|
maxTasks=limits.maxTasks,
|
||||||
|
maxWorkspaces=limits.maxWorkspaces,
|
||||||
|
maxListsPerWorkspace=limits.maxListsPerWorkspace,
|
||||||
|
maxDescriptionChars=limits.maxDescriptionChars,
|
||||||
|
maxAgeDays=limits.maxAgeDays,
|
||||||
|
includeClosed=limits.includeClosed,
|
||||||
|
neutralize=dsNeutralize,
|
||||||
|
clickupScope=limits.clickupScope,
|
||||||
|
)
|
||||||
|
|
||||||
|
for team in teams[:dsLimits.maxWorkspaces]:
|
||||||
|
if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
|
||||||
|
break
|
||||||
|
teamId = str(team.get("id", "") or "")
|
||||||
|
if not teamId:
|
||||||
|
continue
|
||||||
|
result.workspaces += 1
|
||||||
|
try:
|
||||||
|
await _walkTeam(
|
||||||
|
svc=svc,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
team=team,
|
||||||
|
limits=dsLimits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dsId,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
|
||||||
|
result.errors.append(f"team({teamId}): {exc}")
|
||||||
|
|
||||||
|
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||||
|
if cancelled:
|
||||||
|
finalResult["cancelled"] = True
|
||||||
|
return finalResult
|
||||||
|
|
||||||
|
|
||||||
async def _resolveDependencies(connectionId: str):
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
|
@ -280,8 +306,12 @@ async def _walkTeam(
|
||||||
team: Dict[str, Any],
|
team: Dict[str, Any],
|
||||||
limits: ClickupBootstrapLimits,
|
limits: ClickupBootstrapLimits,
|
||||||
result: ClickupBootstrapResult,
|
result: ClickupBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
|
|
||||||
teamId = str(team.get("id", "") or "")
|
teamId = str(team.get("id", "") or "")
|
||||||
spacesResp = await svc.getSpaces(teamId)
|
spacesResp = await svc.getSpaces(teamId)
|
||||||
spaces = (spacesResp or {}).get("spaces") or []
|
spaces = (spacesResp or {}).get("spaces") or []
|
||||||
|
|
@ -294,14 +324,12 @@ async def _walkTeam(
|
||||||
if not spaceId:
|
if not spaceId:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Folderless lists directly under the space
|
|
||||||
folderless = await svc.getFolderlessLists(spaceId)
|
folderless = await svc.getFolderlessLists(spaceId)
|
||||||
for lst in (folderless or {}).get("lists") or []:
|
for lst in (folderless or {}).get("lists") or []:
|
||||||
if len(listsCollected) >= limits.maxListsPerWorkspace:
|
if len(listsCollected) >= limits.maxListsPerWorkspace:
|
||||||
break
|
break
|
||||||
listsCollected.append({**lst, "_space": space})
|
listsCollected.append({**lst, "_space": space})
|
||||||
|
|
||||||
# Lists inside folders
|
|
||||||
foldersResp = await svc.getFolders(spaceId)
|
foldersResp = await svc.getFolders(spaceId)
|
||||||
for folder in (foldersResp or {}).get("folders") or []:
|
for folder in (foldersResp or {}).get("folders") or []:
|
||||||
if len(listsCollected) >= limits.maxListsPerWorkspace:
|
if len(listsCollected) >= limits.maxListsPerWorkspace:
|
||||||
|
|
@ -318,6 +346,8 @@ async def _walkTeam(
|
||||||
for lst in listsCollected:
|
for lst in listsCollected:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
|
||||||
return
|
return
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
result.lists += 1
|
result.lists += 1
|
||||||
await _walkList(
|
await _walkList(
|
||||||
svc=svc,
|
svc=svc,
|
||||||
|
|
@ -330,6 +360,7 @@ async def _walkTeam(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -344,13 +375,16 @@ async def _walkList(
|
||||||
lst: Dict[str, Any],
|
lst: Dict[str, Any],
|
||||||
limits: ClickupBootstrapLimits,
|
limits: ClickupBootstrapLimits,
|
||||||
result: ClickupBootstrapResult,
|
result: ClickupBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
listId = str(lst.get("id", "") or "")
|
listId = str(lst.get("id", "") or "")
|
||||||
if not listId:
|
if not listId:
|
||||||
return
|
return
|
||||||
page = 0
|
page = 0
|
||||||
while result.indexed + result.skippedDuplicate < limits.maxTasks:
|
while result.indexed + result.skippedDuplicate < limits.maxTasks:
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
resp = await svc.getTasksInList(
|
resp = await svc.getTasksInList(
|
||||||
listId,
|
listId,
|
||||||
page=page,
|
page=page,
|
||||||
|
|
@ -371,7 +405,6 @@ async def _walkList(
|
||||||
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
|
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
|
||||||
result.skippedPolicy += 1
|
result.skippedPolicy += 1
|
||||||
continue
|
continue
|
||||||
# Inject the list/folder/space metadata we already loaded.
|
|
||||||
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
|
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
|
||||||
task["folder"] = task.get("folder") or lst.get("_folder") or {}
|
task["folder"] = task.get("folder") or lst.get("_folder") or {}
|
||||||
task["space"] = task.get("space") or lst.get("_space") or {}
|
task["space"] = task.get("space") or lst.get("_space") or {}
|
||||||
|
|
@ -385,9 +418,10 @@ async def _walkList(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page
|
if len(tasks) < 100:
|
||||||
return
|
return
|
||||||
page += 1
|
page += 1
|
||||||
|
|
||||||
|
|
@ -402,7 +436,8 @@ async def _ingestTask(
|
||||||
task: Dict[str, Any],
|
task: Dict[str, Any],
|
||||||
limits: ClickupBootstrapLimits,
|
limits: ClickupBootstrapLimits,
|
||||||
result: ClickupBootstrapResult,
|
result: ClickupBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
|
@ -431,6 +466,7 @@ async def _ingestTask(
|
||||||
neutralize=limits.neutralize,
|
neutralize=limits.neutralize,
|
||||||
provenance={
|
provenance={
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
"authority": "clickup",
|
"authority": "clickup",
|
||||||
"service": "clickup",
|
"service": "clickup",
|
||||||
"externalItemId": taskId,
|
"externalItemId": taskId,
|
||||||
|
|
@ -456,8 +492,10 @@ async def _ingestTask(
|
||||||
else:
|
else:
|
||||||
result.failed += 1
|
result.failed += 1
|
||||||
|
|
||||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
processed = result.indexed + result.skippedDuplicate
|
||||||
processed = result.indexed + result.skippedDuplicate
|
if progressCb is not None and processed % 50 == 0:
|
||||||
|
if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
progressCb(
|
progressCb(
|
||||||
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
|
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
@ -30,7 +31,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||||
MAX_DEPTH_DEFAULT = 4
|
MAX_DEPTH_DEFAULT = 4
|
||||||
MAX_AGE_DAYS_DEFAULT = 365
|
MAX_AGE_DAYS_DEFAULT = 365
|
||||||
|
|
||||||
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
|
|
||||||
FOLDER_MIME = "application/vnd.google-apps.folder"
|
FOLDER_MIME = "application/vnd.google-apps.folder"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -41,12 +41,8 @@ class GdriveBootstrapLimits:
|
||||||
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
|
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
|
||||||
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
|
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
|
||||||
maxDepth: int = MAX_DEPTH_DEFAULT
|
maxDepth: int = MAX_DEPTH_DEFAULT
|
||||||
# Only ingest files modified within the last N days. None disables filter.
|
|
||||||
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
|
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
|
||||||
# Pass-through to IngestionJob.neutralize
|
|
||||||
neutralize: bool = False
|
neutralize: bool = False
|
||||||
# Whether to skip binary/non-text files
|
|
||||||
filesIndexBinaries: bool = True
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -95,10 +91,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
|
||||||
if not maxAgeDays:
|
if not maxAgeDays:
|
||||||
return True
|
return True
|
||||||
if not modifiedIso:
|
if not modifiedIso:
|
||||||
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
|
|
||||||
return True
|
return True
|
||||||
try:
|
try:
|
||||||
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
|
|
||||||
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
|
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
|
||||||
except Exception:
|
except Exception:
|
||||||
return True
|
return True
|
||||||
|
|
@ -111,34 +105,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
|
||||||
async def bootstrapGdrive(
|
async def bootstrapGdrive(
|
||||||
connectionId: str,
|
connectionId: str,
|
||||||
*,
|
*,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
progressCb: Optional[Any] = None,
|
||||||
adapter: Any = None,
|
adapter: Any = None,
|
||||||
connection: Any = None,
|
connection: Any = None,
|
||||||
knowledgeService: Any = None,
|
knowledgeService: Any = None,
|
||||||
limits: Optional[GdriveBootstrapLimits] = None,
|
limits: Optional[GdriveBootstrapLimits] = None,
|
||||||
runExtractionFn: Optional[Callable[..., Any]] = None,
|
runExtractionFn: Optional[Callable[..., Any]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Walk My Drive starting from the virtual root folder."""
|
"""Walk My Drive starting from the virtual root folder.
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
|
||||||
prefs = loadConnectionPrefs(connectionId)
|
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||||
|
Each DataSource defines the root path + neutralize policy for its subtree.
|
||||||
|
"""
|
||||||
|
if not dataSources:
|
||||||
|
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
if not limits:
|
if not limits:
|
||||||
limits = GdriveBootstrapLimits(
|
limits = GdriveBootstrapLimits()
|
||||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
|
||||||
neutralize=prefs.neutralizeBeforeEmbed,
|
|
||||||
filesIndexBinaries=prefs.filesIndexBinaries,
|
|
||||||
)
|
|
||||||
|
|
||||||
startMs = time.time()
|
startMs = time.time()
|
||||||
result = GdriveBootstrapResult(connectionId=connectionId)
|
result = GdriveBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
|
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d",
|
||||||
connectionId,
|
connectionId, len(dataSources),
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.started",
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
"part": "gdrive",
|
"part": "gdrive",
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceCount": len(dataSources),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -158,25 +154,51 @@ async def bootstrapGdrive(
|
||||||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||||
|
|
||||||
try:
|
cancelled = False
|
||||||
await _walkFolder(
|
for ds in dataSources:
|
||||||
adapter=adapter,
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
knowledgeService=knowledgeService,
|
break
|
||||||
runExtractionFn=runExtractionFn,
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
connectionId=connectionId,
|
cancelled = True
|
||||||
mandateId=mandateId,
|
break
|
||||||
userId=userId,
|
|
||||||
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
|
|
||||||
depth=0,
|
|
||||||
limits=limits,
|
|
||||||
result=result,
|
|
||||||
progressCb=progressCb,
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
|
|
||||||
result.errors.append(f"walk: {exc}")
|
|
||||||
|
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
dsPath = ds.get("path", "/")
|
||||||
|
dsId = ds.get("id", "")
|
||||||
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
|
||||||
|
dsLimits = GdriveBootstrapLimits(
|
||||||
|
maxItems=limits.maxItems,
|
||||||
|
maxBytes=limits.maxBytes,
|
||||||
|
maxFileSize=limits.maxFileSize,
|
||||||
|
skipMimePrefixes=limits.skipMimePrefixes,
|
||||||
|
maxDepth=limits.maxDepth,
|
||||||
|
maxAgeDays=dsMaxAgeDays,
|
||||||
|
neutralize=dsNeutralize,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
await _walkFolder(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
runExtractionFn=runExtractionFn,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
folderPath=dsPath,
|
||||||
|
depth=0,
|
||||||
|
limits=dsLimits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dsId,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
|
||||||
|
result.errors.append(f"walk({dsPath}): {exc}")
|
||||||
|
|
||||||
|
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||||
|
if cancelled:
|
||||||
|
finalResult["cancelled"] = True
|
||||||
|
return finalResult
|
||||||
|
|
||||||
|
|
||||||
async def _resolveDependencies(connectionId: str):
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
|
@ -220,10 +242,13 @@ async def _walkFolder(
|
||||||
depth: int,
|
depth: int,
|
||||||
limits: GdriveBootstrapLimits,
|
limits: GdriveBootstrapLimits,
|
||||||
result: GdriveBootstrapResult,
|
result: GdriveBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
if depth > limits.maxDepth:
|
if depth > limits.maxDepth:
|
||||||
return
|
return
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
entries = await adapter.browse(folderPath)
|
entries = await adapter.browse(folderPath)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
@ -236,6 +261,8 @@ async def _walkFolder(
|
||||||
return
|
return
|
||||||
if result.bytesProcessed >= limits.maxBytes:
|
if result.bytesProcessed >= limits.maxBytes:
|
||||||
return
|
return
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
|
return
|
||||||
|
|
||||||
entryPath = getattr(entry, "path", "") or ""
|
entryPath = getattr(entry, "path", "") or ""
|
||||||
metadata = getattr(entry, "metadata", {}) or {}
|
metadata = getattr(entry, "metadata", {}) or {}
|
||||||
|
|
@ -254,6 +281,7 @@ async def _walkFolder(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -288,6 +316,7 @@ async def _walkFolder(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -306,7 +335,8 @@ async def _ingestOne(
|
||||||
revision: Optional[str],
|
revision: Optional[str],
|
||||||
limits: GdriveBootstrapLimits,
|
limits: GdriveBootstrapLimits,
|
||||||
result: GdriveBootstrapResult,
|
result: GdriveBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
|
@ -321,14 +351,13 @@ async def _ingestOne(
|
||||||
result.errors.append(f"download({entryPath}): {exc}")
|
result.errors.append(f"download({entryPath}): {exc}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
|
|
||||||
fileBytes: bytes
|
fileBytes: bytes
|
||||||
if isinstance(downloaded, (bytes, bytearray)):
|
if isinstance(downloaded, (bytes, bytearray)):
|
||||||
fileBytes = bytes(downloaded)
|
fileBytes = bytes(downloaded)
|
||||||
else:
|
else:
|
||||||
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
|
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
|
||||||
if getattr(downloaded, "mimeType", None):
|
if getattr(downloaded, "mimeType", None):
|
||||||
mimeType = downloaded.mimeType # export may have changed the type
|
mimeType = downloaded.mimeType
|
||||||
if not fileBytes:
|
if not fileBytes:
|
||||||
result.failed += 1
|
result.failed += 1
|
||||||
return
|
return
|
||||||
|
|
@ -354,6 +383,15 @@ async def _ingestOne(
|
||||||
result.skippedPolicy += 1
|
result.skippedPolicy += 1
|
||||||
return
|
return
|
||||||
|
|
||||||
|
provenance: Dict[str, Any] = {
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
|
"authority": "google",
|
||||||
|
"service": "drive",
|
||||||
|
"externalItemId": externalItemId,
|
||||||
|
"entryPath": entryPath,
|
||||||
|
"tier": "body",
|
||||||
|
}
|
||||||
try:
|
try:
|
||||||
handle = await knowledgeService.requestIngestion(
|
handle = await knowledgeService.requestIngestion(
|
||||||
IngestionJob(
|
IngestionJob(
|
||||||
|
|
@ -366,14 +404,7 @@ async def _ingestOne(
|
||||||
contentObjects=contentObjects,
|
contentObjects=contentObjects,
|
||||||
contentVersion=revision,
|
contentVersion=revision,
|
||||||
neutralize=limits.neutralize,
|
neutralize=limits.neutralize,
|
||||||
provenance={
|
provenance=provenance,
|
||||||
"connectionId": connectionId,
|
|
||||||
"authority": "google",
|
|
||||||
"service": "drive",
|
|
||||||
"externalItemId": externalItemId,
|
|
||||||
"entryPath": entryPath,
|
|
||||||
"tier": "body",
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
@ -388,6 +419,8 @@ async def _ingestOne(
|
||||||
result.indexed += 1
|
result.indexed += 1
|
||||||
else:
|
else:
|
||||||
result.failed += 1
|
result.failed += 1
|
||||||
|
if handle.error:
|
||||||
|
result.errors.append(f"ingest({entryPath}): {handle.error}")
|
||||||
|
|
||||||
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||||
processed = result.indexed + result.skippedDuplicate
|
processed = result.indexed + result.skippedDuplicate
|
||||||
|
|
@ -411,6 +444,8 @@ async def _ingestOne(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
durationMs = int((time.time() - startMs) * 1000)
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
|
|
|
||||||
|
|
@ -175,35 +175,36 @@ def _buildContentObjects(
|
||||||
async def bootstrapGmail(
|
async def bootstrapGmail(
|
||||||
connectionId: str,
|
connectionId: str,
|
||||||
*,
|
*,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
progressCb: Optional[Any] = None,
|
||||||
adapter: Any = None,
|
adapter: Any = None,
|
||||||
connection: Any = None,
|
connection: Any = None,
|
||||||
knowledgeService: Any = None,
|
knowledgeService: Any = None,
|
||||||
limits: Optional[GmailBootstrapLimits] = None,
|
limits: Optional[GmailBootstrapLimits] = None,
|
||||||
googleGetFn: Optional[Callable[..., Any]] = None,
|
googleGetFn: Optional[Callable[..., Any]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
|
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages.
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
|
||||||
prefs = loadConnectionPrefs(connectionId)
|
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||||
|
Each DataSource defines the neutralize policy for its scope.
|
||||||
|
"""
|
||||||
|
if not dataSources:
|
||||||
|
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
if not limits:
|
if not limits:
|
||||||
limits = GmailBootstrapLimits(
|
limits = GmailBootstrapLimits()
|
||||||
includeAttachments=prefs.mailIndexAttachments,
|
|
||||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
|
||||||
mailContentDepth=prefs.mailContentDepth,
|
|
||||||
neutralize=prefs.neutralizeBeforeEmbed,
|
|
||||||
)
|
|
||||||
|
|
||||||
startMs = time.time()
|
startMs = time.time()
|
||||||
result = GmailBootstrapResult(connectionId=connectionId)
|
result = GmailBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.started part=gmail connectionId=%s",
|
"ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d",
|
||||||
connectionId,
|
connectionId, len(dataSources),
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.started",
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
"part": "gmail",
|
"part": "gmail",
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceCount": len(dataSources),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -221,26 +222,51 @@ async def bootstrapGmail(
|
||||||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||||
|
|
||||||
for labelId in limits.labels:
|
cancelled = False
|
||||||
|
for ds in dataSources:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||||
break
|
break
|
||||||
try:
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
await _ingestLabel(
|
cancelled = True
|
||||||
googleGetFn=googleGetFn,
|
break
|
||||||
knowledgeService=knowledgeService,
|
|
||||||
connectionId=connectionId,
|
|
||||||
mandateId=mandateId,
|
|
||||||
userId=userId,
|
|
||||||
labelId=labelId,
|
|
||||||
limits=limits,
|
|
||||||
result=result,
|
|
||||||
progressCb=progressCb,
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
|
|
||||||
result.errors.append(f"label({labelId}): {exc}")
|
|
||||||
|
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
dsId = ds.get("id", "")
|
||||||
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
dsLimits = GmailBootstrapLimits(
|
||||||
|
maxMessages=limits.maxMessages,
|
||||||
|
labels=limits.labels,
|
||||||
|
maxBodyChars=limits.maxBodyChars,
|
||||||
|
includeAttachments=limits.includeAttachments,
|
||||||
|
maxAttachmentBytes=limits.maxAttachmentBytes,
|
||||||
|
maxAgeDays=limits.maxAgeDays,
|
||||||
|
mailContentDepth=limits.mailContentDepth,
|
||||||
|
neutralize=dsNeutralize,
|
||||||
|
)
|
||||||
|
|
||||||
|
for labelId in dsLimits.labels:
|
||||||
|
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
await _ingestLabel(
|
||||||
|
googleGetFn=googleGetFn,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
labelId=labelId,
|
||||||
|
limits=dsLimits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dsId,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
|
||||||
|
result.errors.append(f"label({labelId}): {exc}")
|
||||||
|
|
||||||
|
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||||
|
if cancelled:
|
||||||
|
finalResult["cancelled"] = True
|
||||||
|
return finalResult
|
||||||
|
|
||||||
|
|
||||||
async def _resolveDependencies(connectionId: str):
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
|
@ -282,7 +308,8 @@ async def _ingestLabel(
|
||||||
labelId: str,
|
labelId: str,
|
||||||
limits: GmailBootstrapLimits,
|
limits: GmailBootstrapLimits,
|
||||||
result: GmailBootstrapResult,
|
result: GmailBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
||||||
if remaining <= 0:
|
if remaining <= 0:
|
||||||
|
|
@ -316,6 +343,8 @@ async def _ingestLabel(
|
||||||
for stub in messageStubs:
|
for stub in messageStubs:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||||
break
|
break
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
|
return
|
||||||
msgId = stub.get("id")
|
msgId = stub.get("id")
|
||||||
if not msgId:
|
if not msgId:
|
||||||
continue
|
continue
|
||||||
|
|
@ -337,6 +366,7 @@ async def _ingestLabel(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
|
|
||||||
nextPageToken = page.get("nextPageToken")
|
nextPageToken = page.get("nextPageToken")
|
||||||
|
|
@ -355,7 +385,8 @@ async def _ingestMessage(
|
||||||
message: Dict[str, Any],
|
message: Dict[str, Any],
|
||||||
limits: GmailBootstrapLimits,
|
limits: GmailBootstrapLimits,
|
||||||
result: GmailBootstrapResult,
|
result: GmailBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
|
@ -386,6 +417,7 @@ async def _ingestMessage(
|
||||||
neutralize=limits.neutralize,
|
neutralize=limits.neutralize,
|
||||||
provenance={
|
provenance={
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
"authority": "google",
|
"authority": "google",
|
||||||
"service": "gmail",
|
"service": "gmail",
|
||||||
"externalItemId": messageId,
|
"externalItemId": messageId,
|
||||||
|
|
@ -420,6 +452,7 @@ async def _ingestMessage(
|
||||||
parentSyntheticId=syntheticId,
|
parentSyntheticId=syntheticId,
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("gmail attachments %s failed: %s", messageId, exc)
|
logger.warning("gmail attachments %s failed: %s", messageId, exc)
|
||||||
|
|
@ -461,6 +494,7 @@ async def _ingestAttachments(
|
||||||
parentSyntheticId: str,
|
parentSyntheticId: str,
|
||||||
limits: GmailBootstrapLimits,
|
limits: GmailBootstrapLimits,
|
||||||
result: GmailBootstrapResult,
|
result: GmailBootstrapResult,
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
|
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
@ -561,6 +595,7 @@ async def _ingestAttachments(
|
||||||
contentObjects=contentObjects,
|
contentObjects=contentObjects,
|
||||||
provenance={
|
provenance={
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
"authority": "google",
|
"authority": "google",
|
||||||
"service": "gmail",
|
"service": "gmail",
|
||||||
"parentId": parentSyntheticId,
|
"parentId": parentSyntheticId,
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ import hashlib
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any, Callable, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
|
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
|
||||||
|
|
||||||
|
|
@ -139,34 +139,35 @@ def _buildContentObjects(
|
||||||
async def bootstrapOutlook(
|
async def bootstrapOutlook(
|
||||||
connectionId: str,
|
connectionId: str,
|
||||||
*,
|
*,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
progressCb: Optional[Any] = None,
|
||||||
adapter: Any = None,
|
adapter: Any = None,
|
||||||
connection: Any = None,
|
connection: Any = None,
|
||||||
knowledgeService: Any = None,
|
knowledgeService: Any = None,
|
||||||
limits: Optional[OutlookBootstrapLimits] = None,
|
limits: Optional[OutlookBootstrapLimits] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
|
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages.
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
|
||||||
prefs = loadConnectionPrefs(connectionId)
|
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||||
|
Each DataSource defines the neutralize policy for its messages.
|
||||||
|
"""
|
||||||
|
if not dataSources:
|
||||||
|
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
if not limits:
|
if not limits:
|
||||||
limits = OutlookBootstrapLimits(
|
limits = OutlookBootstrapLimits()
|
||||||
includeAttachments=prefs.mailIndexAttachments,
|
|
||||||
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
|
||||||
mailContentDepth=prefs.mailContentDepth,
|
|
||||||
neutralize=prefs.neutralizeBeforeEmbed,
|
|
||||||
)
|
|
||||||
|
|
||||||
startMs = time.time()
|
startMs = time.time()
|
||||||
result = OutlookBootstrapResult(connectionId=connectionId)
|
result = OutlookBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
|
"ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d",
|
||||||
connectionId,
|
connectionId, len(dataSources),
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.started",
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
"part": "outlook",
|
"part": "outlook",
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceCount": len(dataSources),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -176,27 +177,52 @@ async def bootstrapOutlook(
|
||||||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||||
|
|
||||||
folderIds = await _selectFolderIds(adapter, limits)
|
cancelled = False
|
||||||
for folderId in folderIds:
|
for ds in dataSources:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||||
break
|
break
|
||||||
try:
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
await _ingestFolder(
|
cancelled = True
|
||||||
adapter=adapter,
|
break
|
||||||
knowledgeService=knowledgeService,
|
|
||||||
connectionId=connectionId,
|
|
||||||
mandateId=mandateId,
|
|
||||||
userId=userId,
|
|
||||||
folderId=folderId,
|
|
||||||
limits=limits,
|
|
||||||
result=result,
|
|
||||||
progressCb=progressCb,
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
|
|
||||||
result.errors.append(f"folder({folderId}): {exc}")
|
|
||||||
|
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
dsId = ds.get("id", "")
|
||||||
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
dsLimits = OutlookBootstrapLimits(
|
||||||
|
maxMessages=limits.maxMessages,
|
||||||
|
maxFolders=limits.maxFolders,
|
||||||
|
maxBodyChars=limits.maxBodyChars,
|
||||||
|
includeAttachments=limits.includeAttachments,
|
||||||
|
maxAttachmentBytes=limits.maxAttachmentBytes,
|
||||||
|
maxAgeDays=limits.maxAgeDays,
|
||||||
|
mailContentDepth=limits.mailContentDepth,
|
||||||
|
neutralize=dsNeutralize,
|
||||||
|
)
|
||||||
|
|
||||||
|
folderIds = await _selectFolderIds(adapter, dsLimits)
|
||||||
|
for folderId in folderIds:
|
||||||
|
if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
await _ingestFolder(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
folderId=folderId,
|
||||||
|
limits=dsLimits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dsId,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
|
||||||
|
result.errors.append(f"folder({folderId}): {exc}")
|
||||||
|
|
||||||
|
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||||
|
if cancelled:
|
||||||
|
finalResult["cancelled"] = True
|
||||||
|
return finalResult
|
||||||
|
|
||||||
|
|
||||||
async def _resolveDependencies(connectionId: str):
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
|
@ -266,8 +292,12 @@ async def _ingestFolder(
|
||||||
folderId: str,
|
folderId: str,
|
||||||
limits: OutlookBootstrapLimits,
|
limits: OutlookBootstrapLimits,
|
||||||
result: OutlookBootstrapResult,
|
result: OutlookBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
|
|
||||||
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
||||||
if remaining <= 0:
|
if remaining <= 0:
|
||||||
return
|
return
|
||||||
|
|
@ -307,6 +337,8 @@ async def _ingestFolder(
|
||||||
for message in page.get("value", []) or []:
|
for message in page.get("value", []) or []:
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||||
break
|
break
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
|
return
|
||||||
await _ingestMessage(
|
await _ingestMessage(
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
knowledgeService=knowledgeService,
|
knowledgeService=knowledgeService,
|
||||||
|
|
@ -317,6 +349,7 @@ async def _ingestFolder(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
|
|
||||||
nextLink = page.get("@odata.nextLink")
|
nextLink = page.get("@odata.nextLink")
|
||||||
|
|
@ -338,7 +371,8 @@ async def _ingestMessage(
|
||||||
message: Dict[str, Any],
|
message: Dict[str, Any],
|
||||||
limits: OutlookBootstrapLimits,
|
limits: OutlookBootstrapLimits,
|
||||||
result: OutlookBootstrapResult,
|
result: OutlookBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
|
@ -369,6 +403,7 @@ async def _ingestMessage(
|
||||||
neutralize=limits.neutralize,
|
neutralize=limits.neutralize,
|
||||||
provenance={
|
provenance={
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
"authority": "msft",
|
"authority": "msft",
|
||||||
"service": "outlook",
|
"service": "outlook",
|
||||||
"externalItemId": messageId,
|
"externalItemId": messageId,
|
||||||
|
|
@ -402,6 +437,7 @@ async def _ingestMessage(
|
||||||
parentSyntheticId=syntheticId,
|
parentSyntheticId=syntheticId,
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("outlook attachments %s failed: %s", messageId, exc)
|
logger.warning("outlook attachments %s failed: %s", messageId, exc)
|
||||||
|
|
@ -443,6 +479,7 @@ async def _ingestAttachments(
|
||||||
parentSyntheticId: str,
|
parentSyntheticId: str,
|
||||||
limits: OutlookBootstrapLimits,
|
limits: OutlookBootstrapLimits,
|
||||||
result: OutlookBootstrapResult,
|
result: OutlookBootstrapResult,
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
|
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
@ -531,6 +568,7 @@ async def _ingestAttachments(
|
||||||
neutralize=limits.neutralize,
|
neutralize=limits.neutralize,
|
||||||
provenance={
|
provenance={
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
"authority": "msft",
|
"authority": "msft",
|
||||||
"service": "outlook",
|
"service": "outlook",
|
||||||
"parentId": parentSyntheticId,
|
"parentId": parentSyntheticId,
|
||||||
|
|
|
||||||
|
|
@ -94,35 +94,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
|
||||||
async def bootstrapSharepoint(
|
async def bootstrapSharepoint(
|
||||||
connectionId: str,
|
connectionId: str,
|
||||||
*,
|
*,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
dataSources: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
progressCb: Optional[Any] = None,
|
||||||
adapter: Any = None,
|
adapter: Any = None,
|
||||||
connection: Any = None,
|
connection: Any = None,
|
||||||
knowledgeService: Any = None,
|
knowledgeService: Any = None,
|
||||||
limits: Optional[SharepointBootstrapLimits] = None,
|
limits: Optional[SharepointBootstrapLimits] = None,
|
||||||
runExtractionFn: Optional[Callable[..., Any]] = None,
|
runExtractionFn: Optional[Callable[..., Any]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
|
"""Enumerate SharePoint drives and ingest files via the facade.
|
||||||
|
|
||||||
Parameters allow injection for tests; production callers pass only
|
Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
|
||||||
`connectionId` (and optionally a progressCb) and everything else is
|
Each DataSource defines the root path + neutralize policy for its subtree.
|
||||||
resolved against the registered services.
|
|
||||||
"""
|
"""
|
||||||
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
if not dataSources:
|
||||||
prefs = loadConnectionPrefs(connectionId)
|
return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
|
||||||
|
|
||||||
if not limits:
|
if not limits:
|
||||||
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
|
limits = SharepointBootstrapLimits()
|
||||||
|
|
||||||
startMs = time.time()
|
startMs = time.time()
|
||||||
result = SharepointBootstrapResult(connectionId=connectionId)
|
result = SharepointBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
|
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d",
|
||||||
connectionId,
|
connectionId, len(dataSources),
|
||||||
extra={
|
extra={
|
||||||
"event": "ingestion.connection.bootstrap.started",
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
"part": "sharepoint",
|
"part": "sharepoint",
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceCount": len(dataSources),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -142,17 +143,27 @@ async def bootstrapSharepoint(
|
||||||
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||||
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||||
|
|
||||||
try:
|
cancelled = False
|
||||||
sites = await adapter.browse("/", limit=limits.maxSites)
|
for ds in dataSources:
|
||||||
except Exception as exc:
|
|
||||||
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
|
|
||||||
result.errors.append(f"site_discovery: {exc}")
|
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
|
||||||
|
|
||||||
for site in sites[: limits.maxSites]:
|
|
||||||
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
break
|
break
|
||||||
sitePath = getattr(site, "path", "") or ""
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
cancelled = True
|
||||||
|
break
|
||||||
|
|
||||||
|
dsPath = ds.get("path", "")
|
||||||
|
dsId = ds.get("id", "")
|
||||||
|
dsNeutralize = ds.get("neutralize", False)
|
||||||
|
dsLimits = SharepointBootstrapLimits(
|
||||||
|
maxItems=limits.maxItems,
|
||||||
|
maxBytes=limits.maxBytes,
|
||||||
|
maxFileSize=limits.maxFileSize,
|
||||||
|
skipMimePrefixes=limits.skipMimePrefixes,
|
||||||
|
maxDepth=limits.maxDepth,
|
||||||
|
maxSites=limits.maxSites,
|
||||||
|
neutralize=dsNeutralize,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await _walkFolder(
|
await _walkFolder(
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
|
|
@ -161,17 +172,21 @@ async def bootstrapSharepoint(
|
||||||
connectionId=connectionId,
|
connectionId=connectionId,
|
||||||
mandateId=mandateId,
|
mandateId=mandateId,
|
||||||
userId=userId,
|
userId=userId,
|
||||||
folderPath=sitePath,
|
folderPath=dsPath,
|
||||||
depth=0,
|
depth=0,
|
||||||
limits=limits,
|
limits=dsLimits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dsId,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
|
logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
|
||||||
result.errors.append(f"walk({sitePath}): {exc}")
|
result.errors.append(f"walk({dsPath}): {exc}")
|
||||||
|
|
||||||
return _finalizeResult(connectionId, result, startMs)
|
finalResult = _finalizeResult(connectionId, result, startMs)
|
||||||
|
if cancelled:
|
||||||
|
finalResult["cancelled"] = True
|
||||||
|
return finalResult
|
||||||
|
|
||||||
|
|
||||||
async def _resolveDependencies(connectionId: str):
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
|
@ -221,10 +236,13 @@ async def _walkFolder(
|
||||||
depth: int,
|
depth: int,
|
||||||
limits: SharepointBootstrapLimits,
|
limits: SharepointBootstrapLimits,
|
||||||
result: SharepointBootstrapResult,
|
result: SharepointBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
if depth > limits.maxDepth:
|
if depth > limits.maxDepth:
|
||||||
return
|
return
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
entries = await adapter.browse(folderPath)
|
entries = await adapter.browse(folderPath)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
@ -237,6 +255,8 @@ async def _walkFolder(
|
||||||
return
|
return
|
||||||
if result.bytesProcessed >= limits.maxBytes:
|
if result.bytesProcessed >= limits.maxBytes:
|
||||||
return
|
return
|
||||||
|
if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
|
||||||
|
return
|
||||||
|
|
||||||
entryPath = getattr(entry, "path", "") or ""
|
entryPath = getattr(entry, "path", "") or ""
|
||||||
if getattr(entry, "isFolder", False):
|
if getattr(entry, "isFolder", False):
|
||||||
|
|
@ -252,6 +272,7 @@ async def _walkFolder(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -283,6 +304,7 @@ async def _walkFolder(
|
||||||
limits=limits,
|
limits=limits,
|
||||||
result=result,
|
result=result,
|
||||||
progressCb=progressCb,
|
progressCb=progressCb,
|
||||||
|
dataSourceId=dataSourceId,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -301,7 +323,8 @@ async def _ingestOne(
|
||||||
revision: Optional[str],
|
revision: Optional[str],
|
||||||
limits: SharepointBootstrapLimits,
|
limits: SharepointBootstrapLimits,
|
||||||
result: SharepointBootstrapResult,
|
result: SharepointBootstrapResult,
|
||||||
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
progressCb: Optional[Any],
|
||||||
|
dataSourceId: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
|
@ -339,6 +362,7 @@ async def _ingestOne(
|
||||||
|
|
||||||
provenance: Dict[str, Any] = {
|
provenance: Dict[str, Any] = {
|
||||||
"connectionId": connectionId,
|
"connectionId": connectionId,
|
||||||
|
"dataSourceId": dataSourceId,
|
||||||
"authority": "msft",
|
"authority": "msft",
|
||||||
"service": "sharepoint",
|
"service": "sharepoint",
|
||||||
"externalItemId": externalItemId,
|
"externalItemId": externalItemId,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
|
||||||
|
|
||||||
|
Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
|
||||||
|
If no ancestor has a value, the default (False) is used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def resolveEffectiveNeutralize(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Compute the effective neutralize policy for *ds* by walking up the path tree.

    A DataSource at /sites/HR/Documents inherits from /sites/HR if that
    ancestor has an explicit neutralize value and the child has no override.

    Args:
        ds: The DataSource record whose policy is being resolved.
        allDataSources: All DataSource records used for ancestor lookup.

    Returns:
        True when neutralization is enabled for this DataSource (explicitly
        or inherited from the nearest ancestor), False otherwise.
    """
    ownValue = ds.get("neutralize")
    if ownValue is not None:
        # Bug fix: the previous check (`is not None and is not False`)
        # returned True for falsy non-bool values such as 0 or "".
        # Coerce explicitly so any stored value resolves to its truthiness.
        return bool(ownValue)
    # No explicit value on this DataSource — inherit from the nearest ancestor.
    return _findAncestorPolicy(ds, allDataSources, "neutralize")
||||||
|
|
||||||
|
|
||||||
|
def resolveEffectiveRagIndexEnabled(
    ds: Dict[str, Any],
    allDataSources: List[Dict[str, Any]],
) -> bool:
    """Resolve the effective ragIndexEnabled flag for *ds*.

    An explicit boolean on the DataSource itself always wins; any other
    value (including None) defers to the nearest ancestor in the path tree.
    """
    explicit = ds.get("ragIndexEnabled")
    # isinstance(..., bool) matches exactly the True/False singletons the
    # original `is True` / `is False` pair matched.
    if isinstance(explicit, bool):
        return explicit
    return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
||||||
|
|
||||||
|
|
||||||
|
def _findAncestorPolicy(
|
||||||
|
ds: Dict[str, Any],
|
||||||
|
allDataSources: List[Dict[str, Any]],
|
||||||
|
field: str,
|
||||||
|
) -> bool:
|
||||||
|
"""Walk ancestors (longest-prefix match) to find an inherited policy value."""
|
||||||
|
dsPath = ds.get("path", "")
|
||||||
|
connectionId = ds.get("connectionId", "")
|
||||||
|
if not dsPath:
|
||||||
|
return False
|
||||||
|
|
||||||
|
ancestors = []
|
||||||
|
for candidate in allDataSources:
|
||||||
|
if candidate.get("id") == ds.get("id"):
|
||||||
|
continue
|
||||||
|
if candidate.get("connectionId") != connectionId:
|
||||||
|
continue
|
||||||
|
candidatePath = candidate.get("path", "")
|
||||||
|
if not candidatePath:
|
||||||
|
continue
|
||||||
|
if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath):
|
||||||
|
ancestors.append(candidate)
|
||||||
|
|
||||||
|
ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
|
||||||
|
|
||||||
|
for ancestor in ancestors:
|
||||||
|
val = ancestor.get(field)
|
||||||
|
if val is True:
|
||||||
|
return True
|
||||||
|
if val is False:
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
|
@ -98,7 +98,8 @@ class WebService:
|
||||||
searchUrls = []
|
searchUrls = []
|
||||||
searchResultsWithContent = []
|
searchResultsWithContent = []
|
||||||
if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
|
if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
|
||||||
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
|
if operationId:
|
||||||
|
self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
searchUrls, searchResultsWithContent = await self._performWebSearch(
|
searchUrls, searchResultsWithContent = await self._performWebSearch(
|
||||||
|
|
@ -113,16 +114,14 @@ class WebService:
|
||||||
searchUrls = []
|
searchUrls = []
|
||||||
searchResultsWithContent = []
|
searchResultsWithContent = []
|
||||||
|
|
||||||
# Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant)
|
|
||||||
if searchUrls:
|
if searchUrls:
|
||||||
# Prepend Tavily URLs to the list (they're more relevant)
|
|
||||||
allUrls = searchUrls + allUrls
|
allUrls = searchUrls + allUrls
|
||||||
logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
|
logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
|
||||||
else:
|
else:
|
||||||
# If Tavily search failed, use AI-extracted URLs
|
|
||||||
logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
|
logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
|
||||||
|
|
||||||
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
|
if operationId:
|
||||||
|
self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
|
||||||
|
|
||||||
# If we have search results (even without content), use them directly instead of crawling
|
# If we have search results (even without content), use them directly instead of crawling
|
||||||
# Tavily search results are more relevant than generic AI-extracted URLs
|
# Tavily search results are more relevant than generic AI-extracted URLs
|
||||||
|
|
|
||||||
|
|
@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [
|
||||||
"path": "/automations",
|
"path": "/automations",
|
||||||
"order": 30,
|
"order": 30,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "rag-inventory",
|
||||||
|
"objectKey": "ui.system.ragInventory",
|
||||||
|
"label": t("RAG-Inventar"),
|
||||||
|
"icon": "FaDatabase",
|
||||||
|
"path": "/rag-inventory",
|
||||||
|
"order": 35,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "store",
|
"id": "store",
|
||||||
"objectKey": "ui.system.store",
|
"objectKey": "ui.system.store",
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,9 @@ asyncpg==0.30.0
|
||||||
## Stripe payments
|
## Stripe payments
|
||||||
stripe>=11.0.0
|
stripe>=11.0.0
|
||||||
|
|
||||||
|
## Outlook MSG file extraction
|
||||||
|
extract-msg>=0.55.0
|
||||||
|
|
||||||
## Geospatial libraries for STAC connector
|
## Geospatial libraries for STAC connector
|
||||||
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
|
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
|
||||||
shapely>=2.0.0 # For geometric operations (intersections, area calculations)
|
shapely>=2.0.0 # For geometric operations (intersections, area calculations)
|
||||||
|
|
|
||||||
88
scripts/script_db_migrate_datasource_rag.py
Normal file
88
scripts/script_db_migrate_datasource_rag.py
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.
|
||||||
|
|
||||||
|
This is a one-off migration for the RAG consent & control unification.
|
||||||
|
Safe to run multiple times (checks column existence before acting).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python script_db_migrate_datasource_rag.py [--dry-run]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
scriptPath = Path(__file__).resolve()
|
||||||
|
gatewayPath = scriptPath.parent.parent
|
||||||
|
sys.path.insert(0, str(gatewayPath))
|
||||||
|
os.chdir(str(gatewayPath))
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def _getConnection():
    """Open a psycopg2 connection using the application's DB configuration.

    Defaults mirror the app config fallbacks (localhost:5432, poweron_app).
    """
    connectParams = {
        "host": APP_CONFIG.get("DB_HOST", "localhost"),
        "port": int(APP_CONFIG.get("DB_PORT", "5432")),
        "database": APP_CONFIG.get("DB_DATABASE", "poweron_app"),
        "user": APP_CONFIG.get("DB_USER"),
        "password": APP_CONFIG.get("DB_PASSWORD_SECRET"),
    }
    return psycopg2.connect(**connectParams)
|
||||||
|
|
||||||
|
|
||||||
|
def _columnExists(cur, table: str, column: str) -> bool:
|
||||||
|
cur.execute(
|
||||||
|
"""SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""",
|
||||||
|
(table, column),
|
||||||
|
)
|
||||||
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(dryRun: bool = False) -> None:
    """Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.

    Idempotent: each rename is skipped when the new column already exists or
    the old column is missing. All renames run inside a single transaction.

    Args:
        dryRun: When True, only log the SQL that would run (no execution).
    """
    conn = _getConnection()
    conn.autocommit = False  # single transaction for all renames
    cur = conn.cursor()
    try:
        renames = [
            ("DataSource", "autoSync", "ragIndexEnabled"),
            ("DataSource", "lastSynced", "lastIndexed"),
        ]

        executed = []
        for table, oldCol, newCol in renames:
            if _columnExists(cur, table, oldCol) and not _columnExists(cur, table, newCol):
                sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";'
                logger.info("EXEC: %s", sql)
                if not dryRun:
                    cur.execute(sql)
                executed.append(sql)
            elif _columnExists(cur, table, newCol):
                logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol)
            elif not _columnExists(cur, table, oldCol):
                logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol)

        if not dryRun and executed:
            conn.commit()
            logger.info("Migration committed (%d statements)", len(executed))
        elif dryRun and executed:
            conn.rollback()
            logger.info("DRY RUN — would execute %d statements", len(executed))
        else:
            logger.info("Nothing to do — schema already up to date")
    except Exception:
        # Bug fix: on a mid-migration failure the connection previously
        # leaked with an open transaction; roll back and re-raise instead.
        conn.rollback()
        raise
    finally:
        # Bug fix: always release the cursor/connection, even on error.
        cur.close()
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI entry point: --dry-run previews the SQL without touching the DB.
    argParser = argparse.ArgumentParser(description=__doc__)
    argParser.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
    cliArgs = argParser.parse_args()
    migrate(dryRun=cliArgs.dry_run)
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
"""Unit tests for Google STT helper config (no API calls)."""
|
||||||
|
|
||||||
|
from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields
|
||||||
|
|
||||||
|
|
||||||
|
def test_buildPrimaryStt_lightweight_stripsHeavyFeatures():
    """Lightweight mode keeps punctuation but drops the expensive features."""
    fields = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
    assert fields["model"] == "latest_short"
    assert fields["enable_word_time_offsets"] is False
    assert fields["enable_word_confidence"] is False
    assert fields["max_alternatives"] == 1
    assert fields["use_enhanced"] is False
    assert fields["enable_automatic_punctuation"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_buildPrimaryStt_full_matchesLegacyDefaults():
    """Full (non-lightweight) mode matches the legacy feature-rich defaults."""
    fields = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
    assert fields["model"] == "latest_long"
    assert fields["enable_word_time_offsets"] is True
    assert fields["enable_word_confidence"] is True
    assert fields["max_alternatives"] == 3
    assert fields["use_enhanced"] is True
|
||||||
|
|
@ -100,6 +100,9 @@ def _adapter(svc):
|
||||||
return SimpleNamespace(_svc=svc)
|
return SimpleNamespace(_svc=svc)
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}]
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_walks_team_space_lists_and_tasks():
|
def test_bootstrap_walks_team_space_lists_and_tasks():
|
||||||
svc = _FakeClickupService(taskCount=2)
|
svc = _FakeClickupService(taskCount=2)
|
||||||
knowledge = _FakeKnowledgeService()
|
knowledge = _FakeKnowledgeService()
|
||||||
|
|
@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapClickup(
|
return await bootstrapClickup(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=_adapter(svc),
|
adapter=_adapter(svc),
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
|
||||||
assert job.mimeType == "application/vnd.clickup.task+json"
|
assert job.mimeType == "application/vnd.clickup.task+json"
|
||||||
assert job.mandateId == "m1"
|
assert job.mandateId == "m1"
|
||||||
assert job.provenance["connectionId"] == "c1"
|
assert job.provenance["connectionId"] == "c1"
|
||||||
|
assert job.provenance["dataSourceId"] == "ds-1"
|
||||||
assert job.provenance["authority"] == "clickup"
|
assert job.provenance["authority"] == "clickup"
|
||||||
assert job.provenance["teamId"] == "team-1"
|
assert job.provenance["teamId"] == "team-1"
|
||||||
assert job.contentVersion # numeric millisecond string
|
assert job.contentVersion # numeric millisecond string
|
||||||
# At least the header content-object is present.
|
|
||||||
ids = [co["contentObjectId"] for co in job.contentObjects]
|
ids = [co["contentObjectId"] for co in job.contentObjects]
|
||||||
assert "header" in ids
|
assert "header" in ids
|
||||||
|
|
||||||
|
|
@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapClickup(
|
return await bootstrapClickup(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=_adapter(svc),
|
adapter=_adapter(svc),
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapClickup(
|
return await bootstrapClickup(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=_adapter(svc),
|
adapter=_adapter(svc),
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapClickup(
|
return await bootstrapClickup(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=_adapter(svc),
|
adapter=_adapter(svc),
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion():
|
||||||
assert result["indexed"] == 3
|
assert result["indexed"] == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_skips_when_no_datasources():
    """Without any dataSources the clickup bootstrap must bail out early."""
    async def _invoke():
        return await bootstrapClickup(connectionId="c1")

    outcome = asyncio.run(_invoke())
    assert outcome["skipped"] is True
    assert outcome["reason"] == "no_datasources"
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_honours_datasource_neutralize():
    """A dataSource with neutralize=True must propagate into every ingestion job."""
    fakeSvc = _FakeClickupService(taskCount=1)
    fakeKnowledge = _FakeKnowledgeService()
    fakeConnection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _invoke():
        return await bootstrapClickup(
            connectionId="c1",
            dataSources=[{"id": "ds-n", "neutralize": True}],
            adapter=_adapter(fakeSvc),
            connection=fakeConnection,
            knowledgeService=fakeKnowledge,
            limits=ClickupBootstrapLimits(maxAgeDays=None),
        )

    asyncio.run(_invoke())
    for ingestionJob in fakeKnowledge.calls:
        assert ingestionJob.neutralize is True
        assert ingestionJob.provenance["dataSourceId"] == "ds-n"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_bootstrap_walks_team_space_lists_and_tasks()
|
test_bootstrap_walks_team_space_lists_and_tasks()
|
||||||
test_bootstrap_reports_duplicates_on_second_run()
|
test_bootstrap_reports_duplicates_on_second_run()
|
||||||
test_bootstrap_skips_tasks_older_than_maxAgeDays()
|
test_bootstrap_skips_tasks_older_than_maxAgeDays()
|
||||||
test_bootstrap_maxTasks_caps_ingestion()
|
test_bootstrap_maxTasks_caps_ingestion()
|
||||||
|
test_bootstrap_skips_when_no_datasources()
|
||||||
|
test_bootstrap_honours_datasource_neutralize()
|
||||||
print("OK — bootstrapClickup tests passed")
|
print("OK — bootstrapClickup tests passed")
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}]
|
||||||
|
|
||||||
|
|
||||||
def test_bootstrap_walks_drive_and_subfolders():
|
def test_bootstrap_walks_drive_and_subfolders():
|
||||||
adapter = _FakeDriveAdapter()
|
adapter = _FakeDriveAdapter()
|
||||||
knowledge = _FakeKnowledgeService()
|
knowledge = _FakeKnowledgeService()
|
||||||
|
|
@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapGdrive(
|
return await bootstrapGdrive(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapGdrive(
|
return await bootstrapGdrive(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapGdrive(
|
return await bootstrapGdrive(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}],
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
runExtractionFn=_fakeRunExtraction,
|
runExtractionFn=_fakeRunExtraction,
|
||||||
limits=GdriveBootstrapLimits(maxAgeDays=180),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
result = asyncio.run(_run())
|
result = asyncio.run(_run())
|
||||||
|
|
@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapGdrive(
|
return await bootstrapGdrive(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=_DEFAULT_DS,
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance():
|
||||||
assert job.sourceKind == "gdrive_item"
|
assert job.sourceKind == "gdrive_item"
|
||||||
assert job.mandateId == "m1"
|
assert job.mandateId == "m1"
|
||||||
assert job.provenance["connectionId"] == "c1"
|
assert job.provenance["connectionId"] == "c1"
|
||||||
|
assert job.provenance["dataSourceId"] == "ds1"
|
||||||
assert job.provenance["authority"] == "google"
|
assert job.provenance["authority"] == "google"
|
||||||
assert job.provenance["service"] == "drive"
|
assert job.provenance["service"] == "drive"
|
||||||
assert job.contentVersion # modifiedTime ISO string
|
assert job.contentVersion # modifiedTime ISO string
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_skips_when_no_datasources():
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapGdrive(connectionId="c1")
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["skipped"] is True
|
||||||
|
assert result["reason"] == "no_datasources"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_bootstrap_walks_drive_and_subfolders()
|
test_bootstrap_walks_drive_and_subfolders()
|
||||||
test_bootstrap_reports_duplicates_on_second_run()
|
test_bootstrap_reports_duplicates_on_second_run()
|
||||||
test_bootstrap_skips_files_older_than_maxAgeDays()
|
test_bootstrap_skips_files_older_than_maxAgeDays()
|
||||||
test_bootstrap_passes_connection_provenance()
|
test_bootstrap_passes_connection_provenance()
|
||||||
|
test_bootstrap_skips_when_no_datasources()
|
||||||
print("OK — bootstrapGdrive tests passed")
|
print("OK — bootstrapGdrive tests passed")
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapOutlook(
|
return await bootstrapOutlook(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=[{"id": "ds1", "neutralize": False}],
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
|
||||||
assert job.sourceKind == "outlook_message"
|
assert job.sourceKind == "outlook_message"
|
||||||
assert job.mimeType == "message/rfc822"
|
assert job.mimeType == "message/rfc822"
|
||||||
assert job.provenance["connectionId"] == "c1"
|
assert job.provenance["connectionId"] == "c1"
|
||||||
|
assert job.provenance["dataSourceId"] == "ds1"
|
||||||
assert job.provenance["service"] == "outlook"
|
assert job.provenance["service"] == "outlook"
|
||||||
assert job.contentVersion == "ck1"
|
assert job.contentVersion == "ck1"
|
||||||
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
|
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
|
||||||
|
|
@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapOutlook(
|
return await bootstrapOutlook(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=[{"id": "ds1", "neutralize": False}],
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates():
|
||||||
async def _run():
|
async def _run():
|
||||||
return await bootstrapOutlook(
|
return await bootstrapOutlook(
|
||||||
connectionId="c1",
|
connectionId="c1",
|
||||||
|
dataSources=[{"id": "ds1", "neutralize": False}],
|
||||||
adapter=adapter,
|
adapter=adapter,
|
||||||
connection=connection,
|
connection=connection,
|
||||||
knowledgeService=knowledge,
|
knowledgeService=knowledge,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue