Compare commits
15 commits: 4d7ccb0418...ce671f61b6

| Author | SHA1 | Date |
|---|---|---|
| | ce671f61b6 | |
| | 4a840e9e6e | |
| | 93cb6939dc | |
| | 3add5c9a80 | |
| | 6a5ff1ff7c | |
| | dff3d41845 | |
| | a7f4055130 | |
| | 078b4eaaaf | |
| | 9d82d3d353 | |
| | ba21005401 | |
| | 052647a52b | |
| | 49f3660d89 | |
| | 9816f13ae9 | |
| | b405cebdec | |
| | fb3a1f0a51 | |
76 changed files with 9400 additions and 942 deletions

app.py (10 additions)

@@ -405,6 +405,16 @@ async def lifespan(app: FastAPI):
    except Exception as e:
        logger.warning(f"BackgroundJob recovery failed (non-critical): {e}")

+   # Subscribe knowledge ingestion to connection lifecycle events so OAuth
+   # connect/disconnect reliably trigger bootstrap/purge.
+   try:
+       from modules.serviceCenter.services.serviceKnowledge.subConnectorIngestConsumer import (
+           registerKnowledgeIngestionConsumer,
+       )
+       registerKnowledgeIngestionConsumer()
+   except Exception as e:
+       logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
+
    yield

    # --- Stop Managers ---
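
The added comment describes the intent: knowledge ingestion subscribes to connection lifecycle events so that OAuth connect/disconnect reliably trigger bootstrap/purge. The consumer module itself is not part of this diff; a minimal, self-contained sketch of that pattern (everything except the name registerKnowledgeIngestionConsumer is illustrative, not the project's code) could look like:

```python
# Hypothetical sketch only; the real subConnectorIngestConsumer module is not in this diff.
import logging
from collections import defaultdict
from typing import Callable

logger = logging.getLogger(__name__)

class EventBus:
    """Minimal in-process pub/sub bus (illustrative, not the project's implementation)."""
    def __init__(self) -> None:
        self._subscribers: dict[str, list[Callable[[dict], None]]] = defaultdict(list)

    def subscribe(self, event: str, handler: Callable[[dict], None]) -> None:
        self._subscribers[event].append(handler)

    def publish(self, event: str, payload: dict) -> None:
        for handler in self._subscribers[event]:
            handler(payload)

bus = EventBus()

def _bootstrap_knowledge(payload: dict) -> None:
    # On OAuth connect, start ingestion for the new connection.
    logger.info("Bootstrapping knowledge ingestion for connection %s", payload.get("connection_id"))

def _purge_knowledge(payload: dict) -> None:
    # On disconnect, remove previously ingested knowledge for that connection.
    logger.info("Purging ingested knowledge for connection %s", payload.get("connection_id"))

def registerKnowledgeIngestionConsumer() -> None:
    bus.subscribe("connector.connected", _bootstrap_knowledge)
    bus.subscribe("connector.disconnected", _purge_knowledge)

# Example: an OAuth connect event then triggers the bootstrap handler.
registerKnowledgeIngestionConsumer()
bus.publish("connector.connected", {"connection_id": "msft-123"})
```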

env_dev.20260428_213450.backup (new file, 107 additions)

@@ -0,0 +1,107 @@
# Development Environment Configuration

# System Configuration
APP_ENV_TYPE = dev
APP_ENV_LABEL = Development Instance Patrick
APP_API_URL = http://localhost:8000
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt
APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9

# PostgreSQL DB Host
DB_HOST=localhost
DB_USER=poweron_dev
DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
DB_PORT=5432

# Security Configuration
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
APP_TOKEN_EXPIRY=300

# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron-center.net

# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5

# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback

Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback

# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback

# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = http://localhost:8000/api/infomaniak/auth/connect/callback

# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0

# AI configuration
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5ZmdDZ3hrSElrMnQzNFAtel9wX191VjVzN2g1LWZoa0V1YklubEdmMEJDdEZiR1RWeVZrM3V3enBHX3p6WUtTS0kwYkFyVEF0Nm8zX05CelVQcFJUc0lwVW5iNFczc1p1WWJ2WFBmd0lpLUxxWndEeUh0b2hGUHVpN19vb19nMTBnV1A1VmNpWERVX05lQ29VS20wTjZ3PT0=
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE=

Service_MSFT_TENANT_ID = common

# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzR
XbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=

# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=

# Teamsbot Browser Bot Service
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
# The bot will connect back to localhost:8000 via WebSocket
TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100

# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync

# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==

# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query

# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves.
# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av
# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a

@@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback

+# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
+
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09
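
The *_SECRET values in these files are wrapped as DEV_ENC:/INT_ENC:/PROD_ENC: and all begin with Z0FBQUFB, which is what a Fernet token looks like after base64; the comments also mention an encryption script for keys ending in _SECRET. The project's actual script is not shown in this diff, but under that assumption a minimal decryption sketch (key path taken from APP_KEY_SYSVAR; all helper names here are illustrative) would be:

```python
# Hypothetical sketch, not the project's encryption script. Assumes *_SECRET values are
# Fernet tokens prefixed with an environment marker such as DEV_ENC:.
import os
from cryptography.fernet import Fernet  # pip install cryptography

ENV_PREFIXES = ("DEV_ENC:", "INT_ENC:", "PROD_ENC:")

def load_secret(name: str, key_path: str) -> str:
    """Return the decrypted value of an env var, or the raw value if it is not wrapped."""
    raw = os.environ[name]
    prefix = next((p for p in ENV_PREFIXES if raw.startswith(p)), None)
    if prefix is None:
        return raw  # plain value, e.g. STRIPE_API_VERSION
    with open(key_path, "rb") as fh:
        key = fh.read().strip()  # Fernet key assumed to live in the APP_KEY_SYSVAR file
    token = raw[len(prefix):].encode()
    return Fernet(key).decrypt(token).decode()

# Usage (illustrative):
# db_password = load_secret("DB_PASSWORD_SECRET", os.environ["APP_KEY_SYSVAR"])
```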

env_int.20260428_213451.backup (new file, 100 additions)

@@ -0,0 +1,100 @@
# Integration Environment Configuration

# System Configuration
APP_ENV_TYPE = int
APP_ENV_LABEL = Integration Instance
APP_API_URL = https://gateway-int.poweron-center.net
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9

# PostgreSQL DB Host
DB_HOST=gateway-int-server.postgres.database.azure.com
DB_USER=heeshkdlby
DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
DB_PORT=5432

# Security Configuration
APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
APP_TOKEN_EXPIRY=300

# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net

# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5

# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/connect/callback

Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/connect/callback

# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/clickup/auth/connect/callback

# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/infomaniak/auth/connect/callback

# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
STRIPE_WEBHOOK_SECRET = whsec_2agCQEbDPSOn2C40EJcwoPCqlvaPLF7M
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0

# AI configuration
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnB5dkd6UkhtU3lhYmZMSlo0bklQZ2s3UTFBSkprZTNwWkg5Q2lVa0wtenhxWXpva21xVDVMRjdKSmhpTmxWS05IUTRoRHdCbktSRVVjcVFnY1RfV0N2S2dyV0dTMlhxQlRFVm41RkFTWVQzQThuVkZwdlNuVC05QlVRVXB6Qjk3akNpYmY1MFR6R1ByMzlIMllRZlRRYVVRN2ZBPT0=
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI=

Service_MSFT_TENANT_ID = common

# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms
5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=

# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=

# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io

# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync

# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==

# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query

# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

@@ -49,11 +49,13 @@ Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/go
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
-Service_CLICKUP_OAUTH_REDIRECT_URI = http://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
+Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/clickup/auth/connect/callback

+# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
+
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
-STRIPE_WEBHOOK_SECRET = whsec_2agCQEbDPSOn2C40EJcwoPCqlvaPLF7M
STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
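
Each environment file sets APP_ALLOWED_ORIGINS as a comma-separated list (note the stray space before https://nyla-int.poweron-center.net in the int value). How the app consumes it is not shown in this diff; a plausible FastAPI wiring, assuming plain os.environ access, is:

```python
# Hypothetical sketch of turning APP_ALLOWED_ORIGINS into a CORS allow-list; the project's
# actual settings loader is not part of this diff.
import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# Split on commas and strip whitespace so values like ", https://nyla-int..." still work.
origins = [
    origin.strip()
    for origin in os.getenv("APP_ALLOWED_ORIGINS", "").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,  # e.g. https://playground-int.poweron-center.net
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
```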

env_prod.20260428_213451.backup (new file, 101 additions)

@@ -0,0 +1,101 @@
# Production Environment Configuration

# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://gateway-prod.poweron-center.net

# PostgreSQL DB Host
DB_HOST=gateway-prod-server.postgres.database.azure.com
DB_USER=gzxxmcrdhn
DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9
DB_PORT=5432

# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300

# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net

# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5

# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/connect/callback

Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/connect/callback

# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/clickup/auth/connect/callback

# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/infomaniak/auth/connect/callback

# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah

# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=

Service_MSFT_TENANT_ID = common

# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1
h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=

# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=

# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io

# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync

# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=

# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query

# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

@@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/clickup/auth/connect/callback

+# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
+
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
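
The int hunk above replaces a plaintext whsec_ STRIPE_WEBHOOK_SECRET with an encrypted value. Whatever the project's handler looks like (it is not in this diff), that secret ultimately feeds Stripe's standard signature verification; a hedged sketch, assuming the value has already been unwrapped from its *_ENC prefix and the route path is illustrative:

```python
# Hypothetical webhook endpoint; only the stripe.Webhook.construct_event call is standard.
import os

import stripe
from fastapi import FastAPI, Header, HTTPException, Request

app = FastAPI()

@app.post("/api/stripe/webhook")  # illustrative path, not taken from the diff
async def stripe_webhook(request: Request, stripe_signature: str = Header(None)):
    payload = await request.body()
    try:
        event = stripe.Webhook.construct_event(
            payload=payload,
            sig_header=stripe_signature,
            secret=os.environ["STRIPE_WEBHOOK_SECRET"],  # whsec_... value after decryption
        )
    except (ValueError, stripe.error.SignatureVerificationError):
        raise HTTPException(status_code=400, detail="Invalid Stripe webhook signature")
    return {"received": True, "type": event["type"]}
```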

env_prod_forgejo.20260428_213451.backup (new file, 101 additions)

@@ -0,0 +1,101 @@
# Production Environment Configuration

# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance Forgejo
APP_KEY_SYSVAR = /srv/gateway/shared/secrets/master_key.txt
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://api.poweron.swiss

# PostgreSQL DB Host
DB_HOST=10.20.0.21
DB_USER=poweron_dev
DB_PASSWORD_SECRET = mypassword
DB_PORT=5432

# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300

# CORS Configuration
APP_ALLOWED_ORIGINS=https://porta.poweron.swiss

# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = srv/gateway/shared/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5

# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback

Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI =
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI =

# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback

# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/infomaniak/auth/connect/callback

# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
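
# The *_SECRET values above stay opaque until the encryption script decrypts them. That
# script is not part of this diff, so the following is only a minimal sketch under
# assumptions: the PROD_ENC/DEV_ENC payloads look like base64-wrapped Fernet tokens and
# the key is managed outside this file; the function name and signature are illustrative.
#
#   import base64
#   from cryptography.fernet import Fernet  # assumption: the encryption script is Fernet-based
#
#   def decryptEnvValue(raw: str, key: bytes) -> str:
#       # "PROD_ENC:<base64>" / "DEV_ENC:<base64>" -> plaintext (illustrative sketch only)
#       payload = raw.split(":", 1)[1]
#       fernetToken = base64.b64decode(payload)  # unwraps to the familiar gAAAA... token
#       return Fernet(key).decrypt(fernetToken).decode("utf-8")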

# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=

Service_MSFT_TENANT_ID = common

# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1
h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=

# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=

# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io

# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync

# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=

# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query

# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss

@ -11,7 +11,7 @@ APP_API_URL = https://api.poweron.swiss
|
||||||
# PostgreSQL DB Host
|
# PostgreSQL DB Host
|
||||||
DB_HOST=10.20.0.21
|
DB_HOST=10.20.0.21
|
||||||
DB_USER=poweron_dev
|
DB_USER=poweron_dev
|
||||||
DB_PASSWORD_SECRET = mypassword
|
DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnA4UXZiMnRoUzVlbVRLX3JTRl94cVpMaURtMndZVmFBYXdvdnIxLV81dWwxWmhmcUlCMUFZbDhRT2NsQmNqSl9ZMmRWRVN1Y2JqNlVwOXRJY1VBTm1oSjNiaFE9PQ==
|
||||||
DB_PORT=5432
|
DB_PORT=5432
|
||||||
|
|
||||||
# Security Configuration
|
# Security Configuration
|
||||||
|
|
@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
|
||||||
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
|
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
|
||||||
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback
|
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback
|
||||||
|
|
||||||
|
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kDrive + Mail) via the UI.
|
||||||
|
|
||||||
# Stripe Billing (both end with _SECRET for encryption script)
|
# Stripe Billing (both end with _SECRET for encryption script)
|
||||||
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
|
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
|
||||||
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
|
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,35 @@ from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingMode
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _supportsCustomTemperature(modelName: str) -> bool:
|
||||||
|
"""Check whether an Anthropic model accepts a custom ``temperature``.
|
||||||
|
|
||||||
|
Anthropic's Extended-Thinking models (Claude 4.7 Opus and the
|
||||||
|
upcoming 4.7 Sonnet/Haiku, plus all 5.x and beyond) reject every
|
||||||
|
``temperature`` value with HTTP 400
|
||||||
|
``{"error": "`temperature` is deprecated for this model."}`` --
|
||||||
|
only the model's internal default is accepted. Older Claude 4.5 /
|
||||||
|
4.6 models still accept any value in [0, 1].
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if ``temperature`` may be sent; False if it must be omitted.
|
||||||
|
"""
|
||||||
|
if not modelName:
|
||||||
|
return True
|
||||||
|
name = modelName.lower()
|
||||||
|
if name.startswith("claude-opus-4-7"):
|
||||||
|
return False
|
||||||
|
if name.startswith("claude-sonnet-4-7"):
|
||||||
|
return False
|
||||||
|
if name.startswith("claude-haiku-4-7"):
|
||||||
|
return False
|
||||||
|
# 5.x and beyond: same Extended-Thinking family, no custom temperature.
|
||||||
|
if name.startswith("claude-opus-5") or name.startswith("claude-sonnet-5") or name.startswith("claude-haiku-5"):
|
||||||
|
return False
|
||||||
|
return True
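
# A quick illustration of the gating above (model names are examples, not an exhaustive list):
#   assert _supportsCustomTemperature("claude-sonnet-4-5") is True       # pre-4.7 models still accept temperature
#   assert _supportsCustomTemperature("claude-opus-4-7-20260301") is False  # Extended-Thinking: omit the field
#   assert _supportsCustomTemperature("") is True                        # empty/unknown name keeps legacy behaviour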
|
||||||
|
|
||||||
|
|
||||||
def loadConfigData():
|
def loadConfigData():
|
||||||
"""Load configuration data for Anthropic connector"""
|
"""Load configuration data for Anthropic connector"""
|
||||||
return {
|
return {
|
||||||
|
|
@ -276,9 +305,12 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"model": model.name,
|
"model": model.name,
|
||||||
"messages": converted_messages,
|
"messages": converted_messages,
|
||||||
"temperature": temperature,
|
|
||||||
}
|
}
|
||||||
|
# Extended-Thinking models (claude-opus-4-7 etc.) reject any
|
||||||
|
# `temperature` value -- only the model default is accepted.
|
||||||
|
if _supportsCustomTemperature(model.name):
|
||||||
|
payload["temperature"] = temperature
|
||||||
|
|
||||||
# Anthropic requires max_tokens - use provided value or throw error
|
# Anthropic requires max_tokens - use provided value or throw error
|
||||||
if maxTokens is None:
|
if maxTokens is None:
|
||||||
raise ValueError("maxTokens must be provided for Anthropic API calls")
|
raise ValueError("maxTokens must be provided for Anthropic API calls")
|
||||||
|
|
@ -381,10 +413,11 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"model": model.name,
|
"model": model.name,
|
||||||
"messages": converted,
|
"messages": converted,
|
||||||
"temperature": temperature,
|
|
||||||
"max_tokens": model.maxTokens,
|
"max_tokens": model.maxTokens,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
}
|
}
|
||||||
|
if _supportsCustomTemperature(model.name):
|
||||||
|
payload["temperature"] = temperature
|
||||||
if system_prompt:
|
if system_prompt:
|
||||||
payload["system"] = system_prompt
|
payload["system"] = system_prompt
|
||||||
if modelCall.tools:
|
if modelCall.tools:
|
||||||
|
|
@ -608,10 +641,10 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
|
|
||||||
if systemPrompt:
|
if systemPrompt:
|
||||||
payload["system"] = systemPrompt
|
payload["system"] = systemPrompt
|
||||||
|
|
||||||
# Set temperature from model
|
if _supportsCustomTemperature(model.name):
|
||||||
payload["temperature"] = temperature
|
payload["temperature"] = temperature
|
||||||
|
|
||||||
# Make API call with headers from httpClient (which includes anthropic-version)
|
# Make API call with headers from httpClient (which includes anthropic-version)
|
||||||
response = await self.httpClient.post(
|
response = await self.httpClient.post(
|
||||||
"https://api.anthropic.com/v1/messages",
|
"https://api.anthropic.com/v1/messages",
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,30 @@ from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingMode
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _supportsCustomTemperature(modelName: str) -> bool:
|
||||||
|
"""Check whether an OpenAI model accepts a custom `temperature` value.
|
||||||
|
|
||||||
|
GPT-5.x and the o-series (o1/o3/o4) reasoning models reject every
|
||||||
|
`temperature` value other than the default (1) with HTTP 400
|
||||||
|
`unsupported_value`. For these models we must omit `temperature`
|
||||||
|
from the payload entirely. Older chat-completions models
|
||||||
|
(gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) still accept any value
|
||||||
|
in [0, 2].
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if `temperature` may be sent; False if it must be omitted.
|
||||||
|
"""
|
||||||
|
if not modelName:
|
||||||
|
return True
|
||||||
|
name = modelName.lower()
|
||||||
|
if name.startswith("gpt-5"):
|
||||||
|
return False
|
||||||
|
if name.startswith("o1") or name.startswith("o3") or name.startswith("o4"):
|
||||||
|
return False
|
||||||
|
return True
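
# The same gating, illustrated with example model names (not exhaustive):
#   assert _supportsCustomTemperature("gpt-4o-mini") is True   # legacy chat model: any value in [0, 2]
#   assert _supportsCustomTemperature("gpt-5.1") is False      # gpt-5.x: default temperature only
#   assert _supportsCustomTemperature("o3-mini") is False      # o-series reasoning model: omit the field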
|
||||||
|
|
||||||
|
|
||||||
def loadConfigData():
|
def loadConfigData():
|
||||||
"""Load configuration data for OpenAI connector"""
|
"""Load configuration data for OpenAI connector"""
|
||||||
return {
|
return {
|
||||||
|
|
@ -344,14 +368,18 @@ class AiOpenai(BaseConnectorAi):
|
||||||
payload = {
|
payload = {
|
||||||
"model": model.name,
|
"model": model.name,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": temperature,
|
|
||||||
# Universal output-length cap. `max_tokens` is deprecated and
|
# Universal output-length cap. `max_tokens` is deprecated and
|
||||||
# rejected outright by gpt-5.x / o-series; `max_completion_tokens`
|
# rejected outright by gpt-5.x / o-series; `max_completion_tokens`
|
||||||
# is accepted by every current chat-completions model (legacy
|
# is accepted by every current chat-completions model (legacy
|
||||||
# gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference.
|
# gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference.
|
||||||
"max_completion_tokens": maxTokens
|
"max_completion_tokens": maxTokens
|
||||||
}
|
}
|
||||||
|
# gpt-5.x and o-series only accept the default temperature (1) and
|
||||||
|
# return HTTP 400 `unsupported_value` for anything else - omit the
|
||||||
|
# field entirely for those models.
|
||||||
|
if _supportsCustomTemperature(model.name):
|
||||||
|
payload["temperature"] = temperature
|
||||||
|
|
||||||
if modelCall.tools:
|
if modelCall.tools:
|
||||||
payload["tools"] = modelCall.tools
|
payload["tools"] = modelCall.tools
|
||||||
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
||||||
|
|
@ -428,13 +456,15 @@ class AiOpenai(BaseConnectorAi):
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"model": model.name,
|
"model": model.name,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": temperature,
|
|
||||||
# See callAiBasic for the rationale: `max_completion_tokens`
|
# See callAiBasic for the rationale: `max_completion_tokens`
|
||||||
# is the universal output-length parameter; `max_tokens` is
|
# is the universal output-length parameter; `max_tokens` is
|
||||||
# deprecated and rejected by gpt-5.x / o-series.
|
# deprecated and rejected by gpt-5.x / o-series.
|
||||||
"max_completion_tokens": model.maxTokens,
|
"max_completion_tokens": model.maxTokens,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
}
|
}
|
||||||
|
if _supportsCustomTemperature(model.name):
|
||||||
|
payload["temperature"] = temperature
|
||||||
|
|
||||||
if modelCall.tools:
|
if modelCall.tools:
|
||||||
payload["tools"] = modelCall.tools
|
payload["tools"] = modelCall.tools
|
||||||
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
payload["tool_choice"] = modelCall.toolChoice or "auto"
|
||||||
|
|
@ -585,15 +615,15 @@ class AiOpenai(BaseConnectorAi):
|
||||||
# Use the messages directly - they should already contain the image data
|
# Use the messages directly - they should already contain the image data
|
||||||
# in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
|
# in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
|
||||||
|
|
||||||
# Use parameters from model
|
|
||||||
temperature = model.temperature
|
temperature = model.temperature
|
||||||
# Don't set maxTokens - let the model use its full context length
|
# Don't set maxTokens - let the model use its full context length
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model.name,
|
"model": model.name,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": temperature
|
|
||||||
}
|
}
|
||||||
|
if _supportsCustomTemperature(model.name):
|
||||||
|
payload["temperature"] = temperature
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
response = await self.httpClient.post(
|
||||||
model.apiUrl,
|
model.apiUrl,
|
||||||
|
|
|
||||||
|
|
@ -9,13 +9,15 @@ googleAuthScopes = [
|
||||||
"https://www.googleapis.com/auth/userinfo.profile",
|
"https://www.googleapis.com/auth/userinfo.profile",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Google — Data app (Gmail + Drive + identity for token responses)
|
# Google — Data app (Gmail + Drive + Calendar + Contacts + identity for token responses)
|
||||||
googleDataScopes = [
|
googleDataScopes = [
|
||||||
"openid",
|
"openid",
|
||||||
"https://www.googleapis.com/auth/userinfo.email",
|
"https://www.googleapis.com/auth/userinfo.email",
|
||||||
"https://www.googleapis.com/auth/userinfo.profile",
|
"https://www.googleapis.com/auth/userinfo.profile",
|
||||||
"https://www.googleapis.com/auth/gmail.readonly",
|
"https://www.googleapis.com/auth/gmail.readonly",
|
||||||
"https://www.googleapis.com/auth/drive.readonly",
|
"https://www.googleapis.com/auth/drive.readonly",
|
||||||
|
"https://www.googleapis.com/auth/calendar.readonly",
|
||||||
|
"https://www.googleapis.com/auth/contacts.readonly",
|
||||||
]
|
]
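
# For context, this scope list is what gets joined into the consent request. A minimal sketch of
# building the authorization URL follows; the endpoint is the standard Google OAuth one, while the
# helper name and parameter values are illustrative and not part of this module:
#
#   from urllib.parse import urlencode
#
#   def buildGoogleDataConsentUrl(clientId: str, redirectUri: str, state: str) -> str:
#       # offline access + forced consent are needed to receive a refresh token
#       params = {
#           "client_id": clientId,
#           "redirect_uri": redirectUri,
#           "response_type": "code",
#           "scope": " ".join(googleDataScopes),
#           "access_type": "offline",
#           "prompt": "consent",
#           "state": state,
#       }
#       return "https://accounts.google.com/o/oauth2/v2/auth?" + urlencode(params)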
|
||||||
|
|
||||||
# Microsoft — Auth app: Graph profile only (MSAL adds openid, profile, offline_access, …)
|
# Microsoft — Auth app: Graph profile only (MSAL adds openid, profile, offline_access, …)
|
||||||
|
|
@ -34,6 +36,8 @@ msftDataScopes = [
|
||||||
"OnlineMeetings.Read",
|
"OnlineMeetings.Read",
|
||||||
"Chat.ReadWrite",
|
"Chat.ReadWrite",
|
||||||
"ChatMessage.Send",
|
"ChatMessage.Send",
|
||||||
|
"Calendars.Read",
|
||||||
|
"Contacts.Read",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -42,14 +46,8 @@ def msftDataScopesForRefresh() -> str:
|
||||||
return " ".join(msftDataScopes)
|
return " ".join(msftDataScopes)
|
||||||
|
|
||||||
|
|
||||||
# Infomaniak — Data app (kDrive + Mail; user_info needed for /1/profile lookup)
|
# Infomaniak intentionally has no OAuth scope set: the kDrive + Mail data APIs
|
||||||
infomaniakDataScopes = [
|
# are only reachable with manually issued Personal Access Tokens (see
|
||||||
"user_info",
|
# wiki/d-guides/infomaniak-token-setup.md). The OAuth /authorize endpoint at
|
||||||
"kdrive",
|
# login.infomaniak.com only accepts identity scopes (openid/profile/email/phone)
|
||||||
"mail",
|
# and does not return tokens that work against /1/* data routes.
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def infomaniakDataScopesForRefresh() -> str:
|
|
||||||
"""Space-separated scope string identical to authorization request."""
|
|
||||||
return " ".join(infomaniakDataScopes)
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from modules.datamodels.datamodelSecurity import Token, TokenPurpose
|
||||||
from modules.datamodels.datamodelUam import AuthAuthority
|
from modules.datamodels.datamodelUam import AuthAuthority
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp, parseTimestamp
|
from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp, parseTimestamp
|
||||||
from modules.auth.oauthProviderConfig import msftDataScopesForRefresh, infomaniakDataScopesForRefresh
|
from modules.auth.oauthProviderConfig import msftDataScopesForRefresh
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -30,9 +30,6 @@ class TokenManager:
|
||||||
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_ID")
|
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_ID")
|
||||||
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_SECRET")
|
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_SECRET")
|
||||||
|
|
||||||
# Infomaniak Data OAuth (kDrive + Mail)
|
|
||||||
self.infomaniak_client_id = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_ID")
|
|
||||||
self.infomaniak_client_secret = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_SECRET")
|
|
||||||
|
|
||||||
def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
|
def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
|
||||||
"""Refresh Microsoft OAuth token using refresh token"""
|
"""Refresh Microsoft OAuth token using refresh token"""
|
||||||
|
|
@ -166,65 +163,6 @@ class TokenManager:
|
||||||
logger.error(f"Error refreshing Google token: {str(e)}")
|
logger.error(f"Error refreshing Google token: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def refreshInfomaniakToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
|
|
||||||
"""Refresh Infomaniak OAuth token using refresh token"""
|
|
||||||
try:
|
|
||||||
logger.debug(f"refreshInfomaniakToken: Starting Infomaniak token refresh for user {userId}")
|
|
||||||
|
|
||||||
if not self.infomaniak_client_id or not self.infomaniak_client_secret:
|
|
||||||
logger.error("Infomaniak OAuth configuration not found")
|
|
||||||
return None
|
|
||||||
|
|
||||||
tokenUrl = "https://login.infomaniak.com/token"
|
|
||||||
data = {
|
|
||||||
"client_id": self.infomaniak_client_id,
|
|
||||||
"client_secret": self.infomaniak_client_secret,
|
|
||||||
"grant_type": "refresh_token",
|
|
||||||
"refresh_token": refreshToken,
|
|
||||||
"scope": infomaniakDataScopesForRefresh(),
|
|
||||||
}
|
|
||||||
|
|
||||||
with httpx.Client(timeout=30.0) as client:
|
|
||||||
response = client.post(tokenUrl, data=data)
|
|
||||||
logger.debug(f"refreshInfomaniakToken: HTTP response status: {response.status_code}")
|
|
||||||
|
|
||||||
if response.status_code == 200:
|
|
||||||
tokenData = response.json()
|
|
||||||
if "access_token" not in tokenData:
|
|
||||||
logger.error("Infomaniak token refresh response missing access_token")
|
|
||||||
return None
|
|
||||||
|
|
||||||
newToken = Token(
|
|
||||||
userId=userId,
|
|
||||||
authority=AuthAuthority.INFOMANIAK,
|
|
||||||
connectionId=oldToken.connectionId,
|
|
||||||
tokenPurpose=TokenPurpose.DATA_CONNECTION,
|
|
||||||
tokenAccess=tokenData["access_token"],
|
|
||||||
tokenRefresh=tokenData.get("refresh_token", refreshToken),
|
|
||||||
tokenType=tokenData.get("token_type", "bearer"),
|
|
||||||
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
|
|
||||||
createdAt=getUtcTimestamp(),
|
|
||||||
)
|
|
||||||
return newToken
|
|
||||||
|
|
||||||
logger.error(
|
|
||||||
f"Failed to refresh Infomaniak token: {response.status_code} - {response.text}"
|
|
||||||
)
|
|
||||||
if response.status_code == 400:
|
|
||||||
try:
|
|
||||||
errorData = response.json()
|
|
||||||
if errorData.get("error") == "invalid_grant":
|
|
||||||
logger.warning(
|
|
||||||
"Infomaniak refresh token is invalid or expired - user needs to re-authenticate"
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error refreshing Infomaniak token: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def refreshToken(self, oldToken: Token) -> Optional[Token]:
|
def refreshToken(self, oldToken: Token) -> Optional[Token]:
|
||||||
"""Refresh an expired token using the appropriate OAuth service"""
|
"""Refresh an expired token using the appropriate OAuth service"""
|
||||||
try:
|
try:
|
||||||
|
|
@ -268,9 +206,6 @@ class TokenManager:
|
||||||
elif oldToken.authority == AuthAuthority.GOOGLE:
|
elif oldToken.authority == AuthAuthority.GOOGLE:
|
||||||
logger.debug(f"refreshToken: Refreshing Google token")
|
logger.debug(f"refreshToken: Refreshing Google token")
|
||||||
return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
|
return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
|
||||||
elif oldToken.authority == AuthAuthority.INFOMANIAK:
|
|
||||||
logger.debug(f"refreshToken: Refreshing Infomaniak token")
|
|
||||||
return self.refreshInfomaniakToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
|
logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -144,45 +144,6 @@ class TokenRefreshService:
|
||||||
logger.error(f"Error refreshing Microsoft token for connection {connection.id}: {str(e)}")
|
logger.error(f"Error refreshing Microsoft token for connection {connection.id}: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def _refresh_infomaniak_token(self, interface, connection: UserConnection) -> bool:
|
|
||||||
"""Refresh Infomaniak OAuth token"""
|
|
||||||
try:
|
|
||||||
logger.debug(f"Refreshing Infomaniak token for connection {connection.id}")
|
|
||||||
|
|
||||||
current_token = interface.getConnectionToken(connection.id)
|
|
||||||
if not current_token:
|
|
||||||
logger.warning(f"No Infomaniak token found for connection {connection.id}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
from modules.auth.tokenManager import TokenManager
|
|
||||||
token_manager = TokenManager()
|
|
||||||
|
|
||||||
refreshedToken = token_manager.refreshToken(current_token)
|
|
||||||
if refreshedToken:
|
|
||||||
interface.saveConnectionToken(refreshedToken)
|
|
||||||
interface.db.recordModify(UserConnection, connection.id, {
|
|
||||||
"lastChecked": getUtcTimestamp(),
|
|
||||||
"expiresAt": refreshedToken.expiresAt,
|
|
||||||
})
|
|
||||||
logger.info(f"Successfully refreshed Infomaniak token for connection {connection.id}")
|
|
||||||
try:
|
|
||||||
audit_logger.logSecurityEvent(
|
|
||||||
userId=str(connection.userId),
|
|
||||||
mandateId="system",
|
|
||||||
action="token_refresh",
|
|
||||||
details=f"Infomaniak token refreshed for connection {connection.id}",
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return True
|
|
||||||
|
|
||||||
logger.warning(f"Failed to refresh Infomaniak token for connection {connection.id}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error refreshing Infomaniak token for connection {connection.id}: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def refresh_expired_tokens(self, user_id: str) -> Dict[str, Any]:
|
async def refresh_expired_tokens(self, user_id: str) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Refresh expired OAuth tokens for a user
|
Refresh expired OAuth tokens for a user
|
||||||
|
|
@ -216,7 +177,7 @@ class TokenRefreshService:
|
||||||
for connection in connections:
|
for connection in connections:
|
||||||
# Only refresh expired OAuth connections
|
# Only refresh expired OAuth connections
|
||||||
if (connection.tokenStatus == 'expired' and
|
if (connection.tokenStatus == 'expired' and
|
||||||
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT, AuthAuthority.INFOMANIAK]):
|
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT]):
|
||||||
|
|
||||||
# Check rate limiting
|
# Check rate limiting
|
||||||
if self._is_rate_limited(connection.id):
|
if self._is_rate_limited(connection.id):
|
||||||
|
|
@ -233,8 +194,6 @@ class TokenRefreshService:
|
||||||
success = await self._refresh_google_token(root_interface, connection)
|
success = await self._refresh_google_token(root_interface, connection)
|
||||||
elif connection.authority == AuthAuthority.MSFT:
|
elif connection.authority == AuthAuthority.MSFT:
|
||||||
success = await self._refresh_microsoft_token(root_interface, connection)
|
success = await self._refresh_microsoft_token(root_interface, connection)
|
||||||
elif connection.authority == AuthAuthority.INFOMANIAK:
|
|
||||||
success = await self._refresh_infomaniak_token(root_interface, connection)
|
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
refreshed_count += 1
|
refreshed_count += 1
|
||||||
|
|
@ -289,7 +248,7 @@ class TokenRefreshService:
|
||||||
# Only refresh active tokens that expire soon
|
# Only refresh active tokens that expire soon
|
||||||
if (connection.tokenStatus == 'active' and
|
if (connection.tokenStatus == 'active' and
|
||||||
connection.tokenExpiresAt and
|
connection.tokenExpiresAt and
|
||||||
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT, AuthAuthority.INFOMANIAK]):
|
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT]):
|
||||||
|
|
||||||
# Check if token expires within 5 minutes
|
# Check if token expires within 5 minutes
|
||||||
time_until_expiry = connection.tokenExpiresAt - current_time
|
time_until_expiry = connection.tokenExpiresAt - current_time
|
||||||
|
|
@ -310,8 +269,6 @@ class TokenRefreshService:
|
||||||
success = await self._refresh_google_token(root_interface, connection)
|
success = await self._refresh_google_token(root_interface, connection)
|
||||||
elif connection.authority == AuthAuthority.MSFT:
|
elif connection.authority == AuthAuthority.MSFT:
|
||||||
success = await self._refresh_microsoft_token(root_interface, connection)
|
success = await self._refresh_microsoft_token(root_interface, connection)
|
||||||
elif connection.authority == AuthAuthority.INFOMANIAK:
|
|
||||||
success = await self._refresh_infomaniak_token(root_interface, connection)
|
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
refreshed_count += 1
|
refreshed_count += 1
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,47 @@ logger = logging.getLogger(__name__)
|
||||||
# No mapping needed - table name = Pydantic model name exactly
|
# No mapping needed - table name = Pydantic model name exactly
|
||||||
|
|
||||||
|
|
||||||
|
class DatabaseQueryError(RuntimeError):
|
||||||
|
"""Raised by DB read methods when the underlying SQL query failed.
|
||||||
|
|
||||||
|
Empty result sets do NOT raise this — they return ``[]`` / ``None`` /
|
||||||
|
``{"items": [], "totalItems": 0, "totalPages": 0}`` as before. This
|
||||||
|
exception is reserved for **real** failures: psycopg2 ProgrammingError,
|
||||||
|
DataError, OperationalError, IntegrityError, plus any unexpected
|
||||||
|
Python error raised inside a query path.
|
||||||
|
|
||||||
|
Read methods used to silently swallow such errors and return empty
|
||||||
|
collections, which made every caller incapable of distinguishing
|
||||||
|
"no rows" from "broken query / type adapter / dropped column / lost
|
||||||
|
connection". That hid concrete bugs (e.g. dict passed where Postgres
|
||||||
|
expected a UUID string) behind misleading downstream "no record found"
|
||||||
|
errors.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, table: str, message: str, original: BaseException = None):
|
||||||
|
super().__init__(f"{table}: {message}")
|
||||||
|
self.table = table
|
||||||
|
self.original = original
|
||||||
|
|
||||||
|
|
||||||
|
def _rollbackQuietly(connection) -> None:
|
||||||
|
"""Restore the connection state after a failed query.
|
||||||
|
|
||||||
|
Postgres puts the connection in an error state after any failed
|
||||||
|
statement; subsequent queries on the same connection raise
|
||||||
|
``InFailedSqlTransaction`` until we rollback. We swallow rollback
|
||||||
|
errors because the original query error is what the caller should
|
||||||
|
see — a secondary rollback failure typically means the connection
|
||||||
|
is gone and will be reopened on the next ``_ensure_connection``.
|
||||||
|
"""
|
||||||
|
if connection is None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
connection.rollback()
|
||||||
|
except Exception:
|
||||||
|
pass
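
# A short caller-side sketch of the new contract (db, UserConnection and connectionId stand in
# for whatever the caller already has): empty results still come back as None or empty
# collections, while a genuinely failed query now surfaces as DatabaseQueryError after the
# connection has been rolled back quietly.
#
#   try:
#       record = db.getRecord(UserConnection, connectionId)
#   except DatabaseQueryError as e:
#       # real failure: broken SQL, type adapter, dropped column, lost connection
#       logger.error(f"Query against {e.table} failed: {e.original}")
#       raise
#   if record is None:
#       # legitimate empty result: the row simply does not exist
#       ...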
|
||||||
|
|
||||||
|
|
||||||
class SystemTable(PowerOnModel):
|
class SystemTable(PowerOnModel):
|
||||||
"""Data model for system table entries"""
|
"""Data model for system table entries"""
|
||||||
|
|
||||||
|
|
@ -762,7 +803,8 @@ class DatabaseConnector:
|
||||||
return record
|
return record
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error loading record {recordId} from table {table}: {e}")
|
logger.error(f"Error loading record {recordId} from table {table}: {e}")
|
||||||
return None
|
_rollbackQuietly(getattr(self, "connection", None))
|
||||||
|
raise DatabaseQueryError(table, str(e), original=e) from e
|
||||||
|
|
||||||
def getRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]:
|
def getRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Load one row by primary key (routes / services; wraps _loadRecord)."""
|
"""Load one row by primary key (routes / services; wraps _loadRecord)."""
|
||||||
|
|
@ -848,7 +890,8 @@ class DatabaseConnector:
|
||||||
return records
|
return records
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error loading table {table}: {e}")
|
logger.error(f"Error loading table {table}: {e}")
|
||||||
return []
|
_rollbackQuietly(getattr(self, "connection", None))
|
||||||
|
raise DatabaseQueryError(table, str(e), original=e) from e
|
||||||
|
|
||||||
def _registerInitialId(self, table: str, initialId: str) -> bool:
|
def _registerInitialId(self, table: str, initialId: str) -> bool:
|
||||||
"""Registers the initial ID for a table."""
|
"""Registers the initial ID for a table."""
|
||||||
|
|
@ -1047,7 +1090,8 @@ class DatabaseConnector:
|
||||||
return records
|
return records
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error loading records from table {table}: {e}")
|
logger.error(f"Error loading records from table {table}: {e}")
|
||||||
return []
|
_rollbackQuietly(getattr(self, "connection", None))
|
||||||
|
raise DatabaseQueryError(table, str(e), original=e) from e
|
||||||
|
|
||||||
def _buildPaginationClauses(
|
def _buildPaginationClauses(
|
||||||
self,
|
self,
|
||||||
|
|
@ -1270,7 +1314,8 @@ class DatabaseConnector:
|
||||||
return {"items": records, "totalItems": totalItems, "totalPages": totalPages}
|
return {"items": records, "totalItems": totalItems, "totalPages": totalPages}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in getRecordsetPaginated for table {table}: {e}")
|
logger.error(f"Error in getRecordsetPaginated for table {table}: {e}")
|
||||||
return {"items": [], "totalItems": 0, "totalPages": 0}
|
_rollbackQuietly(getattr(self, "connection", None))
|
||||||
|
raise DatabaseQueryError(table, str(e), original=e) from e
|
||||||
|
|
||||||
def getDistinctColumnValues(
|
def getDistinctColumnValues(
|
||||||
self,
|
self,
|
||||||
|
|
@ -1332,7 +1377,8 @@ class DatabaseConnector:
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}")
|
logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}")
|
||||||
return []
|
_rollbackQuietly(getattr(self, "connection", None))
|
||||||
|
raise DatabaseQueryError(table, str(e), original=e) from e
|
||||||
|
|
||||||
def recordCreate(
|
def recordCreate(
|
||||||
self, model_class: type, record: Union[Dict[str, Any], BaseModel]
|
self, model_class: type, record: Union[Dict[str, Any], BaseModel]
|
||||||
|
|
@ -1710,7 +1756,8 @@ class DatabaseConnector:
|
||||||
return records
|
return records
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in semantic search on {table}: {e}")
|
logger.error(f"Error in semantic search on {table}: {e}")
|
||||||
return []
|
_rollbackQuietly(getattr(self, "connection", None))
|
||||||
|
raise DatabaseQueryError(table, str(e), original=e) from e
|
||||||
|
|
||||||
def close(self, forceClose: bool = False):
|
def close(self, forceClose: bool = False):
|
||||||
"""Close the database connection.
|
"""Close the database connection.
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,8 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
|
_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
|
||||||
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
|
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
|
||||||
|
_CALENDAR_BASE = "https://www.googleapis.com/calendar/v3"
|
||||||
|
_PEOPLE_BASE = "https://people.googleapis.com/v1"
|
||||||
|
|
||||||
|
|
||||||
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
|
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
|
||||||
|
|
@ -274,12 +276,480 @@ class GmailAdapter(ServiceAdapter):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class CalendarAdapter(ServiceAdapter):
|
||||||
|
"""Google Calendar ServiceAdapter -- browse calendars, list events, .ics download.
|
||||||
|
|
||||||
|
Path conventions:
|
||||||
|
``""`` / ``"/"`` -> list calendars from ``calendarList``
|
||||||
|
``"/<calendarId>"`` -> list upcoming events in that calendar
|
||||||
|
``"/<calendarId>/<eventId>"`` -> reserved for future event detail browse
|
||||||
|
"""
|
||||||
|
|
||||||
|
_DEFAULT_EVENT_LIMIT = 100
|
||||||
|
_MAX_EVENT_LIMIT = 2500
|
||||||
|
|
||||||
|
def __init__(self, accessToken: str):
|
||||||
|
self._token = accessToken
|
||||||
|
|
||||||
|
async def browse(
|
||||||
|
self,
|
||||||
|
path: str,
|
||||||
|
filter: Optional[str] = None,
|
||||||
|
limit: Optional[int] = None,
|
||||||
|
) -> List[ExternalEntry]:
|
||||||
|
cleanPath = (path or "").strip("/")
|
||||||
|
if not cleanPath:
|
||||||
|
url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250"
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" in result:
|
||||||
|
logger.warning(f"Google Calendar list failed: {result['error']}")
|
||||||
|
return []
|
||||||
|
calendars = result.get("items", [])
|
||||||
|
if filter:
|
||||||
|
f = filter.lower()
|
||||||
|
calendars = [c for c in calendars if f in (c.get("summary") or "").lower()]
|
||||||
|
return [
|
||||||
|
ExternalEntry(
|
||||||
|
name=c.get("summaryOverride") or c.get("summary", ""),
|
||||||
|
path=f"/{c.get('id', '')}",
|
||||||
|
isFolder=True,
|
||||||
|
metadata={
|
||||||
|
"id": c.get("id"),
|
||||||
|
"primary": c.get("primary", False),
|
||||||
|
"accessRole": c.get("accessRole"),
|
||||||
|
"backgroundColor": c.get("backgroundColor"),
|
||||||
|
"timeZone": c.get("timeZone"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for c in calendars
|
||||||
|
]
|
||||||
|
|
||||||
|
from urllib.parse import quote
|
||||||
|
calendarId = cleanPath.split("/", 1)[0]
|
||||||
|
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
|
||||||
|
url = (
|
||||||
|
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
||||||
|
f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
|
||||||
|
)
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" in result:
|
||||||
|
logger.warning(f"Google Calendar events failed: {result['error']}")
|
||||||
|
return []
|
||||||
|
events = result.get("items", [])
|
||||||
|
return [
|
||||||
|
ExternalEntry(
|
||||||
|
name=ev.get("summary", "(no title)"),
|
||||||
|
path=f"/{calendarId}/{ev.get('id', '')}",
|
||||||
|
isFolder=False,
|
||||||
|
mimeType="text/calendar",
|
||||||
|
metadata={
|
||||||
|
"id": ev.get("id"),
|
||||||
|
"start": (ev.get("start") or {}).get("dateTime") or (ev.get("start") or {}).get("date"),
|
||||||
|
"end": (ev.get("end") or {}).get("dateTime") or (ev.get("end") or {}).get("date"),
|
||||||
|
"location": ev.get("location"),
|
||||||
|
"organizer": (ev.get("organizer") or {}).get("email"),
|
||||||
|
"htmlLink": ev.get("htmlLink"),
|
||||||
|
"status": ev.get("status"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for ev in events
|
||||||
|
]
|
||||||
|
|
||||||
|
async def download(self, path: str) -> DownloadResult:
|
||||||
|
from urllib.parse import quote
|
||||||
|
cleanPath = (path or "").strip("/")
|
||||||
|
if "/" not in cleanPath:
|
||||||
|
return DownloadResult()
|
||||||
|
calendarId, eventId = cleanPath.split("/", 1)
|
||||||
|
url = f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events/{quote(eventId, safe='')}"
|
||||||
|
ev = await _googleGet(self._token, url)
|
||||||
|
if "error" in ev:
|
||||||
|
logger.warning(f"Google Calendar event fetch failed: {ev['error']}")
|
||||||
|
return DownloadResult()
|
||||||
|
icsBytes = _googleEventToIcs(ev)
|
||||||
|
summary = ev.get("summary") or eventId
|
||||||
|
safeName = _googleSafeFileName(summary) or "event"
|
||||||
|
return DownloadResult(
|
||||||
|
data=icsBytes,
|
||||||
|
fileName=f"{safeName}.ics",
|
||||||
|
mimeType="text/calendar",
|
||||||
|
)
|
||||||
|
|
||||||
|
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||||
|
return {"error": "Google Calendar upload not supported"}
|
||||||
|
|
||||||
|
async def search(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
path: Optional[str] = None,
|
||||||
|
limit: Optional[int] = None,
|
||||||
|
) -> List[ExternalEntry]:
|
||||||
|
from urllib.parse import quote
|
||||||
|
calendarId = (path or "").strip("/").split("/", 1)[0] or "primary"
|
||||||
|
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
|
||||||
|
url = (
|
||||||
|
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
||||||
|
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true"
|
||||||
|
)
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" in result:
|
||||||
|
return []
|
||||||
|
return [
|
||||||
|
ExternalEntry(
|
||||||
|
name=ev.get("summary", "(no title)"),
|
||||||
|
path=f"/{calendarId}/{ev.get('id', '')}",
|
||||||
|
isFolder=False,
|
||||||
|
mimeType="text/calendar",
|
||||||
|
metadata={
|
||||||
|
"id": ev.get("id"),
|
||||||
|
"start": (ev.get("start") or {}).get("dateTime") or (ev.get("start") or {}).get("date"),
|
||||||
|
"end": (ev.get("end") or {}).get("dateTime") or (ev.get("end") or {}).get("date"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for ev in result.get("items", [])
|
||||||
|
]
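
# A hedged usage sketch for the adapter above; the token is a placeholder and the calls
# assume an async context:
#
#   adapter = CalendarAdapter(accessToken="<google-access-token>")
#   calendars = await adapter.browse("/")                # calendarList entries, returned as folders
#   events = await adapter.browse("/primary", limit=25)  # upcoming events of the primary calendar
#   if events:
#       ics = await adapter.download(events[0].path)     # DownloadResult carrying a .ics payload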
|
||||||
|
|
||||||
|
|
||||||
|
class ContactsAdapter(ServiceAdapter):
|
||||||
|
"""Google Contacts ServiceAdapter -- People API (read-only).
|
||||||
|
|
||||||
|
Path conventions:
|
||||||
|
``""`` / ``"/"`` -> list contact groups (incl. virtual ``all`` for the user's connections)
|
||||||
|
``"/all"`` -> list all ``people/me/connections``
|
||||||
|
``"/<groupResourceName>"`` -> list members of that contact group (e.g. ``contactGroups/myFriends``)
|
||||||
|
``"/<group>/<personId>"`` -> reserved for future detail browse;
|
||||||
|
``personId`` is the suffix after ``people/``
|
||||||
|
"""
|
||||||
|
|
||||||
|
_DEFAULT_CONTACT_LIMIT = 200
|
||||||
|
_MAX_CONTACT_LIMIT = 1000
|
||||||
|
_PERSON_FIELDS = (
|
||||||
|
"names,emailAddresses,phoneNumbers,organizations,addresses,biographies,memberships"
|
||||||
|
)
|
||||||
|
|
||||||
|
def __init__(self, accessToken: str):
|
||||||
|
self._token = accessToken
|
||||||
|
|
||||||
|
async def browse(
|
||||||
|
self,
|
||||||
|
path: str,
|
||||||
|
filter: Optional[str] = None,
|
||||||
|
limit: Optional[int] = None,
|
||||||
|
) -> List[ExternalEntry]:
|
||||||
|
cleanPath = (path or "").strip("/")
|
||||||
|
if not cleanPath:
|
||||||
|
entries: List[ExternalEntry] = [
|
||||||
|
ExternalEntry(
|
||||||
|
name="Alle Kontakte",
|
||||||
|
path="/all",
|
||||||
|
isFolder=True,
|
||||||
|
metadata={"id": "all", "isVirtual": True},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
url = f"{_PEOPLE_BASE}/contactGroups?pageSize=200"
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" not in result:
|
||||||
|
for grp in result.get("contactGroups", []):
|
||||||
|
name = grp.get("formattedName") or grp.get("name") or ""
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
entries.append(
|
||||||
|
ExternalEntry(
|
||||||
|
name=name,
|
||||||
|
path=f"/{grp.get('resourceName', '')}",
|
||||||
|
isFolder=True,
|
||||||
|
metadata={
|
||||||
|
"id": grp.get("resourceName"),
|
||||||
|
"memberCount": grp.get("memberCount", 0),
|
||||||
|
"groupType": grp.get("groupType"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Google contactGroups list failed: {result['error']}")
|
||||||
|
return entries
|
||||||
|
|
||||||
|
from urllib.parse import quote
|
||||||
|
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
|
||||||
|
groupRef = cleanPath.split("/", 1)[0]
|
||||||
|
if groupRef == "all":
|
||||||
|
url = (
|
||||||
|
f"{_PEOPLE_BASE}/people/me/connections"
|
||||||
|
f"?pageSize={min(effectiveLimit, 1000)}&personFields={self._PERSON_FIELDS}"
|
||||||
|
)
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" in result:
|
||||||
|
logger.warning(f"Google People connections failed: {result['error']}")
|
||||||
|
return []
|
||||||
|
people = result.get("connections", [])
|
||||||
|
else:
|
||||||
|
groupResource = groupRef
|
||||||
|
grpUrl = (
|
||||||
|
f"{_PEOPLE_BASE}/{quote(groupResource, safe='/')}"
|
||||||
|
f"?maxMembers={min(effectiveLimit, 1000)}"
|
||||||
|
)
|
||||||
|
grpResult = await _googleGet(self._token, grpUrl)
|
||||||
|
if "error" in grpResult:
|
||||||
|
logger.warning(f"Google contactGroup detail failed: {grpResult['error']}")
|
||||||
|
return []
|
||||||
|
memberResourceNames = grpResult.get("memberResourceNames") or []
|
||||||
|
if not memberResourceNames:
|
||||||
|
return []
|
||||||
|
chunkSize = 200
|
||||||
|
people: List[Dict[str, Any]] = []
|
||||||
|
for i in range(0, min(len(memberResourceNames), effectiveLimit), chunkSize):
|
||||||
|
chunk = memberResourceNames[i : i + chunkSize]
|
||||||
|
params = "&".join(f"resourceNames={quote(rn, safe='/')}" for rn in chunk)
|
||||||
|
batchUrl = f"{_PEOPLE_BASE}/people:batchGet?{params}&personFields={self._PERSON_FIELDS}"
|
||||||
|
batchResult = await _googleGet(self._token, batchUrl)
|
||||||
|
if "error" in batchResult:
|
||||||
|
logger.warning(f"Google People batchGet failed: {batchResult['error']}")
|
||||||
|
continue
|
||||||
|
for resp in batchResult.get("responses", []):
|
||||||
|
person = resp.get("person")
|
||||||
|
if person:
|
||||||
|
people.append(person)
|
||||||
|
if len(people) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
|
||||||
|
return [
|
||||||
|
ExternalEntry(
|
||||||
|
name=_googlePersonLabel(p) or "(no name)",
|
||||||
|
path=f"/{groupRef}/{(p.get('resourceName', '') or '').split('/')[-1]}",
|
||||||
|
isFolder=False,
|
||||||
|
mimeType="text/vcard",
|
||||||
|
metadata={
|
||||||
|
"id": p.get("resourceName"),
|
||||||
|
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
|
||||||
|
"phones": [pn.get("value") for pn in (p.get("phoneNumbers") or []) if pn.get("value")],
|
||||||
|
"organization": (p.get("organizations") or [{}])[0].get("name") if p.get("organizations") else None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for p in people[:effectiveLimit]
|
||||||
|
]
|
||||||
|
|
||||||
|
async def download(self, path: str) -> DownloadResult:
|
||||||
|
from urllib.parse import quote
|
||||||
|
cleanPath = (path or "").strip("/")
|
||||||
|
if "/" not in cleanPath:
|
||||||
|
return DownloadResult()
|
||||||
|
personSuffix = cleanPath.split("/")[-1]
|
||||||
|
if not personSuffix:
|
||||||
|
return DownloadResult()
|
||||||
|
url = f"{_PEOPLE_BASE}/people/{quote(personSuffix, safe='')}?personFields={self._PERSON_FIELDS}"
|
||||||
|
person = await _googleGet(self._token, url)
|
||||||
|
if "error" in person:
|
||||||
|
logger.warning(f"Google People fetch failed: {person['error']}")
|
||||||
|
return DownloadResult()
|
||||||
|
vcfBytes = _googlePersonToVcard(person)
|
||||||
|
label = _googlePersonLabel(person) or personSuffix
|
||||||
|
safeName = _googleSafeFileName(label) or "contact"
|
||||||
|
return DownloadResult(
|
||||||
|
data=vcfBytes,
|
||||||
|
fileName=f"{safeName}.vcf",
|
||||||
|
mimeType="text/vcard",
|
||||||
|
)
|
||||||
|
|
||||||
|
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
|
||||||
|
return {"error": "Google Contacts upload not supported"}
|
||||||
|
|
||||||
|
async def search(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
path: Optional[str] = None,
|
||||||
|
limit: Optional[int] = None,
|
||||||
|
) -> List[ExternalEntry]:
|
||||||
|
from urllib.parse import quote
|
||||||
|
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
|
||||||
|
url = (
|
||||||
|
f"{_PEOPLE_BASE}/people:searchContacts"
|
||||||
|
f"?query={quote(query, safe='')}&pageSize={min(effectiveLimit, 30)}"
|
||||||
|
f"&readMask={self._PERSON_FIELDS}"
|
||||||
|
)
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" in result:
|
||||||
|
return []
|
||||||
|
entries: List[ExternalEntry] = []
|
||||||
|
for r in result.get("results", []):
|
||||||
|
p = r.get("person") or {}
|
||||||
|
entries.append(
|
||||||
|
ExternalEntry(
|
||||||
|
name=_googlePersonLabel(p) or "(no name)",
|
||||||
|
path=f"/search/{(p.get('resourceName', '') or '').split('/')[-1]}",
|
||||||
|
isFolder=False,
|
||||||
|
mimeType="text/vcard",
|
||||||
|
metadata={
|
||||||
|
"id": p.get("resourceName"),
|
||||||
|
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return entries
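
# The matching sketch for contacts, again with placeholder values inside an async context:
#
#   contacts = ContactsAdapter(accessToken="<google-access-token>")
#   groups = await contacts.browse("/")               # virtual "all" entry plus contactGroups/*
#   people = await contacts.browse("/all", limit=50)  # people/me/connections
#   hits = await contacts.search("meier")             # people:searchContacts, capped at 30 per page
#   if people:
#       vcf = await contacts.download(people[0].path)  # DownloadResult carrying a .vcf payload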
|
||||||
|
|
||||||
|
|
||||||
|
def _googleSafeFileName(name: str) -> str:
|
||||||
|
import re
|
||||||
|
return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name or "")[:80].strip(". ")
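
# Illustrative inputs and outputs:
#   _googleSafeFileName('Team: Weekly/Planning?') -> 'Team_ Weekly_Planning_'
#   _googleSafeFileName('...hidden')              -> 'hidden'   (leading dots are stripped)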
|
||||||
|
|
||||||
|
|
||||||
|
def _googleIcsEscape(value: str) -> str:
|
||||||
|
if value is None:
|
||||||
|
return ""
|
||||||
|
return (
|
||||||
|
value.replace("\\", "\\\\")
|
||||||
|
.replace(";", "\\;")
|
||||||
|
.replace(",", "\\,")
|
||||||
|
.replace("\r\n", "\\n")
|
||||||
|
.replace("\n", "\\n")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _googleIcsDateTime(value: Optional[str]) -> Optional[str]:
|
||||||
|
"""Convert a Google Calendar dateTime/date string to RFC 5545 format (UTC)."""
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
try:
|
||||||
|
if "T" not in value:
|
||||||
|
dt = datetime.strptime(value, "%Y-%m-%d")
|
||||||
|
return dt.strftime("%Y%m%d")
|
||||||
|
normalized = value.replace("Z", "+00:00") if value.endswith("Z") else value
|
||||||
|
dt = datetime.fromisoformat(normalized)
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
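
# Illustrative conversions (example values):
#   _googleIcsDateTime("2026-03-01")                -> "20260301"            (all-day date)
#   _googleIcsDateTime("2026-03-01T09:30:00Z")      -> "20260301T093000Z"
#   _googleIcsDateTime("2026-03-01T10:30:00+01:00") -> "20260301T093000Z"    (normalised to UTC)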
|
||||||
|
|
||||||
|
|
||||||
|
def _googleEventToIcs(event: Dict[str, Any]) -> bytes:
    """Build a minimal RFC 5545 VCALENDAR/VEVENT for a Google Calendar event."""
    from datetime import datetime, timezone
    uid = event.get("iCalUID") or event.get("id") or "unknown@poweron"
    summary = _googleIcsEscape(event.get("summary") or "")
    location = _googleIcsEscape(event.get("location") or "")
    description = _googleIcsEscape(event.get("description") or "")
    rawStart = (event.get("start") or {}).get("dateTime") or (event.get("start") or {}).get("date")
    rawEnd = (event.get("end") or {}).get("dateTime") or (event.get("end") or {}).get("date")
    isAllDay = bool((event.get("start") or {}).get("date") and not (event.get("start") or {}).get("dateTime"))
    dtstart = _googleIcsDateTime(rawStart)
    dtend = _googleIcsDateTime(rawEnd)
    dtstamp = _googleIcsDateTime(event.get("updated")) or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

    lines = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        "PRODID:-//PowerOn//Google-Calendar-Adapter//EN",
        "CALSCALE:GREGORIAN",
        "BEGIN:VEVENT",
        f"UID:{uid}",
        f"DTSTAMP:{dtstamp}",
    ]
    if dtstart:
        lines.append(f"DTSTART;VALUE=DATE:{dtstart}" if isAllDay else f"DTSTART:{dtstart}")
    if dtend:
        lines.append(f"DTEND;VALUE=DATE:{dtend}" if isAllDay else f"DTEND:{dtend}")
    if summary:
        lines.append(f"SUMMARY:{summary}")
    if location:
        lines.append(f"LOCATION:{location}")
    if description:
        lines.append(f"DESCRIPTION:{description}")
    organizer = (event.get("organizer") or {}).get("email")
    if organizer:
        lines.append(f"ORGANIZER:mailto:{organizer}")
    for att in (event.get("attendees") or []):
        addr = att.get("email")
        if addr:
            lines.append(f"ATTENDEE:mailto:{addr}")
    lines.append("END:VEVENT")
    lines.append("END:VCALENDAR")
    return ("\r\n".join(lines) + "\r\n").encode("utf-8")

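A minimal sketch of what the builder yields for a timed event; it assumes the helpers above are in scope (same module) and uses made-up event values:

```python
# Sketch (assumes _googleEventToIcs and its helpers above are in scope).
sampleEvent = {
    "id": "evt-123",
    "iCalUID": "evt-123@google.com",
    "summary": "Team sync; Q2 planning",
    "start": {"dateTime": "2024-05-06T09:00:00+02:00"},
    "end": {"dateTime": "2024-05-06T10:00:00+02:00"},
    "organizer": {"email": "owner@example.com"},
    "attendees": [{"email": "guest@example.com"}],
    "updated": "2024-05-01T12:00:00Z",
}

ics = _googleEventToIcs(sampleEvent).decode("utf-8")
assert "UID:evt-123@google.com" in ics
assert "DTSTART:20240506T070000Z" in ics           # +02:00 normalised to UTC
assert "SUMMARY:Team sync\\; Q2 planning" in ics   # ';' escaped per RFC 5545
```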
def _googlePersonLabel(person: Dict[str, Any]) -> str:
    names = person.get("names") or []
    if names:
        primary = names[0]
        display = primary.get("displayName") or ""
        if display:
            return display
        given = primary.get("givenName") or ""
        family = primary.get("familyName") or ""
        full = f"{given} {family}".strip()
        if full:
            return full
    orgs = person.get("organizations") or []
    if orgs and orgs[0].get("name"):
        return orgs[0]["name"]
    emails = person.get("emailAddresses") or []
    if emails and emails[0].get("value"):
        return emails[0]["value"]
    return ""

def _googlePersonToVcard(person: Dict[str, Any]) -> bytes:
    """Build a vCard 3.0 from a Google People API person payload."""
    names = person.get("names") or []
    primaryName = names[0] if names else {}
    given = primaryName.get("givenName") or ""
    family = primaryName.get("familyName") or ""
    middle = primaryName.get("middleName") or ""
    fn = primaryName.get("displayName") or _googlePersonLabel(person) or ""

    lines = [
        "BEGIN:VCARD",
        "VERSION:3.0",
        f"N:{family};{given};{middle};;",
        f"FN:{fn}",
    ]
    orgs = person.get("organizations") or []
    if orgs:
        org = orgs[0]
        orgVal = org.get("name") or ""
        if org.get("department"):
            orgVal = f"{orgVal};{org['department']}"
        if orgVal:
            lines.append(f"ORG:{orgVal}")
        if org.get("title"):
            lines.append(f"TITLE:{org['title']}")
    for em in (person.get("emailAddresses") or []):
        addr = em.get("value")
        if not addr:
            continue
        emailType = (em.get("type") or "INTERNET").upper()
        lines.append(f"EMAIL;TYPE={emailType}:{addr}")
    for ph in (person.get("phoneNumbers") or []):
        val = ph.get("value")
        if not val:
            continue
        phType = (ph.get("type") or "VOICE").upper()
        lines.append(f"TEL;TYPE={phType}:{val}")
    for addr in (person.get("addresses") or []):
        street = addr.get("streetAddress") or ""
        city = addr.get("city") or ""
        region = addr.get("region") or ""
        postal = addr.get("postalCode") or ""
        country = addr.get("country") or ""
        if any([street, city, region, postal, country]):
            adrType = (addr.get("type") or "OTHER").upper()
            lines.append(f"ADR;TYPE={adrType}:;;{street};{city};{region};{postal};{country}")
    bios = person.get("biographies") or []
    if bios and bios[0].get("value"):
        lines.append(f"NOTE:{_googleIcsEscape(bios[0]['value'])}")
    lines.append(f"UID:{person.get('resourceName', '')}")
    lines.append("END:VCARD")
    return ("\r\n".join(lines) + "\r\n").encode("utf-8")

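For orientation, a small usage sketch (assumes the vCard helpers above are in scope; the person payload is invented):

```python
# Sketch (assumes _googlePersonToVcard / _googlePersonLabel above are in scope).
samplePerson = {
    "resourceName": "people/c123",
    "names": [{"displayName": "Ada Lovelace", "givenName": "Ada", "familyName": "Lovelace"}],
    "emailAddresses": [{"value": "ada@example.com", "type": "work"}],
    "organizations": [{"name": "Analytical Engines", "title": "Engineer"}],
}

vcf = _googlePersonToVcard(samplePerson).decode("utf-8")
assert "FN:Ada Lovelace" in vcf
assert "N:Lovelace;Ada;;;" in vcf
assert "EMAIL;TYPE=WORK:ada@example.com" in vcf
assert "UID:people/c123" in vcf
```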
class GoogleConnector(ProviderConnector):
    """Google ProviderConnector -- 1 connection -> Drive + Gmail + Calendar + Contacts."""

    _SERVICE_MAP = {
        "drive": DriveAdapter,
        "gmail": GmailAdapter,
        "calendar": CalendarAdapter,
        "contact": ContactsAdapter,
    }

    def getAvailableServices(self) -> List[str]:

File diff suppressed because it is too large.

@@ -126,6 +126,11 @@ def _stripGraphBase(url: str) -> str:


def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
    isFolder = "folder" in item
    # Graph exposes the driveItem content hash as ``eTag`` (quoted) or
    # ``cTag``; we normalise to a "revision" string so callers can use it as a
    # stable ``contentVersion`` for idempotent ingestion without re-downloading
    # file bytes.
    revision = item.get("eTag") or item.get("cTag")
    return ExternalEntry(
        name=item.get("name", ""),
        path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""),

@@ -137,6 +142,9 @@ def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
            "id": item.get("id"),
            "webUrl": item.get("webUrl"),
            "childCount": item.get("folder", {}).get("childCount") if isFolder else None,
            "revision": revision,
            "lastModifiedDateTime": item.get("lastModifiedDateTime"),
            "parentReference": item.get("parentReference", {}),
        },
    )

@@ -167,21 +175,36 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
            return await self._discoverSites()

        if not folderPath or folderPath == "/":
            endpoint: Optional[str] = f"sites/{siteId}/drive/root/children?$top=200"
        else:
            cleanPath = folderPath.lstrip("/")
            endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children?$top=200"

        # Follow @odata.nextLink until a hard cap is reached so large libraries
        # are fully enumerated (required for bootstrap). Per-page size uses
        # Graph's max supported value to minimise round-trips.
        effectiveLimit = int(limit) if limit is not None else None
        items: List[Dict[str, Any]] = []
        hardCap = 5000
        while endpoint and len(items) < hardCap:
            result = await self._graphGet(endpoint)
            if "error" in result:
                logger.warning(f"SharePoint browse failed: {result['error']}")
                break
            for raw in result.get("value", []) or []:
                items.append(raw)
                if effectiveLimit is not None and len(items) >= effectiveLimit:
                    break
            if effectiveLimit is not None and len(items) >= effectiveLimit:
                break
            nextLink = result.get("@odata.nextLink")
            endpoint = _stripGraphBase(nextLink) if nextLink else None

        entries = [_graphItemToExternalEntry(item, path) for item in items]
        if filter:
            entries = [e for e in entries if _matchFilter(e, filter)]
        if effectiveLimit is not None:
            entries = entries[: max(1, effectiveLimit)]
        return entries

    async def _discoverSites(self) -> List[ExternalEntry]:

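The same @odata.nextLink loop appears, hand-rolled, in several adapters below. A generic helper along these lines could factor it out (sketch only, not part of this changeset; ``fetchPage`` stands in for ``self._graphGet`` and ``stripBase`` for ``_stripGraphBase``):

```python
# Hypothetical helper (not in the diff): shared Graph pagination with limit and hard cap.
from typing import Any, Awaitable, Callable, Dict, List, Optional

async def collectGraphPages(
    fetchPage: Callable[[str], Awaitable[Dict[str, Any]]],
    firstEndpoint: str,
    stripBase: Callable[[str], str],
    limit: Optional[int] = None,
    hardCap: int = 5000,
) -> List[Dict[str, Any]]:
    items: List[Dict[str, Any]] = []
    endpoint: Optional[str] = firstEndpoint
    while endpoint and len(items) < hardCap:
        page = await fetchPage(endpoint)
        if "error" in page:
            break
        for raw in page.get("value", []) or []:
            items.append(raw)
            if limit is not None and len(items) >= limit:
                return items
        nextLink = page.get("@odata.nextLink")
        endpoint = stripBase(nextLink) if nextLink else None
    return items
```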
@@ -841,6 +864,285 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
        return entries


# ---------------------------------------------------------------------------
# Calendar Adapter
# ---------------------------------------------------------------------------


class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for Outlook Calendar via Microsoft Graph.

    Path conventions:
        ``""`` / ``"/"``              -> list user calendars
        ``"/<calendarId>"``           -> list events in that calendar
        ``"/<calendarId>/<eventId>"`` -> reserved for future event detail browse

    Downloads return a synthesised ``.ics`` (VCALENDAR/VEVENT) since Microsoft
    Graph does not expose a ``/$value`` endpoint for events.
    """

    _DEFAULT_EVENT_LIMIT = 100
    _MAX_EVENT_LIMIT = 1000
    _PAGE_SIZE = 100

    async def browse(
        self,
        path: str,
        filter: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        cleanPath = (path or "").strip("/")
        if not cleanPath:
            result = await self._graphGet("me/calendars?$top=100")
            if "error" in result:
                logger.warning(f"MSFT Calendar list failed: {result['error']}")
                return []
            calendars = result.get("value", [])
            if filter:
                calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()]
            return [
                ExternalEntry(
                    name=c.get("name", ""),
                    path=f"/{c.get('id', '')}",
                    isFolder=True,
                    metadata={
                        "id": c.get("id"),
                        "color": c.get("color"),
                        "owner": (c.get("owner") or {}).get("address"),
                        "isDefaultCalendar": c.get("isDefaultCalendar", False),
                        "canEdit": c.get("canEdit", False),
                    },
                )
                for c in calendars
            ]

        calendarId = cleanPath.split("/", 1)[0]
        effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
        pageSize = min(self._PAGE_SIZE, effectiveLimit)
        endpoint: Optional[str] = (
            f"me/calendars/{calendarId}/events"
            f"?$top={pageSize}&$orderby=start/dateTime desc"
        )
        events: List[Dict[str, Any]] = []
        while endpoint and len(events) < effectiveLimit:
            result = await self._graphGet(endpoint)
            if "error" in result:
                logger.warning(f"MSFT Calendar events failed: {result['error']}")
                break
            for ev in result.get("value", []):
                events.append(ev)
                if len(events) >= effectiveLimit:
                    break
            nextLink = result.get("@odata.nextLink")
            endpoint = _stripGraphBase(nextLink) if nextLink else None

        return [
            ExternalEntry(
                name=ev.get("subject", "(no subject)"),
                path=f"/{calendarId}/{ev.get('id', '')}",
                isFolder=False,
                mimeType="text/calendar",
                metadata={
                    "id": ev.get("id"),
                    "start": (ev.get("start") or {}).get("dateTime"),
                    "end": (ev.get("end") or {}).get("dateTime"),
                    "location": (ev.get("location") or {}).get("displayName"),
                    "organizer": (ev.get("organizer") or {}).get("emailAddress", {}).get("address"),
                    "isAllDay": ev.get("isAllDay", False),
                    "webLink": ev.get("webLink"),
                },
            )
            for ev in events
        ]

    async def download(self, path: str) -> DownloadResult:
        cleanPath = (path or "").strip("/")
        if "/" not in cleanPath:
            return DownloadResult()
        eventId = cleanPath.split("/")[-1]
        ev = await self._graphGet(f"me/events/{eventId}")
        if "error" in ev:
            logger.warning(f"MSFT Calendar event fetch failed: {ev['error']}")
            return DownloadResult()
        icsBytes = _eventToIcs(ev)
        subject = ev.get("subject") or eventId
        safeName = _safeFileName(subject) or "event"
        return DownloadResult(
            data=icsBytes,
            fileName=f"{safeName}.ics",
            mimeType="text/calendar",
        )

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        return {"error": "Calendar upload not supported"}

    async def search(
        self,
        query: str,
        path: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        safeQuery = query.replace("'", "''")
        effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
        endpoint = f"me/events?$search=\"{safeQuery}\"&$top={effectiveLimit}"
        result = await self._graphGet(endpoint)
        if "error" in result:
            return []
        return [
            ExternalEntry(
                name=ev.get("subject", "(no subject)"),
                path=f"/search/{ev.get('id', '')}",
                isFolder=False,
                mimeType="text/calendar",
                metadata={
                    "id": ev.get("id"),
                    "start": (ev.get("start") or {}).get("dateTime"),
                    "end": (ev.get("end") or {}).get("dateTime"),
                },
            )
            for ev in result.get("value", [])
        ]

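The path convention in practice, as a short sketch (assumes an authenticated CalendarAdapter instance; names and return values are illustrative):

```python
# Sketch of the browse/download path convention (cal is an authenticated CalendarAdapter).
import asyncio

async def demo(cal):
    calendars = await cal.browse("")              # one ExternalEntry folder per calendar
    events = await cal.browse(calendars[0].path)  # events in that calendar, newest first
    item = await cal.download(events[0].path)     # DownloadResult carrying a synthesised .ics
    return item.fileName, item.mimeType

# asyncio.run(demo(cal)) would yield something like ("Team sync.ics", "text/calendar")
```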
# ---------------------------------------------------------------------------
# Contacts Adapter
# ---------------------------------------------------------------------------


class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
    """ServiceAdapter for Outlook Contacts via Microsoft Graph.

    Path conventions:
        ``""``                        -> list contact folders (default + custom)
        ``"/<folderId>"``             -> list contacts in that folder; the
                                         virtual id ``default`` maps to
                                         ``/me/contacts`` (the user's primary
                                         contact list)
        ``"/<folderId>/<contactId>"`` -> reserved for future detail browse

    Downloads return a synthesised vCard 3.0 (.vcf) since Microsoft Graph
    does not expose a ``/$value`` endpoint for contacts.
    """

    _DEFAULT_CONTACT_LIMIT = 200
    _MAX_CONTACT_LIMIT = 1000
    _PAGE_SIZE = 100
    _DEFAULT_FOLDER_ID = "default"

    async def browse(
        self,
        path: str,
        filter: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        cleanPath = (path or "").strip("/")
        if not cleanPath:
            folders: List[ExternalEntry] = [
                ExternalEntry(
                    name="Kontakte",
                    path=f"/{self._DEFAULT_FOLDER_ID}",
                    isFolder=True,
                    metadata={"id": self._DEFAULT_FOLDER_ID, "isDefault": True},
                ),
            ]
            result = await self._graphGet("me/contactFolders?$top=100")
            if "error" not in result:
                for f in result.get("value", []):
                    folders.append(
                        ExternalEntry(
                            name=f.get("displayName", ""),
                            path=f"/{f.get('id', '')}",
                            isFolder=True,
                            metadata={"id": f.get("id"), "parentFolderId": f.get("parentFolderId")},
                        )
                    )
            else:
                logger.warning(f"MSFT contactFolders list failed: {result['error']}")
            return folders

        folderId = cleanPath.split("/", 1)[0]
        effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
        pageSize = min(self._PAGE_SIZE, effectiveLimit)
        if folderId == self._DEFAULT_FOLDER_ID:
            endpoint: Optional[str] = f"me/contacts?$top={pageSize}&$orderby=displayName"
        else:
            endpoint = f"me/contactFolders/{folderId}/contacts?$top={pageSize}&$orderby=displayName"

        contacts: List[Dict[str, Any]] = []
        while endpoint and len(contacts) < effectiveLimit:
            result = await self._graphGet(endpoint)
            if "error" in result:
                logger.warning(f"MSFT contacts list failed: {result['error']}")
                break
            for c in result.get("value", []):
                contacts.append(c)
                if len(contacts) >= effectiveLimit:
                    break
            nextLink = result.get("@odata.nextLink")
            endpoint = _stripGraphBase(nextLink) if nextLink else None

        return [
            ExternalEntry(
                name=c.get("displayName") or _personLabel(c) or "(no name)",
                path=f"/{folderId}/{c.get('id', '')}",
                isFolder=False,
                mimeType="text/vcard",
                metadata={
                    "id": c.get("id"),
                    "givenName": c.get("givenName"),
                    "surname": c.get("surname"),
                    "companyName": c.get("companyName"),
                    "emailAddresses": [e.get("address") for e in (c.get("emailAddresses") or []) if e.get("address")],
                    "businessPhones": c.get("businessPhones") or [],
                    "mobilePhone": c.get("mobilePhone"),
                },
            )
            for c in contacts
        ]

    async def download(self, path: str) -> DownloadResult:
        cleanPath = (path or "").strip("/")
        if "/" not in cleanPath:
            return DownloadResult()
        contactId = cleanPath.split("/")[-1]
        c = await self._graphGet(f"me/contacts/{contactId}")
        if "error" in c:
            logger.warning(f"MSFT contact fetch failed: {c['error']}")
            return DownloadResult()
        vcfBytes = _contactToVcard(c)
        label = c.get("displayName") or _personLabel(c) or contactId
        safeName = _safeFileName(label) or "contact"
        return DownloadResult(
            data=vcfBytes,
            fileName=f"{safeName}.vcf",
            mimeType="text/vcard",
        )

    async def upload(self, path: str, data: bytes, fileName: str) -> dict:
        return {"error": "Contacts upload not supported"}

    async def search(
        self,
        query: str,
        path: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[ExternalEntry]:
        safeQuery = query.replace("'", "''")
        effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
        endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}"
        result = await self._graphGet(endpoint)
        if "error" in result:
            return []
        return [
            ExternalEntry(
                name=c.get("displayName") or _personLabel(c) or "(no name)",
                path=f"/search/{c.get('id', '')}",
                isFolder=False,
                mimeType="text/vcard",
                metadata={"id": c.get("id")},
            )
            for c in result.get("value", [])
        ]

# ---------------------------------------------------------------------------
# MsftConnector (1:n)
# ---------------------------------------------------------------------------

@@ -853,6 +1155,8 @@ class MsftConnector(ProviderConnector):
        "outlook": OutlookAdapter,
        "teams": TeamsAdapter,
        "onedrive": OneDriveAdapter,
        "calendar": CalendarAdapter,
        "contact": ContactsAdapter,
    }

    def getAvailableServices(self) -> List[str]:

@@ -891,3 +1195,143 @@ def _matchFilter(entry: ExternalEntry, pattern: str) -> bool:
    """Simple glob-like filter (supports * wildcard)."""
    import fnmatch
    return fnmatch.fnmatch(entry.name.lower(), pattern.lower())


def _safeFileName(name: str) -> str:
    """Strip path-unsafe characters and trim length so the result is a usable file name."""
    import re
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name or "")[:80].strip(". ")


def _personLabel(contact: Dict[str, Any]) -> str:
    given = (contact.get("givenName") or "").strip()
    surname = (contact.get("surname") or "").strip()
    if given or surname:
        return f"{given} {surname}".strip()
    company = (contact.get("companyName") or "").strip()
    return company


def _icsEscape(value: str) -> str:
    """Escape RFC 5545 reserved characters in TEXT properties."""
    if value is None:
        return ""
    return (
        value.replace("\\", "\\\\")
        .replace(";", "\\;")
        .replace(",", "\\,")
        .replace("\r\n", "\\n")
        .replace("\n", "\\n")
    )


def _icsDateTime(value: Optional[str]) -> Optional[str]:
    """Convert an ISO datetime string to an RFC 5545 DATE-TIME value (UTC)."""
    if not value:
        return None
    from datetime import datetime, timezone
    try:
        normalized = value.replace("Z", "+00:00") if value.endswith("Z") else value
        dt = datetime.fromisoformat(normalized)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    except (TypeError, ValueError):
        return None


def _eventToIcs(event: Dict[str, Any]) -> bytes:
    """Build a minimal RFC 5545 VCALENDAR/VEVENT for a Graph event payload."""
    from datetime import datetime, timezone
    uid = event.get("iCalUId") or event.get("id") or "unknown@poweron"
    summary = _icsEscape(event.get("subject") or "")
    location = _icsEscape((event.get("location") or {}).get("displayName") or "")
    body = (event.get("body") or {}).get("content") or ""
    description = _icsEscape(body)
    dtstart = _icsDateTime((event.get("start") or {}).get("dateTime"))
    dtend = _icsDateTime((event.get("end") or {}).get("dateTime"))
    dtstamp = _icsDateTime(event.get("lastModifiedDateTime")) or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

    lines = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        "PRODID:-//PowerOn//MSFT-Calendar-Adapter//EN",
        "CALSCALE:GREGORIAN",
        "BEGIN:VEVENT",
        f"UID:{uid}",
        f"DTSTAMP:{dtstamp}",
    ]
    if dtstart:
        lines.append(f"DTSTART:{dtstart}")
    if dtend:
        lines.append(f"DTEND:{dtend}")
    if summary:
        lines.append(f"SUMMARY:{summary}")
    if location:
        lines.append(f"LOCATION:{location}")
    if description:
        lines.append(f"DESCRIPTION:{description}")
    organizer = (event.get("organizer") or {}).get("emailAddress", {}).get("address")
    if organizer:
        lines.append(f"ORGANIZER:mailto:{organizer}")
    for att in (event.get("attendees") or []):
        addr = (att.get("emailAddress") or {}).get("address")
        if addr:
            lines.append(f"ATTENDEE:mailto:{addr}")
    lines.append("END:VEVENT")
    lines.append("END:VCALENDAR")
    return ("\r\n".join(lines) + "\r\n").encode("utf-8")


def _contactToVcard(contact: Dict[str, Any]) -> bytes:
    """Build a vCard 3.0 from a Graph /me/contacts payload."""
    given = contact.get("givenName") or ""
    surname = contact.get("surname") or ""
    middle = contact.get("middleName") or ""
    fn = contact.get("displayName") or _personLabel(contact) or contact.get("companyName") or ""

    lines = [
        "BEGIN:VCARD",
        "VERSION:3.0",
        f"N:{surname};{given};{middle};;",
        f"FN:{fn}",
    ]
    if contact.get("companyName"):
        org = contact["companyName"]
        if contact.get("department"):
            org = f"{org};{contact['department']}"
        lines.append(f"ORG:{org}")
    if contact.get("jobTitle"):
        lines.append(f"TITLE:{contact['jobTitle']}")
    for em in (contact.get("emailAddresses") or []):
        addr = em.get("address")
        if addr:
            lines.append(f"EMAIL;TYPE=INTERNET:{addr}")
    for phone in (contact.get("businessPhones") or []):
        if phone:
            lines.append(f"TEL;TYPE=WORK,VOICE:{phone}")
    if contact.get("mobilePhone"):
        lines.append(f"TEL;TYPE=CELL,VOICE:{contact['mobilePhone']}")
    for phone in (contact.get("homePhones") or []):
        if phone:
            lines.append(f"TEL;TYPE=HOME,VOICE:{phone}")

    def _appendAddress(addr: Dict[str, Any], typ: str) -> None:
        if not addr:
            return
        street = addr.get("street") or ""
        city = addr.get("city") or ""
        state = addr.get("state") or ""
        postal = addr.get("postalCode") or ""
        country = addr.get("countryOrRegion") or ""
        if any([street, city, state, postal, country]):
            lines.append(f"ADR;TYPE={typ}:;;{street};{city};{state};{postal};{country}")

    _appendAddress(contact.get("businessAddress") or {}, "WORK")
    _appendAddress(contact.get("homeAddress") or {}, "HOME")
    _appendAddress(contact.get("otherAddress") or {}, "OTHER")
    if contact.get("personalNotes"):
        lines.append(f"NOTE:{_icsEscape(contact['personalNotes'])}")
    lines.append(f"UID:{contact.get('id', '')}")
    lines.append("END:VCARD")
    return ("\r\n".join(lines) + "\r\n").encode("utf-8")

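A short usage sketch for the Graph-side vCard builder (assumes the helpers above are in scope; the contact payload is invented):

```python
# Sketch (assumes _contactToVcard / _personLabel above are in scope).
sampleContact = {
    "id": "AAMkAD123",
    "displayName": "Grace Hopper",
    "givenName": "Grace",
    "surname": "Hopper",
    "companyName": "Navy",
    "emailAddresses": [{"address": "grace@example.com"}],
    "businessPhones": ["+1 555 0100"],
}

vcf = _contactToVcard(sampleContact).decode("utf-8")
assert "FN:Grace Hopper" in vcf
assert "EMAIL;TYPE=INTERNET:grace@example.com" in vcf
assert "TEL;TYPE=WORK,VOICE:+1 555 0100" in vcf
assert vcf.endswith("END:VCARD\r\n")
```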
@@ -26,7 +26,12 @@ class DataSource(PowerOnModel):
        json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection", "labelField": "externalUsername"}},
    )
    sourceType: str = Field(
        description=(
            "sharepointFolder, onedriveFolder, googleDriveFolder, "
            "outlookFolder, gmailFolder, ftpFolder, clickupList "
            "(path under /team/...), kdriveFolder, calendarFolder, "
            "contactFolder"
        ),
        json_schema_extra={"label": "Quellentyp"},
    )
    path: str = Field(

@@ -4,10 +4,13 @@
Document reference models for typed document references in workflows.
"""

import logging
from typing import Any, List, Optional
from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel

logger = logging.getLogger(__name__)


class DocumentReference(BaseModel):
    """Base class for document references"""

@@ -115,3 +118,86 @@ class DocumentReferenceList(BaseModel):
            references.append(DocumentListReference(label=refStr))

        return cls(references=references)


def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
    """Tolerant coercion of any agent/UI-supplied document list to
    :class:`DocumentReferenceList`.

    Accepts the canonical formats plus the dict-wrapper shapes that
    LLM tool-callers tend to generate when they see a
    ``type=DocumentList`` parameter:

    * ``None`` / ``""`` -> empty list
    * :class:`DocumentReferenceList` -> as-is
    * ``str`` -> single-element string list
    * ``list[str]`` -> :meth:`from_string_list`
    * ``list[dict]`` with ``id`` or ``documentId`` -> item references
    * ``{"documents": [...]}`` / ``{"references": [...]}`` ->
      recurse into the inner list (this is the shape LLMs love)
    * ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single item reference
    * any unrecognised input -> empty list with a WARN log; never
      raises (the caller decides whether an empty list is fatal).
    """
    if value is None or value == "":
        return DocumentReferenceList(references=[])
    if isinstance(value, DocumentReferenceList):
        return value
    if isinstance(value, str):
        return DocumentReferenceList.from_string_list([value])

    if isinstance(value, dict):
        for innerKey in ("documents", "references", "items", "files"):
            if innerKey in value and isinstance(value[innerKey], list):
                return coerceDocumentReferenceList(value[innerKey])
        docId = value.get("documentId") or value.get("id")
        if docId:
            return DocumentReferenceList(references=[
                DocumentItemReference(
                    documentId=str(docId),
                    fileName=value.get("fileName") or value.get("name"),
                )
            ])
        logger.warning(
            f"coerceDocumentReferenceList: unsupported dict shape "
            f"(keys={list(value.keys())}); returning empty list."
        )
        return DocumentReferenceList(references=[])

    if isinstance(value, list):
        if not value:
            return DocumentReferenceList(references=[])
        first = value[0]
        if isinstance(first, str):
            return DocumentReferenceList.from_string_list(value)
        if isinstance(first, dict):
            references: List[DocumentReference] = []
            for item in value:
                if not isinstance(item, dict):
                    continue
                docId = item.get("documentId") or item.get("id")
                if docId:
                    references.append(DocumentItemReference(
                        documentId=str(docId),
                        fileName=item.get("fileName") or item.get("name"),
                    ))
                elif item.get("label"):
                    references.append(DocumentListReference(
                        label=str(item["label"]),
                        messageId=item.get("messageId"),
                    ))
            return DocumentReferenceList(references=references)
        # Mixed/object list (e.g. inline ActionDocument-like): caller
        # must pre-handle that case before calling this coercer.
        logger.warning(
            f"coerceDocumentReferenceList: list element type "
            f"{type(first).__name__} not recognised; returning empty list."
        )
        return DocumentReferenceList(references=[])

    logger.warning(
        f"coerceDocumentReferenceList: unsupported value type "
        f"{type(value).__name__}; returning empty list."
    )
    return DocumentReferenceList(references=[])

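The accepted shapes, in a brief sketch (assumes the coercer and the reference models above are importable; inputs are invented):

```python
# Sketch (assumes coerceDocumentReferenceList and the reference models above are in scope).
single = coerceDocumentReferenceList("contract.pdf")
wrapped = coerceDocumentReferenceList({"documents": [{"id": "doc-1", "fileName": "offer.docx"}]})
empty = coerceDocumentReferenceList(None)

assert len(single.references) == 1                     # plain string -> one list reference
assert wrapped.references[0].documentId == "doc-1"     # dict wrapper unwrapped to item reference
assert empty.references == []                          # None never raises
```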
@@ -95,7 +95,14 @@ class ExtractionOptions(BaseModel):
    imageQuality: int = Field(default=85, ge=1, le=100, description="Image quality (1-100)")

    # Merging strategy
    mergeStrategy: Optional[MergeStrategy] = Field(
        default_factory=MergeStrategy,
        description=(
            "Strategy for merging extraction results. Pass None to skip merging entirely "
            "(required for per-chunk ingestion pipelines like RAG, where per-page/per-section "
            "granularity must be preserved for embedding)."
        ),
    )

    # Optional chunking parameters (for backward compatibility)
    chunkAllowed: Optional[bool] = Field(default=None, description="Whether chunking is allowed")

@@ -90,6 +90,16 @@ class FileContentIndex(PowerOnModel):
        description="Data visibility scope: personal, featureInstance, mandate, global",
        json_schema_extra={"label": "Sichtbarkeit"},
    )
    sourceKind: str = Field(
        default="file",
        description="Origin of the indexed content: file, sharepoint_item, outlook_message, outlook_attachment, ...",
        json_schema_extra={"label": "Quellenart"},
    )
    connectionId: Optional[str] = Field(
        default=None,
        description="UserConnection ID if this index entry originates from an external connector",
        json_schema_extra={"label": "Connection-ID"},
    )
    neutralizationStatus: Optional[str] = Field(
        default=None,
        description="Neutralization status: completed, failed, skipped, None = not required",

@@ -475,7 +475,23 @@ class UserConnection(PowerOnModel):
        description="OAuth scopes granted for this connection",
        json_schema_extra={"frontend_type": "list", "frontend_readonly": True, "frontend_required": False, "label": "Gewährte Berechtigungen"},
    )
    knowledgeIngestionEnabled: bool = Field(
        default=False,
        description="Whether the user has consented to knowledge ingestion for this connection",
        json_schema_extra={"frontend_type": "boolean", "frontend_readonly": False, "frontend_required": False, "label": "Wissensdatenbank aktiv"},
    )
    knowledgePreferences: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
            "neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
            "mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
            "clickupScope (titles|title_description|with_comments), "
            "surfaceToggles (dict per authority), maxAgeDays (int)."
        ),
        json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
    )

    @computed_field
    @property
    def connectionReference(self) -> str:

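An illustrative knowledgePreferences value matching the schemaVersion=1 keys listed in the field description (values and the exact surfaceToggles nesting are assumptions, not defaults from the changeset):

```python
# Example knowledgePreferences payload (illustrative values; nesting of surfaceToggles is an assumption).
examplePreferences = {
    "schemaVersion": 1,
    "neutralizeBeforeEmbed": True,
    "mailContentDepth": "snippet",           # metadata | snippet | full
    "mailIndexAttachments": False,
    "filesIndexBinaries": False,
    "mimeAllowlist": ["application/pdf", "text/plain"],
    "clickupScope": "title_description",     # titles | title_description | with_comments
    "surfaceToggles": {"msft": {"calendar": True, "contact": False}},
    "maxAgeDays": 365,
}
```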
@@ -174,14 +174,26 @@ async def indexSessionData(
            for c in chunks
        ]

        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

        await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="coaching_session",
                sourceId=syntheticFileId,
                fileName=f"coaching-session-{sessionId[:8]}",
                mimeType="application/x-coaching-session",
                userId=userId,
                featureInstanceId=featureInstanceId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                provenance={
                    "lane": "feature",
                    "feature": "commcoach",
                    "sessionId": sessionId,
                    "contextId": contextId,
                    "messageCount": len(messages or []),
                },
            )
        )
        logger.info(f"Successfully indexed coaching session {sessionId} ({len(chunks)} chunks)")
    except Exception as e:

@@ -1160,6 +1160,9 @@ async def list_connection_services(
        "drive": "Google Drive",
        "gmail": "Gmail",
        "files": "Files (FTP)",
        "kdrive": "kDrive",
        "calendar": "Calendar",
        "contact": "Contacts",
    }
    _serviceIcons = {
        "sharepoint": "sharepoint",

@@ -1170,6 +1173,9 @@ async def list_connection_services(
        "drive": "cloud",
        "gmail": "mail",
        "files": "folder",
        "kdrive": "cloud",
        "calendar": "calendar",
        "contact": "contact",
    }
    items = [
        {"service": s, "label": _serviceLabels.get(s, s), "icon": _serviceIcons.get(s, "folder")}

@@ -188,6 +188,9 @@ _SOURCE_TYPE_TO_SERVICE = {
    "gmailFolder": "gmail",
    "ftpFolder": "files",
    "clickupList": "clickup",
    "kdriveFolder": "kdrive",
    "calendarFolder": "calendar",
    "contactFolder": "contact",
}

@@ -1818,6 +1821,9 @@ async def listConnectionServices(
        "drive": "Google Drive",
        "gmail": "Gmail",
        "files": "Files (FTP)",
        "kdrive": "kDrive",
        "calendar": "Calendar",
        "contact": "Contacts",
    }
    _serviceIcons = {
        "sharepoint": "sharepoint",

@@ -1827,6 +1833,9 @@ async def listConnectionServices(
        "drive": "cloud",
        "gmail": "mail",
        "files": "folder",
        "kdrive": "cloud",
        "calendar": "calendar",
        "contact": "contact",
    }
    items = [
        {

@@ -1268,19 +1268,7 @@ class AppObjects:
            result = []
            for conn_dict in connections:
                try:
                    connection = UserConnection.model_validate(conn_dict)
                    result.append(connection)
                except Exception as e:
                    logger.error(

@@ -1293,6 +1281,28 @@ class AppObjects:
            logger.error(f"Error getting user connections: {str(e)}")
            return []

    def getActiveKnowledgeConnections(self) -> List[UserConnection]:
        """Return all UserConnections with knowledgeIngestionEnabled=True and status=active.

        Used by the daily re-sync scheduler to determine which connections to re-index.
        """
        try:
            rows = self.db.getRecordset(
                UserConnection,
                recordFilter={"knowledgeIngestionEnabled": True, "status": ConnectionStatus.ACTIVE.value},
            )
            result = []
            for row in rows or []:
                try:
                    conn = UserConnection.model_validate(row) if isinstance(row, dict) else row
                    result.append(conn)
                except Exception as _e:
                    logger.warning(f"getActiveKnowledgeConnections: could not parse row: {_e}")
            return result
        except Exception as e:
            logger.error(f"getActiveKnowledgeConnections failed: {e}")
            return []

    def getUserConnectionById(self, connectionId: str) -> Optional[UserConnection]:
        """Get a single UserConnection by ID or by reference string (connection:authority:username)."""
        try:

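The docstring mentions a daily re-sync scheduler; a hypothetical sketch of such a pass (the scheduler wiring and the ingestion call are assumptions, not part of this changeset):

```python
# Hypothetical daily re-sync pass (bootstrapConnection is an assumed service method).
import logging

logger = logging.getLogger(__name__)

async def resyncKnowledgeConnections(appObjects, knowledgeService):
    # Only connections with explicit consent and an active token are considered.
    for connection in appObjects.getActiveKnowledgeConnections():
        try:
            await knowledgeService.bootstrapConnection(connection.id)  # assumed API
        except Exception as e:
            logger.warning(f"Knowledge re-sync failed for {connection.id}: {e}")
```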
@@ -1317,18 +1327,21 @@ class AppObjects:

            if connections:
                conn_dict = connections[0]
                try:
                    return UserConnection.model_validate(conn_dict)
                except Exception:
                    return UserConnection(
                        id=conn_dict["id"],
                        userId=conn_dict["userId"],
                        authority=conn_dict.get("authority"),
                        externalId=conn_dict.get("externalId", ""),
                        externalUsername=conn_dict.get("externalUsername", ""),
                        externalEmail=conn_dict.get("externalEmail"),
                        status=conn_dict.get("status", "pending"),
                        connectedAt=conn_dict.get("connectedAt"),
                        lastChecked=conn_dict.get("lastChecked"),
                        expiresAt=conn_dict.get("expiresAt"),
                    )
            return None
        except Exception as e:
            logger.error(f"Error getting user connection by ID: {str(e)}")

@@ -3331,7 +3344,10 @@ class AppObjects:
            )

            if not tokens:
                # Pending connections legitimately have no token yet (PAT not
                # submitted, OAuth callback not completed). Keep at DEBUG to
                # avoid noisy warnings on every connection-list refresh.
                logger.debug(
                    f"No connection token found for connectionId: {connectionId}"
                )
                return None

@@ -93,6 +93,46 @@ class KnowledgeObjects:
        self.db.recordModify(FileContentIndex, fileId, {"status": status})
        return True

    def deleteFileContentIndexByConnectionId(self, connectionId: str) -> Dict[str, int]:
        """Delete all FileContentIndex rows (and their ContentChunks) for a connection.

        Used when a UserConnection is revoked / disconnected so the knowledge corpus
        no longer references data the user no longer grants access to. Returns a dict
        with counts to support observability logs.
        """
        if not connectionId:
            return {"indexRows": 0, "chunks": 0}

        rows = self.db.getRecordset(
            FileContentIndex, recordFilter={"connectionId": connectionId}
        )
        mandateIds: set = set()
        chunkCount = 0
        indexCount = 0
        for row in rows:
            fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
            mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
            if not fid:
                continue
            chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
            for chunk in chunks:
                if self.db.recordDelete(ContentChunk, chunk["id"]):
                    chunkCount += 1
            if self.db.recordDelete(FileContentIndex, fid):
                indexCount += 1
            if mid:
                mandateIds.add(str(mid))

        for mid in mandateIds:
            try:
                from modules.interfaces.interfaceDbBilling import _getRootInterface

                _getRootInterface().reconcileMandateStorageBilling(mid)
            except Exception as ex:
                logger.warning("reconcileMandateStorageBilling after connection purge failed: %s", ex)

        return {"indexRows": indexCount, "chunks": chunkCount}

    def deleteFileContentIndex(self, fileId: str) -> bool:
        """Delete a FileContentIndex and all associated ContentChunks."""
        existing = self.getFileContentIndex(fileId)

@@ -836,13 +836,25 @@ class ComponentObjects:
    def checkForDuplicateFile(self, fileHash: str, fileName: str) -> Optional[FileItem]:
        """Checks if a file with the same hash AND fileName already exists for the current user
        **within the same scope** (mandateId + featureInstanceId).

        Duplicate = same user + same fileHash + same fileName + same scope + RBAC-visible.
        Same hash with different name is allowed (intentional copy by user).

        RBAC parity contract: this method must NEVER return a FileItem that
        ``getFile()`` would not return for the current user. Otherwise callers
        (``saveUploadedFile`` / ``createFile``) hand back an id that the very
        next ``updateFile`` / ``getFile`` then rejects with
        ``File with ID ... not found`` -- the well-known "ghost duplicate"
        symptom seen when ``interfaceDbComponent`` is initialised without a
        ``featureInstanceId`` (e.g. via ``serviceHub``) but a same-hash+name
        file exists in another featureInstance under the same mandate.
        We therefore cross-check the candidate through the RBAC-aware ``getFile``
        before returning it; if RBAC blocks it, we treat it as "no duplicate
        for this scope" and the caller will create a fresh per-scope copy.
        """
        if not self.userId:
            return None

        recordFilter: dict = {
            "sysCreatedBy": self.userId,
            "fileHash": fileHash,

@@ -857,10 +869,10 @@ class ComponentObjects:
            FileItem,
            recordFilter=recordFilter,
        )

        if not matchingFiles:
            return None

        file = matchingFiles[0]
        fileId = file["id"]

@@ -869,16 +881,17 @@ class ComponentObjects:
            logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file")
            return None

        rbacVisible = self.getFile(fileId)
        if rbacVisible is None:
            logger.info(
                f"Duplicate FileItem {fileId} ('{fileName}', hash {fileHash[:12]}...) found via "
                f"sysCreatedBy+hash+name match but is not RBAC-visible in current scope "
                f"(mandateId={self.mandateId or '-'}, featureInstanceId={self.featureInstanceId or '-'}). "
                f"Treating as no-duplicate so a fresh per-scope copy gets created."
            )
            return None

        return rbacVisible

    # Class-level cache — built once from the ExtractorRegistry
    _extensionToMime: Optional[Dict[str, str]] = None

@@ -351,11 +351,18 @@ def create_connection(
            externalUsername="",  # Will be set after OAuth
            status=ConnectionStatus.PENDING  # Start with PENDING status
        )

        # Apply knowledge consent + preferences from request body before persisting
        knowledge_enabled = connection_data.get("knowledgeIngestionEnabled")
        if isinstance(knowledge_enabled, bool):
            connection.knowledgeIngestionEnabled = knowledge_enabled
        knowledge_prefs = connection_data.get("knowledgePreferences")
        if isinstance(knowledge_prefs, dict):
            connection.knowledgePreferences = knowledge_prefs

        # Save connection record - models now handle timestamp serialization automatically
        interface.db.recordModify(UserConnection, connection.id, connection.model_dump())

        return connection

    except HTTPException:

@@ -484,16 +491,23 @@ def update_connection(
def connect_service(
    request: Request,
    connectionId: str = Path(..., description="The ID of the connection to connect"),
    body: Optional[Dict[str, Any]] = Body(default=None),
    currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
    """Connect a service for the current user.

    Optional body: ``{"reauth": true}`` -- forces the OAuth provider to re-show
    the consent screen, which is required when new scopes have been added (e.g.
    Calendar + Contacts after the connection was first created). Without this
    flag the provider silently re-uses the previous consent and never grants
    the new scopes, leaving the connection in a degraded state.

    SECURITY: This endpoint is secure - users can only connect their own connections.
    """

    try:
        interface = getInterface(currentUser)

        # Find the connection
        connection = None
        # SECURITY FIX: All users (including admins) can only connect their own connections

@ -503,29 +517,40 @@ def connect_service(
|
||||||
if conn.id == connectionId:
|
if conn.id == connectionId:
|
||||||
connection = conn
|
connection = conn
|
||||||
break
|
break
|
||||||
|
|
||||||
if not connection:
|
if not connection:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_404_NOT_FOUND,
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
detail=routeApiMsg("Connection not found")
|
detail=routeApiMsg("Connection not found")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
reauth = bool((body or {}).get("reauth")) if isinstance(body, dict) else False
|
||||||
|
reauthSuffix = "&reauth=1" if reauth else ""
|
||||||
|
|
||||||
# Data-app OAuth (JWT state issued server-side in /auth/connect)
|
# Data-app OAuth (JWT state issued server-side in /auth/connect)
|
||||||
auth_url = None
|
auth_url = None
|
||||||
if connection.authority == AuthAuthority.MSFT:
|
if connection.authority == AuthAuthority.MSFT:
|
||||||
auth_url = f"/api/msft/auth/connect?connectionId={quote(connectionId, safe='')}"
|
auth_url = f"/api/msft/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
|
||||||
elif connection.authority == AuthAuthority.GOOGLE:
|
elif connection.authority == AuthAuthority.GOOGLE:
|
||||||
auth_url = f"/api/google/auth/connect?connectionId={quote(connectionId, safe='')}"
|
auth_url = f"/api/google/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
|
||||||
elif connection.authority == AuthAuthority.CLICKUP:
|
elif connection.authority == AuthAuthority.CLICKUP:
|
||||||
auth_url = f"/api/clickup/auth/connect?connectionId={quote(connectionId, safe='')}"
|
auth_url = f"/api/clickup/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
|
||||||
elif connection.authority == AuthAuthority.INFOMANIAK:
|
elif connection.authority == AuthAuthority.INFOMANIAK:
|
||||||
auth_url = f"/api/infomaniak/auth/connect?connectionId={quote(connectionId, safe='')}"
|
# Infomaniak does not use OAuth for data access; the frontend posts a
|
||||||
|
# Personal Access Token directly to /api/infomaniak/connections/{id}/token.
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail=routeApiMsg(
|
||||||
|
"Infomaniak uses a Personal Access Token instead of OAuth. "
|
||||||
|
"Submit the token via POST /api/infomaniak/connections/{connectionId}/token."
|
||||||
|
),
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=f"Unsupported authority: {connection.authority}"
|
detail=f"Unsupported authority: {connection.authority}"
|
||||||
)
|
)
|
||||||
|
|
||||||
return {"authUrl": auth_url}
|
return {"authUrl": auth_url}
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
|
|
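# Illustration (not from this changeset): one way a client could drive the new
# reauth flow against connect_service. The {"reauth": true} body and the
# {"authUrl": ...} response shape come from the hunk above; the base URL, the
# POST method, the exact route path, and the httpx usage are assumptions.
import httpx

def start_reauth(connection_id: str, jwt: str) -> str:
    """Ask the backend to rebuild the provider URL with the consent prompt forced."""
    resp = httpx.post(
        f"http://localhost:8000/api/connections/{connection_id}/connect",  # assumed path
        json={"reauth": True},                       # forces re-consent so new scopes are granted
        headers={"Authorization": f"Bearer {jwt}"},
        timeout=10.0,
    )
    resp.raise_for_status()
    # e.g. "/api/google/auth/connect?connectionId=...&reauth=1"
    return resp.json()["authUrl"]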
@@ -568,8 +593,25 @@ def disconnect_service(
                 detail=routeApiMsg("Connection not found")
             )

-        # Update connection status
-        connection.status = ConnectionStatus.INACTIVE
+        # Fire revoked event BEFORE DB status change so knowledge purge and
+        # status mutation form one logical step; subscribers see the
+        # connection as it was. INACTIVE does not exist on the enum — REVOKED
+        # is the correct terminal-but-retained state (deleted rows are
+        # handled in DELETE /{id}).
+        try:
+            from modules.shared.callbackRegistry import callbackRegistry
+
+            callbackRegistry.trigger(
+                "connection.revoked",
+                connectionId=connectionId,
+                authority=str(getattr(connection.authority, "value", connection.authority) or ""),
+                userId=str(currentUser.id),
+                reason="disconnected",
+            )
+        except Exception as _cbErr:
+            logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)
+
+        connection.status = ConnectionStatus.REVOKED
         connection.lastChecked = getUtcTimestamp()

         # Update connection record - models now handle timestamp serialization automatically

@@ -618,6 +660,23 @@ def delete_connection(
                 detail=routeApiMsg("Connection not found")
             )

+        # Fire revoked event BEFORE the row disappears so consumers still
+        # have authority/connection context for observability; purge itself
+        # targets FileContentIndex rows by connectionId which are unaffected
+        # by the UserConnection delete.
+        try:
+            from modules.shared.callbackRegistry import callbackRegistry
+
+            callbackRegistry.trigger(
+                "connection.revoked",
+                connectionId=connectionId,
+                authority=str(getattr(connection.authority, "value", connection.authority) or ""),
+                userId=str(currentUser.id),
+                reason="deleted",
+            )
+        except Exception as _cbErr:
+            logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)
+
         # Remove the connection - only need connectionId since permissions are verified
         interface.removeUserConnection(connectionId)

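# Illustration (not from this changeset): the hunks above publish
# "connection.revoked" with a keyword payload before mutating or deleting the
# row. The project's callbackRegistry implementation is not shown here, so the
# stand-in below only illustrates the contract a subscriber has to satisfy
# (event name plus keyword arguments); method names are assumptions.
from collections import defaultdict
from typing import Callable, DefaultDict, List

class _DemoCallbackRegistry:
    def __init__(self) -> None:
        self._subs: DefaultDict[str, List[Callable[..., None]]] = defaultdict(list)

    def register(self, event: str, handler: Callable[..., None]) -> None:
        self._subs[event].append(handler)

    def trigger(self, event: str, **payload) -> None:
        for handler in self._subs[event]:
            handler(**payload)   # every subscriber receives the same keyword payload

demo = _DemoCallbackRegistry()
demo.register(
    "connection.revoked",
    lambda connectionId, authority, userId, reason: print(
        f"purge knowledge for {connectionId} ({authority}) because {reason}"
    ),
)
demo.trigger("connection.revoked", connectionId="c1", authority="google", userId="u1", reason="deleted")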
@@ -77,7 +77,7 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
     """Background task: pre-scan + extraction + knowledge indexing.
     Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
     Step 2: Content extraction via runExtraction -> ContentParts
-    Step 3: KnowledgeService.indexFile -> chunking + embedding -> Knowledge Store"""
+    Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store"""
     userId = user.id if hasattr(user, "id") else str(user)
     try:
         mgmtInterface = interfaceDbManagement.getInterface(user)

@@ -122,9 +122,30 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
             f"{contentIndex.totalObjects} objects"
         )

-        # Persist FileContentIndex immediately
+        # Persist FileContentIndex immediately.
+        # IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
+        # prior successful run — otherwise this upsert wipes the idempotency cache
+        # and requestIngestion cannot detect duplicates (AC4 breaks).
         from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
         knowledgeDb = getKnowledgeInterface()
+        try:
+            _existing = knowledgeDb.getFileContentIndex(fileId)
+        except Exception:
+            _existing = None
+        if _existing:
+            _existingStruct = (
+                _existing.get("structure") if isinstance(_existing, dict)
+                else getattr(_existing, "structure", {})
+            ) or {}
+            _existingStatus = (
+                _existing.get("status") if isinstance(_existing, dict)
+                else getattr(_existing, "status", "")
+            ) or ""
+            if "_ingestion" in _existingStruct:
+                contentIndex.structure = dict(contentIndex.structure or {})
+                contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
+            if _existingStatus == "indexed":
+                contentIndex.status = "indexed"
         knowledgeDb.upsertFileContentIndex(contentIndex)

         # Step 2: Content extraction (AI-free, produces ContentParts)

@@ -134,7 +155,10 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):

         extractorRegistry = ExtractorRegistry()
         chunkerRegistry = ChunkerRegistry()
-        options = ExtractionOptions()
+        # mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
+        # The default MergeStrategy concatenates all text parts into a single blob, which
+        # collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
+        options = ExtractionOptions(mergeStrategy=None)

         extracted = runExtraction(
             extractorRegistry, chunkerRegistry,

@@ -181,15 +205,21 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
         )
         knowledgeService = getService("knowledge", ctx)

-        await knowledgeService.indexFile(
-            fileId=fileId,
-            fileName=fileName,
-            mimeType=mimeType,
-            userId=userId,
-            featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
-            mandateId=str(mandate_id) if mandate_id else "",
-            contentObjects=contentObjects,
-            structure=contentIndex.structure,
+        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
+
+        await knowledgeService.requestIngestion(
+            IngestionJob(
+                sourceKind="file",
+                sourceId=fileId,
+                fileName=fileName,
+                mimeType=mimeType,
+                userId=userId,
+                featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
+                mandateId=str(mandate_id) if mandate_id else "",
+                contentObjects=contentObjects,
+                structure=contentIndex.structure,
+                provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
+            )
         )

         # Re-acquire interface after await to avoid stale user context from the singleton
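# Illustration (not from this changeset): the hunk above preserves
# structure["_ingestion"] across re-uploads so requestIngestion can recognise a
# file it has already chunked and embedded. How the service actually computes
# that marker is not shown here; a content-hash fingerprint like the following
# is one plausible shape, and the key names are assumptions.
import hashlib
import json

def ingestion_fingerprint(content_objects: list) -> str:
    canonical = json.dumps(
        [{"id": o.get("contentObjectId"), "data": o.get("data", "")} for o in content_objects],
        sort_keys=True,
    )
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

def already_ingested(existing_structure: dict, content_objects: list) -> bool:
    previous = (existing_structure or {}).get("_ingestion", {}).get("fingerprint")
    return previous == ingestion_fingerprint(content_objects)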
@ -241,6 +241,29 @@ async def auth_connect_callback(
|
||||||
)
|
)
|
||||||
interface.saveConnectionToken(token)
|
interface.saveConnectionToken(token)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from modules.shared.callbackRegistry import callbackRegistry
|
||||||
|
|
||||||
|
if connection.knowledgeIngestionEnabled:
|
||||||
|
callbackRegistry.trigger(
|
||||||
|
"connection.established",
|
||||||
|
connectionId=connection.id,
|
||||||
|
authority=str(getattr(connection.authority, "value", connection.authority) or "clickup"),
|
||||||
|
userId=str(user.id),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.skipped",
|
||||||
|
"connectionId": connection.id,
|
||||||
|
"authority": "clickup",
|
||||||
|
"reason": "consent_disabled",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception as _cbErr:
|
||||||
|
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
|
||||||
|
|
||||||
return HTMLResponse(
|
return HTMLResponse(
|
||||||
content=f"""
|
content=f"""
|
||||||
<html>
|
<html>
|
||||||
|
|
|
||||||
|
|
@@ -281,9 +281,17 @@ async def auth_login_callback(
 def auth_connect(
     request: Request,
     connectionId: str = Query(..., description="UserConnection id"),
+    reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"),
     currentUser: User = Depends(getCurrentUser),
 ) -> RedirectResponse:
-    """Start Google Data OAuth for an existing connection (requires gateway session)."""
+    """Start Google Data OAuth for an existing connection (requires gateway session).
+
+    Google already defaults to ``prompt=consent`` here, but ``include_granted_scopes=true``
+    can cause newly added scopes (e.g. calendar.readonly, contacts.readonly) to be
+    silently dropped on subsequent re-authorisations. With ``reauth=1`` we drop
+    ``include_granted_scopes`` so Google re-issues a token strictly for the
+    current scope list.
+    """
     try:
         _require_google_data_config()
         interface = getInterface(currentUser)

@@ -310,9 +318,10 @@ def auth_connect(
         )
         extra_params: Dict[str, Any] = {
             "access_type": "offline",
-            "include_granted_scopes": "true",
             "state": state_jwt,
         }
+        if not reauth:
+            extra_params["include_granted_scopes"] = "true"
         login_hint = connection.externalEmail or connection.externalUsername
         if login_hint:
             extra_params["login_hint"] = login_hint

@@ -470,6 +479,29 @@ async def auth_connect_callback(
         )
         interface.saveConnectionToken(token)

+        try:
+            from modules.shared.callbackRegistry import callbackRegistry
+
+            if connection.knowledgeIngestionEnabled:
+                callbackRegistry.trigger(
+                    "connection.established",
+                    connectionId=connection.id,
+                    authority=str(getattr(connection.authority, "value", connection.authority) or "google"),
+                    userId=str(user.id),
+                )
+            else:
+                logger.info(
+                    "ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
+                    extra={
+                        "event": "ingestion.connection.bootstrap.skipped",
+                        "connectionId": connection.id,
+                        "authority": "google",
+                        "reason": "consent_disabled",
+                    },
+                )
+        except Exception as _cbErr:
+            logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
+
         return HTMLResponse(
             content=f"""
             <html>
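# Illustration (not from this changeset): the effect of the reauth flag on the
# Google authorization parameters built above. Without it the previously
# granted scopes are merged back in; with it Google is asked for exactly the
# current scope list. The state value below is a placeholder.
from urllib.parse import urlencode

def google_auth_params(state_jwt: str, reauth: bool) -> dict:
    params = {"access_type": "offline", "state": state_jwt}
    if not reauth:
        params["include_granted_scopes"] = "true"   # incremental auth: old grants are re-used
    return params

print(urlencode(google_auth_params("state-jwt", reauth=False)))
# access_type=offline&state=state-jwt&include_granted_scopes=true
print(urlencode(google_auth_params("state-jwt", reauth=True)))
# access_type=offline&state=state-jwt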
@@ -1,69 +1,66 @@
 # Copyright (c) 2025 Patrick Motsch
 # All rights reserved.
-"""Infomaniak OAuth for data connections (UserConnection + Token).
+"""Infomaniak Personal-Access-Token onboarding for data connections.

-Pure DATA_CONNECTION flow -- Infomaniak is NOT a login authority for PowerOn.
+Infomaniak does NOT support OAuth scopes for kDrive/kSuite data access.
+The user must create a Personal Access Token (PAT) at
+https://manager.infomaniak.com/v3/ng/accounts/token/list with the API
+scopes:
+
+- ``drive`` -> kDrive (active adapter)
+- ``workspace:calendar`` -> Calendar (active adapter)
+- ``workspace:contact`` -> Contacts (active adapter)
+- ``workspace:mail`` -> Mail (adapter pending; scope reserved)
+
+Validation strategy
+-------------------
+The submit endpoint validates the PAT in two deterministic steps,
+each addressing one scope:
+
+1. ``listAccessibleDrives(pat)`` -> ``GET /2/drive/init?with=drives``
+   proves the ``drive`` scope is on the PAT and -- as a side effect --
+   confirms the user has at least one accessible kDrive. This is the
+   *only* listing endpoint that returns drives where the user has
+   ``role: 'user'`` (the documented ``/2/drive?account_id=...`` listing
+   is filtered to admin-only drives and would silently return ``[]``
+   for a standard kSuite member).
+
+2. ``resolveOwnerIdentity(pat)`` -> PIM Calendar (preferred) or PIM
+   Contacts (fallback) yields the user's display name + their kSuite
+   account_id, used purely for connection labelling. This also proves
+   that at least one of ``workspace:calendar`` / ``workspace:contact``
+   is on the PAT (the connection would otherwise be blank in the UI).
+
+Mail has no separate probe: its scope is recorded in ``grantedScopes``
+so a future adapter can pick it up without re-issuing the token.
 """

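# Illustration (not from this changeset): the two-step PAT validation described
# in the module docstring above, written out as a coroutine. The helpers and
# the exception come from the connector module this file imports later in the
# diff; the returned "displayName" / "accountId" keys follow the usage shown in
# the submit endpoint, but the wrapper function itself is only a sketch.
from modules.connectors.providerInfomaniak.connectorInfomaniak import (
    listAccessibleDrives,
    resolveOwnerIdentity,
    InfomaniakIdentityError,
)

async def validate_pat(pat: str) -> dict:
    try:
        drives = await listAccessibleDrives(pat)     # step 1: proves the 'drive' scope
        identity = await resolveOwnerIdentity(pat)   # step 2: proves calendar/contact scope
    except InfomaniakIdentityError as err:
        raise ValueError(f"PAT rejected by Infomaniak: {err}") from err
    return {
        "driveCount": len(drives),
        "displayName": identity["displayName"],
        "accountId": identity["accountId"],
    }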
||||||
from fastapi import APIRouter, HTTPException, Request, status, Depends, Query
|
from fastapi import APIRouter, HTTPException, Request, status, Depends, Path, Body
|
||||||
from fastapi.responses import HTMLResponse, RedirectResponse
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
|
||||||
import time
|
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from urllib.parse import urlencode
|
import hashlib
|
||||||
import httpx
|
|
||||||
from jose import jwt as jose_jwt
|
|
||||||
from jose import JWTError
|
|
||||||
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.interfaces.interfaceDbApp import getInterface
|
||||||
from modules.interfaces.interfaceDbApp import getInterface, getRootInterface
|
|
||||||
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
|
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
|
||||||
from modules.datamodels.datamodelSecurity import Token, TokenPurpose
|
from modules.datamodels.datamodelSecurity import Token, TokenPurpose
|
||||||
from modules.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM
|
from modules.auth import getCurrentUser, limiter
|
||||||
from modules.auth.oauthProviderConfig import infomaniakDataScopes
|
from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp
|
||||||
from modules.shared.timeUtils import createExpirationTimestamp, getUtcTimestamp, parseTimestamp
|
|
||||||
from modules.shared.i18nRegistry import apiRouteContext
|
from modules.shared.i18nRegistry import apiRouteContext
|
||||||
|
from modules.connectors.providerInfomaniak.connectorInfomaniak import (
|
||||||
|
resolveOwnerIdentity,
|
||||||
|
listAccessibleDrives,
|
||||||
|
InfomaniakIdentityError,
|
||||||
|
)
|
||||||
|
|
||||||
routeApiMsg = apiRouteContext("routeSecurityInfomaniak")
|
routeApiMsg = apiRouteContext("routeSecurityInfomaniak")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_FLOW_CONNECT = "infomaniak_connect"
|
# Infomaniak PATs do not expire unless the user sets an explicit lifetime in
|
||||||
|
# the Manager (up to 30 years). We persist a 10-year horizon so the central
|
||||||
INFOMANIAK_AUTHORIZE_URL = "https://login.infomaniak.com/authorize"
|
# tokenStatus helper does not flag the connection as "no token". Mirrors
|
||||||
INFOMANIAK_TOKEN_URL = "https://login.infomaniak.com/token"
|
# ClickUp.
|
||||||
INFOMANIAK_API_BASE = "https://api.infomaniak.com"
|
_INFOMANIAK_TOKEN_EXPIRES_IN_SEC = 10 * 365 * 24 * 3600
|
||||||
|
|
||||||
CLIENT_ID = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_ID")
|
|
||||||
CLIENT_SECRET = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_SECRET")
|
|
||||||
REDIRECT_URI = APP_CONFIG.get("Service_INFOMANIAK_OAUTH_REDIRECT_URI")
|
|
||||||
|
|
||||||
|
|
||||||
def _issue_oauth_state(claims: Dict[str, Any]) -> str:
|
|
||||||
body = {**claims, "exp": int(time.time()) + 600}
|
|
||||||
return jose_jwt.encode(body, SECRET_KEY, algorithm=ALGORITHM)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_oauth_state(state: str) -> Dict[str, Any]:
|
|
||||||
try:
|
|
||||||
return jose_jwt.decode(state, SECRET_KEY, algorithms=[ALGORITHM])
|
|
||||||
except JWTError as e:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid OAuth state: {e}"
|
|
||||||
) from e
|
|
||||||
|
|
||||||
|
|
||||||
def _require_infomaniak_config():
|
|
||||||
if not CLIENT_ID or not CLIENT_SECRET or not REDIRECT_URI:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
||||||
detail=routeApiMsg(
|
|
||||||
"Infomaniak OAuth is not configured "
|
|
||||||
"(Service_INFOMANIAK_DATA_CLIENT_ID, Service_INFOMANIAK_DATA_CLIENT_SECRET, "
|
|
||||||
"Service_INFOMANIAK_OAUTH_REDIRECT_URI)"
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter(
|
router = APIRouter(
|
||||||
|
|
@ -78,251 +75,143 @@ router = APIRouter(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/auth/connect")
|
@router.post("/connections/{connectionId}/token")
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("10/minute")
|
||||||
def auth_connect(
|
async def submit_infomaniak_token(
|
||||||
request: Request,
|
request: Request,
|
||||||
connectionId: str = Query(..., description="UserConnection id"),
|
connectionId: str = Path(..., description="UserConnection id"),
|
||||||
|
body: Dict[str, Any] = Body(..., description="{ 'token': '<PAT>' }"),
|
||||||
currentUser: User = Depends(getCurrentUser),
|
currentUser: User = Depends(getCurrentUser),
|
||||||
) -> RedirectResponse:
|
) -> Dict[str, Any]:
|
||||||
"""Start Infomaniak OAuth for an existing connection (requires gateway session)."""
|
"""Validate and persist an Infomaniak Personal Access Token (PAT).
|
||||||
try:
|
|
||||||
_require_infomaniak_config()
|
|
||||||
interface = getInterface(currentUser)
|
|
||||||
connections = interface.getUserConnections(currentUser.id)
|
|
||||||
connection = None
|
|
||||||
for conn in connections:
|
|
||||||
if conn.id == connectionId and conn.authority == AuthAuthority.INFOMANIAK:
|
|
||||||
connection = conn
|
|
||||||
break
|
|
||||||
if not connection:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_404_NOT_FOUND,
|
|
||||||
detail=routeApiMsg("Infomaniak connection not found"),
|
|
||||||
)
|
|
||||||
|
|
||||||
state_jwt = _issue_oauth_state(
|
Body:
|
||||||
{
|
{ "token": "<personal-access-token from Infomaniak Manager>" }
|
||||||
"flow": _FLOW_CONNECT,
|
|
||||||
"connectionId": connectionId,
|
Validation order (both must succeed before persisting):
|
||||||
"userId": str(currentUser.id),
|
1. ``listAccessibleDrives(pat)`` -> proves the ``drive`` scope
|
||||||
}
|
is on the PAT and confirms the user can see at least one
|
||||||
)
|
kDrive (uses ``/2/drive/init?with=drives`` so users with
|
||||||
query = urlencode(
|
``role: 'user'`` are also covered).
|
||||||
{
|
2. ``resolveOwnerIdentity(pat)`` -> display name + kSuite
|
||||||
"client_id": CLIENT_ID,
|
account_id for the connection UI label (proves at least one
|
||||||
"response_type": "code",
|
of ``workspace:calendar`` / ``workspace:contact`` is present).
|
||||||
"access_type": "offline",
|
|
||||||
"redirect_uri": REDIRECT_URI,
|
No PAT-derived data is stored as adapter state -- both the drive
|
||||||
"scope": " ".join(infomaniakDataScopes),
|
list and the owner identity are re-resolved lazily by the adapters
|
||||||
"state": state_jwt,
|
at request time.
|
||||||
}
|
"""
|
||||||
)
|
pat = (body or {}).get("token")
|
||||||
auth_url = f"{INFOMANIAK_AUTHORIZE_URL}?{query}"
|
if not isinstance(pat, str) or not pat.strip():
|
||||||
return RedirectResponse(auth_url)
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error initiating Infomaniak connect: {str(e)}")
|
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=f"Failed to initiate Infomaniak connect: {str(e)}",
|
detail=routeApiMsg("Missing 'token' in request body"),
|
||||||
)
|
)
|
||||||
|
pat = pat.strip()
|
||||||
|
|
||||||
|
interface = getInterface(currentUser)
|
||||||
@router.get("/auth/connect/callback")
|
|
||||||
async def auth_connect_callback(
|
|
||||||
code: str = Query(...),
|
|
||||||
state: str = Query(...),
|
|
||||||
) -> HTMLResponse:
|
|
||||||
"""OAuth callback for Infomaniak data connection."""
|
|
||||||
state_data = _parse_oauth_state(state)
|
|
||||||
if state_data.get("flow") != _FLOW_CONNECT:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400, detail=routeApiMsg("Invalid OAuth flow for this callback")
|
|
||||||
)
|
|
||||||
connection_id = state_data.get("connectionId")
|
|
||||||
user_id = state_data.get("userId")
|
|
||||||
if not connection_id or not user_id:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400, detail=routeApiMsg("Missing connection or user in OAuth state")
|
|
||||||
)
|
|
||||||
|
|
||||||
_require_infomaniak_config()
|
|
||||||
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
token_resp = await client.post(
|
|
||||||
INFOMANIAK_TOKEN_URL,
|
|
||||||
data={
|
|
||||||
"grant_type": "authorization_code",
|
|
||||||
"client_id": CLIENT_ID,
|
|
||||||
"client_secret": CLIENT_SECRET,
|
|
||||||
"code": code,
|
|
||||||
"redirect_uri": REDIRECT_URI,
|
|
||||||
},
|
|
||||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
|
||||||
timeout=30.0,
|
|
||||||
)
|
|
||||||
if token_resp.status_code != 200:
|
|
||||||
logger.error(
|
|
||||||
f"Infomaniak token exchange failed: {token_resp.status_code} {token_resp.text}"
|
|
||||||
)
|
|
||||||
return HTMLResponse(
|
|
||||||
content=f"<html><body><h1>Connection Failed</h1><p>{token_resp.text}</p></body></html>",
|
|
||||||
status_code=400,
|
|
||||||
)
|
|
||||||
token_json = token_resp.json()
|
|
||||||
access_token = token_json.get("access_token")
|
|
||||||
refresh_token = token_json.get("refresh_token", "")
|
|
||||||
expires_in = int(token_json.get("expires_in", 0))
|
|
||||||
granted_scopes = token_json.get("scope", "")
|
|
||||||
|
|
||||||
if not access_token:
|
|
||||||
return HTMLResponse(
|
|
||||||
content="<html><body><h1>Connection Failed</h1><p>No access token.</p></body></html>",
|
|
||||||
status_code=400,
|
|
||||||
)
|
|
||||||
|
|
||||||
rootInterface = getRootInterface()
|
|
||||||
if not refresh_token:
|
|
||||||
try:
|
|
||||||
existing_tokens = rootInterface.getTokensByConnectionIdAndAuthority(
|
|
||||||
connection_id, AuthAuthority.INFOMANIAK
|
|
||||||
)
|
|
||||||
if existing_tokens:
|
|
||||||
existing_tokens.sort(
|
|
||||||
key=lambda x: parseTimestamp(x.createdAt, default=0), reverse=True
|
|
||||||
)
|
|
||||||
refresh_token = existing_tokens[0].tokenRefresh or ""
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
profile_resp = await client.get(
|
|
||||||
f"{INFOMANIAK_API_BASE}/1/profile",
|
|
||||||
headers={
|
|
||||||
"Authorization": f"Bearer {access_token}",
|
|
||||||
"Accept": "application/json",
|
|
||||||
},
|
|
||||||
timeout=30.0,
|
|
||||||
)
|
|
||||||
if profile_resp.status_code != 200:
|
|
||||||
logger.error(
|
|
||||||
f"Infomaniak profile lookup failed: {profile_resp.status_code} {profile_resp.text}"
|
|
||||||
)
|
|
||||||
return HTMLResponse(
|
|
||||||
content="<html><body><h1>Connection Failed</h1><p>Could not load Infomaniak profile.</p></body></html>",
|
|
||||||
status_code=400,
|
|
||||||
)
|
|
||||||
profile_payload = profile_resp.json()
|
|
||||||
profile = profile_payload.get("data") if isinstance(profile_payload, dict) else None
|
|
||||||
profile = profile or {}
|
|
||||||
|
|
||||||
user = rootInterface.getUser(user_id)
|
|
||||||
if not user:
|
|
||||||
return HTMLResponse(
|
|
||||||
content="""
|
|
||||||
<html><body><script>
|
|
||||||
if (window.opener) {
|
|
||||||
window.opener.postMessage({ type: 'infomaniak_connection_error', error: 'User not found' }, '*');
|
|
||||||
setTimeout(() => window.close(), 1000);
|
|
||||||
} else window.close();
|
|
||||||
</script></body></html>
|
|
||||||
""",
|
|
||||||
status_code=404,
|
|
||||||
)
|
|
||||||
|
|
||||||
interface = getInterface(user)
|
|
||||||
connections = interface.getUserConnections(user_id)
|
|
||||||
connection = None
|
connection = None
|
||||||
for conn in connections:
|
for conn in interface.getUserConnections(currentUser.id):
|
||||||
if conn.id == connection_id:
|
if conn.id == connectionId and conn.authority == AuthAuthority.INFOMANIAK:
|
||||||
connection = conn
|
connection = conn
|
||||||
break
|
break
|
||||||
if not connection:
|
if not connection:
|
||||||
return HTMLResponse(
|
raise HTTPException(
|
||||||
content="""
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
<html><body><script>
|
detail=routeApiMsg("Infomaniak connection not found"),
|
||||||
if (window.opener) {
|
|
||||||
window.opener.postMessage({ type: 'infomaniak_connection_error', error: 'Connection not found' }, '*');
|
|
||||||
setTimeout(() => window.close(), 1000);
|
|
||||||
} else window.close();
|
|
||||||
</script></body></html>
|
|
||||||
""",
|
|
||||||
status_code=404,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
ext_id = str(profile.get("id", "")) if profile.get("id") is not None else ""
|
try:
|
||||||
username = profile.get("login") or profile.get("email") or ext_id
|
drives = await listAccessibleDrives(pat)
|
||||||
email = profile.get("email")
|
except InfomaniakIdentityError as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Infomaniak token submit for connection {connectionId} could not "
|
||||||
|
f"list drives: {e}"
|
||||||
|
)
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail=routeApiMsg(
|
||||||
|
"Token rejected by Infomaniak (missing scope 'drive'). "
|
||||||
|
"Required scopes: 'drive' (kDrive) and "
|
||||||
|
"'workspace:calendar' (or 'workspace:contact'). Mail "
|
||||||
|
"scope 'workspace:mail' is reserved."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
expires_at = createExpirationTimestamp(expires_in)
|
try:
|
||||||
granted_scopes_list = (
|
identity = await resolveOwnerIdentity(pat)
|
||||||
granted_scopes
|
except InfomaniakIdentityError as e:
|
||||||
if isinstance(granted_scopes, list)
|
logger.warning(
|
||||||
else (granted_scopes.split(" ") if granted_scopes else infomaniakDataScopes)
|
f"Infomaniak token submit for connection {connectionId} could not "
|
||||||
)
|
f"resolve owner identity: {e}"
|
||||||
|
)
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail=routeApiMsg(
|
||||||
|
"Could not derive your Infomaniak account from the token. "
|
||||||
|
"Please ensure the PAT carries 'workspace:calendar' or "
|
||||||
|
"'workspace:contact' so we can identify your account."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
tokenFingerprint = "pat-" + hashlib.sha256(pat.encode("utf-8")).hexdigest()[:8]
|
||||||
|
username = identity["displayName"] or f"infomaniak-{tokenFingerprint}"
|
||||||
|
expiresAt = createExpirationTimestamp(_INFOMANIAK_TOKEN_EXPIRES_IN_SEC)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connection.status = ConnectionStatus.ACTIVE
|
connection.status = ConnectionStatus.ACTIVE
|
||||||
connection.lastChecked = getUtcTimestamp()
|
connection.lastChecked = getUtcTimestamp()
|
||||||
connection.expiresAt = expires_at
|
connection.expiresAt = expiresAt
|
||||||
connection.externalId = ext_id
|
connection.externalId = str(identity["accountId"])
|
||||||
connection.externalUsername = username
|
connection.externalUsername = username
|
||||||
if email:
|
connection.grantedScopes = [
|
||||||
connection.externalEmail = email
|
"drive",
|
||||||
connection.grantedScopes = granted_scopes_list
|
"workspace:mail",
|
||||||
rootInterface.db.recordModify(UserConnection, connection_id, connection.model_dump())
|
"workspace:calendar",
|
||||||
|
"workspace:contact",
|
||||||
|
]
|
||||||
|
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
|
||||||
|
|
||||||
token = Token(
|
token = Token(
|
||||||
userId=user.id,
|
userId=currentUser.id,
|
||||||
authority=AuthAuthority.INFOMANIAK,
|
authority=AuthAuthority.INFOMANIAK,
|
||||||
connectionId=connection_id,
|
connectionId=connectionId,
|
||||||
tokenPurpose=TokenPurpose.DATA_CONNECTION,
|
tokenPurpose=TokenPurpose.DATA_CONNECTION,
|
||||||
tokenAccess=access_token,
|
tokenAccess=pat,
|
||||||
tokenRefresh=refresh_token,
|
tokenRefresh=None,
|
||||||
tokenType=token_json.get("token_type", "bearer"),
|
tokenType="bearer",
|
||||||
expiresAt=expires_at,
|
expiresAt=expiresAt,
|
||||||
createdAt=getUtcTimestamp(),
|
createdAt=getUtcTimestamp(),
|
||||||
)
|
)
|
||||||
interface.saveConnectionToken(token)
|
interface.saveConnectionToken(token)
|
||||||
|
|
||||||
return HTMLResponse(
|
driveSummary = [
|
||||||
content=f"""
|
{"id": d.get("id"), "name": d.get("name"), "role": d.get("role")}
|
||||||
<html>
|
for d in drives
|
||||||
<head><title>Connection Successful</title></head>
|
]
|
||||||
<body>
|
logger.info(
|
||||||
<script>
|
f"Infomaniak PAT stored for connection {connectionId} "
|
||||||
if (window.opener) {{
|
f"(user {currentUser.id}, externalUsername={username}, "
|
||||||
window.opener.postMessage({{
|
f"kSuiteAccountId={identity['accountId']}, "
|
||||||
type: 'infomaniak_connection_success',
|
f"accessibleDrives={driveSummary})"
|
||||||
connection: {{
|
|
||||||
id: '{connection.id}',
|
|
||||||
status: 'connected',
|
|
||||||
type: 'infomaniak',
|
|
||||||
lastChecked: {getUtcTimestamp()},
|
|
||||||
expiresAt: {expires_at}
|
|
||||||
}}
|
|
||||||
}}, '*');
|
|
||||||
setTimeout(() => window.close(), 1000);
|
|
||||||
}} else {{
|
|
||||||
window.close();
|
|
||||||
}}
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"id": connection.id,
|
||||||
|
"status": "connected",
|
||||||
|
"type": "infomaniak",
|
||||||
|
"externalUsername": username,
|
||||||
|
"externalEmail": None,
|
||||||
|
"lastChecked": connection.lastChecked,
|
||||||
|
}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error updating Infomaniak connection: {str(e)}", exc_info=True)
|
logger.error(
|
||||||
return HTMLResponse(
|
f"Error persisting Infomaniak token for connection {connectionId}: {e}",
|
||||||
content=f"""
|
exc_info=True,
|
||||||
<html><body><script>
|
)
|
||||||
if (window.opener) {{
|
raise HTTPException(
|
||||||
window.opener.postMessage({{ type: 'infomaniak_connection_error', error: {json.dumps(str(e))} }}, '*');
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
setTimeout(() => window.close(), 1000);
|
detail=routeApiMsg("Failed to store Infomaniak token"),
|
||||||
}} else window.close();
|
|
||||||
</script></body></html>
|
|
||||||
""",
|
|
||||||
status_code=500,
|
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -244,9 +244,15 @@ async def auth_login_callback(
|
||||||
def auth_connect(
|
def auth_connect(
|
||||||
request: Request,
|
request: Request,
|
||||||
connectionId: str = Query(..., description="UserConnection id"),
|
connectionId: str = Query(..., description="UserConnection id"),
|
||||||
|
reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"),
|
||||||
currentUser: User = Depends(getCurrentUser),
|
currentUser: User = Depends(getCurrentUser),
|
||||||
) -> RedirectResponse:
|
) -> RedirectResponse:
|
||||||
"""Start Microsoft Data OAuth for an existing connection."""
|
"""Start Microsoft Data OAuth for an existing connection.
|
||||||
|
|
||||||
|
With ``reauth=1`` the consent screen is forced (``prompt=consent``) so the
|
||||||
|
user re-grants permissions and any newly added scopes (e.g. Calendars.Read,
|
||||||
|
Contacts.Read) actually land on the access token.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
_require_msft_data_config()
|
_require_msft_data_config()
|
||||||
interface = getInterface(currentUser)
|
interface = getInterface(currentUser)
|
||||||
|
|
@ -280,6 +286,8 @@ def auth_connect(
|
||||||
if "@" in login_hint:
|
if "@" in login_hint:
|
||||||
login_kwargs["domain_hint"] = login_hint.split("@", 1)[1]
|
login_kwargs["domain_hint"] = login_hint.split("@", 1)[1]
|
||||||
login_kwargs["prompt"] = "login"
|
login_kwargs["prompt"] = "login"
|
||||||
|
if reauth:
|
||||||
|
login_kwargs["prompt"] = "consent"
|
||||||
|
|
||||||
auth_url = msal_app.get_authorization_request_url(
|
auth_url = msal_app.get_authorization_request_url(
|
||||||
scopes=msftDataScopes,
|
scopes=msftDataScopes,
|
||||||
|
|
@ -412,6 +420,29 @@ async def auth_connect_callback(
|
||||||
)
|
)
|
||||||
interface.saveConnectionToken(token)
|
interface.saveConnectionToken(token)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from modules.shared.callbackRegistry import callbackRegistry
|
||||||
|
|
||||||
|
if connection.knowledgeIngestionEnabled:
|
||||||
|
callbackRegistry.trigger(
|
||||||
|
"connection.established",
|
||||||
|
connectionId=connection.id,
|
||||||
|
authority=str(getattr(connection.authority, "value", connection.authority) or "msft"),
|
||||||
|
userId=str(user.id),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.skipped",
|
||||||
|
"connectionId": connection.id,
|
||||||
|
"authority": "msft",
|
||||||
|
"reason": "consent_disabled",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception as _cbErr:
|
||||||
|
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
|
||||||
|
|
||||||
return HTMLResponse(
|
return HTMLResponse(
|
||||||
content=f"""
|
content=f"""
|
||||||
<html>
|
<html>
|
||||||
|
|
|
||||||
|
|
@@ -187,7 +187,15 @@ def _catalogTypeToJsonSchema(typeStr: str, _depth: int = 0) -> Dict[str, Any]:


 def _createDispatchHandler(actionExecutor, methodName: str, actionName: str):
-    """Create an async handler that dispatches to the ActionExecutor."""
+    """Create an async handler that dispatches to the ActionExecutor.
+
+    Parameter validation and Ref-payload normalization (collapsing
+    ``{id: ..., featureCode: ...}`` from the agent's typed tool schema to the
+    bare UUID expected by action implementations) happen centrally inside
+    ``ActionExecutor.executeAction`` via ``parameterValidation``. This keeps
+    a single source of truth for the action parameter contract regardless
+    of caller (agent, workflow graph, REST route).
+    """
     async def _handler(args: Dict[str, Any], context: Dict[str, Any]) -> ToolResult:
         try:
             if context:

@@ -392,6 +392,18 @@ def buildSystemPrompt(
         "- Prefer modular file structures over monolithic files.\n"
         "- When generating applications, create separate files for logical components.\n"
         "- Always plan the structure before writing code.\n\n"
+        "### Document references for AI tools (CRITICAL)\n"
+        "Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
+        "`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
+        "- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
+        " `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
+        " `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
+        " in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
+        " resolver only matches `docItem:` references.\n"
+        "- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
+        " and image embeds (``).\n"
+        "Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
+        "`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
     )

     if toolsFormatted:

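# Illustration (not from this changeset): the system-prompt hunk above tells the
# agent how to wire tool outputs together. Spelled out with placeholder ids, the
# reference contract looks like this; the dict shapes are only for illustration.
result_line = (
    "Downloaded 'report.pdf' (52311 bytes)\n"
    " documentList ref: docItem:abc123\n"
    " file id: f-789"
)

# correct: the docItem reference goes verbatim into documentList
good_call = {"tool": "ai_summarizeDocument",
             "arguments": {"documentList": ["docItem:abc123"], "summaryLength": "medium"}}

# wrong: wrapping in {"documents": [...]} or passing the raw file id will not
# resolve, because the documentList resolver only matches docItem: references
bad_call = {"tool": "ai_summarizeDocument",
            "arguments": {"documentList": [{"documents": [{"id": "f-789"}]}]}}

# the bare file id is reserved for the direct file tools
read_call = {"tool": "readFile", "arguments": {"fileId": "f-789"}}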
@ -9,7 +9,9 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
|
||||||
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
|
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
|
||||||
|
|
||||||
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
|
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
|
||||||
|
_attachFileAsChatDocument,
|
||||||
_buildResolverDbFromServices,
|
_buildResolverDbFromServices,
|
||||||
|
_formatToolFileResult,
|
||||||
_getOrCreateTempFolder,
|
_getOrCreateTempFolder,
|
||||||
_looksLikeBinary,
|
_looksLikeBinary,
|
||||||
_resolveFileScope,
|
_resolveFileScope,
|
||||||
|
|
@ -37,6 +39,11 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
return getattr(chatService, "interfaceDbComponent", None)
|
return getattr(chatService, "interfaceDbComponent", None)
|
||||||
|
|
||||||
# ---- DataSource convenience tools ----
|
# ---- DataSource convenience tools ----
|
||||||
|
# Maps the FE-side `sourceType` literal (see SourcesTab.tsx
|
||||||
|
# `_SERVICE_TO_SOURCE_TYPE`) to the Connector's `service` key in
|
||||||
|
# `_SERVICE_MAP`. Keep this table in sync with both the FE and the
|
||||||
|
# Connector `_SERVICE_MAP` entries -- a missing row produces
|
||||||
|
# "Service '<sourceType>' not available" in the agent tools.
|
||||||
_SOURCE_TYPE_TO_SERVICE = {
|
_SOURCE_TYPE_TO_SERVICE = {
|
||||||
"sharepointFolder": "sharepoint",
|
"sharepointFolder": "sharepoint",
|
||||||
"onedriveFolder": "onedrive",
|
"onedriveFolder": "onedrive",
|
||||||
|
|
@ -45,6 +52,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
"gmailFolder": "gmail",
|
"gmailFolder": "gmail",
|
||||||
"ftpFolder": "files",
|
"ftpFolder": "files",
|
||||||
"clickupList": "clickup",
|
"clickupList": "clickup",
|
||||||
|
"kdriveFolder": "kdrive",
|
||||||
|
"calendarFolder": "calendar",
|
||||||
|
"contactFolder": "contact",
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _resolveDataSource(dsId: str):
|
async def _resolveDataSource(dsId: str):
|
||||||
|
|
@ -223,11 +233,27 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
tempFolderId = _getOrCreateTempFolder(chatService)
|
tempFolderId = _getOrCreateTempFolder(chatService)
|
||||||
if tempFolderId:
|
if tempFolderId:
|
||||||
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
|
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
|
||||||
|
|
||||||
|
chatDocId = _attachFileAsChatDocument(
|
||||||
|
services, fileItem,
|
||||||
|
label=f"datasource:{dsId or directService or 'download'}",
|
||||||
|
userMessage=f"Downloaded {fileName} from external data source",
|
||||||
|
)
|
||||||
|
|
||||||
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
|
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
|
||||||
hint = "Use readFile to read the text content." if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg") else "Use readFile to access the content."
|
hint = (
|
||||||
|
"Use readFile to read the text content."
|
||||||
|
if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg")
|
||||||
|
else "Use readFile to access the content."
|
||||||
|
)
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
toolCallId="", toolName="downloadFromDataSource", success=True,
|
toolCallId="", toolName="downloadFromDataSource", success=True,
|
||||||
data=f"Downloaded '{fileName}' ({len(fileBytes)} bytes) → local file id: {fileItem.id}. {hint}"
|
data=_formatToolFileResult(
|
||||||
|
fileItem=fileItem,
|
||||||
|
chatDocId=chatDocId,
|
||||||
|
actionLabel="Downloaded",
|
||||||
|
extraInfo=hint,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
|
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
|
||||||
|
|
@ -300,8 +326,15 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
registry.register(
|
registry.register(
|
||||||
"downloadFromDataSource", _downloadFromDataSource,
|
"downloadFromDataSource", _downloadFromDataSource,
|
||||||
description=(
|
description=(
|
||||||
"Download a file or email from a data source into local storage. Returns a local file ID "
|
"Download a file or email from a data source into local storage. "
|
||||||
"to read with readFile. Accepts either dataSourceId OR connectionId+service. "
|
"The result line contains TWO ids you must use for different purposes:\n"
|
||||||
|
" - `documentList ref: docItem:<chatDocId>` -- pass this string verbatim "
|
||||||
|
" inside the `documentList` parameter of `ai_process`, "
|
||||||
|
" `ai_summarizeDocument`, `context_extractContent`, `context_neutralizeData`, etc. "
|
||||||
|
" Always use the `docItem:<chatDocId>` form, NOT the file id, NOT a `{\"documents\":[{\"id\":...}]}` "
|
||||||
|
" wrapper -- the documentList resolver only matches `docItem:` references against the workflow.\n"
|
||||||
|
" - `file id: <fileId>` -- pass this to `readFile`, `searchInFileContent`, image embeds (`file:<fileId>`).\n"
|
||||||
|
"Accepts either dataSourceId OR connectionId+service. "
|
||||||
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
|
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
|
||||||
),
|
),
|
||||||
parameters={
|
parameters={
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,6 @@ from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistr
|
||||||
|
|
||||||
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
|
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
|
||||||
_getOrCreateTempFolder,
|
_getOrCreateTempFolder,
|
||||||
_looksLikeBinary,
|
|
||||||
_resolveFileScope,
|
|
||||||
_MAX_TOOL_RESULT_CHARS,
|
_MAX_TOOL_RESULT_CHARS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -392,65 +390,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
|
||||||
if chunkMime:
|
if chunkMime:
|
||||||
mimeType = chunkMime
|
mimeType = chunkMime
|
||||||
|
|
||||||
# 2) File not yet indexed -> trigger extraction via ExtractionService, then retry
|
# 2) Direct image file (not a container) - use raw file data
|
||||||
if not imageData and knowledgeService and not knowledgeService.isFileIndexed(fileId):
|
|
||||||
try:
|
|
||||||
chatService = services.chat
|
|
||||||
fileInfo = chatService.getFileInfo(fileId)
|
|
||||||
fileContent = chatService.getFileContent(fileId)
|
|
||||||
if fileContent and fileInfo:
|
|
||||||
rawData = fileContent.get("data", "")
|
|
||||||
if isinstance(rawData, str) and len(rawData) > 100:
|
|
||||||
rawBytes = _b64.b64decode(rawData)
|
|
||||||
elif isinstance(rawData, bytes):
|
|
||||||
rawBytes = rawData
|
|
||||||
else:
|
|
||||||
rawBytes = None
|
|
||||||
|
|
||||||
if rawBytes:
|
|
||||||
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry
|
|
||||||
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
|
||||||
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
|
||||||
|
|
||||||
fileMime = fileInfo.get("mimeType", "application/octet-stream")
|
|
||||||
fileName = fileInfo.get("fileName", fileId)
|
|
||||||
extracted = runExtraction(
|
|
||||||
ExtractorRegistry(), None,
|
|
||||||
rawBytes, fileName, fileMime, ExtractionOptions(),
|
|
||||||
)
|
|
||||||
|
|
||||||
contentObjects = []
|
|
||||||
for part in extracted.parts:
|
|
||||||
tg = (part.typeGroup or "").lower()
|
|
||||||
ct = "image" if tg == "image" else "text"
|
|
||||||
if not part.data or not part.data.strip():
|
|
||||||
continue
|
|
||||||
contentObjects.append({
|
|
||||||
"contentObjectId": part.id,
|
|
||||||
"contentType": ct,
|
|
||||||
"data": part.data,
|
|
||||||
"contextRef": {"containerPath": fileName, "location": part.label, **(part.metadata or {})},
|
|
||||||
})
|
|
||||||
|
|
||||||
if contentObjects:
|
|
||||||
_diFiId, _diMId = _resolveFileScope(fileId, context)
|
|
||||||
await knowledgeService.indexFile(
|
|
||||||
fileId=fileId, fileName=fileName, mimeType=fileMime,
|
|
||||||
userId=context.get("userId", ""), contentObjects=contentObjects,
|
|
||||||
featureInstanceId=_diFiId,
|
|
||||||
mandateId=_diMId,
|
|
||||||
)
|
|
||||||
|
|
||||||
chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
|
|
||||||
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
|
|
||||||
if pageIndex is not None:
|
|
||||||
imageChunks = [c for c in imageChunks if c.get("contextRef", {}).get("pageIndex") == pageIndex]
|
|
||||||
if imageChunks:
|
|
||||||
imageData = imageChunks[0].get("data", "")
|
|
||||||
except Exception as extractErr:
|
|
||||||
logger.warning(f"describeImage: on-demand extraction failed: {extractErr}")
|
|
||||||
|
|
||||||
# 3) Direct image file (not a container) - use raw file data
|
|
||||||
if not imageData:
|
if not imageData:
|
||||||
chatService = services.chat
|
chatService = services.chat
|
||||||
fileContent = chatService.getFileContent(fileId)
|
fileContent = chatService.getFileContent(fileId)
|
||||||
|
|
@ -460,7 +400,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
|
||||||
imageData = fileContent.get("data", "")
|
imageData = fileContent.get("data", "")
|
||||||
mimeType = fileMimeType
|
mimeType = fileMimeType
|
||||||
|
|
||||||
# 4) PDF page rendering: render the requested page as an image via PyMuPDF
|
# 3) PDF page rendering: render the requested page as an image via PyMuPDF
|
||||||
if not imageData:
|
if not imageData:
|
||||||
chatService = services.chat
|
chatService = services.chat
|
||||||
fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None
|
fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None
|
||||||
|
|
|
||||||
|
|
@@ -3,7 +3,8 @@
 """Shared helpers for core agent tools (file scope, binary detection, temp folder)."""

 import logging
-from typing import Any, Optional
+import uuid
+from typing import Any, Dict, Optional, Tuple

 logger = logging.getLogger(__name__)

@@ -78,6 +79,138 @@ def _getOrCreateTempFolder(chatService) -> Optional[str]:
     return None


+def _attachFileAsChatDocument(
+    services: Any,
+    fileItem: Any,
+    *,
+    label: str = "agent_tool_output",
+    userMessage: str = "",
+    role: str = "assistant",
+) -> Optional[str]:
+    """Bind a persisted FileItem to the active workflow as a ChatDocument.
+
+    This is the **single canonical bridge** between agent-tool-produced
+    artefacts and the workflow's document model. Mirrors the pattern
+    used by workflow actions (``workflowProcessor.persistTaskResult`` /
+    ``methodTrustee.extractFromFiles``): every artefact a workflow step
+    -- including agent tools -- materialises ends up addressable via
+    ``docItem:<chatDocId>`` so downstream tools that consume
+    ``documentList`` can resolve it against
+    ``workflow.messages[*].documents[*].id``.
+
+    Without this bind the agent's ``downloadFromDataSource`` /
+    ``writeFile(create)`` / ``renderDocument`` / ``generateImage`` /
+    ``createChart`` outputs are FileItem-only and unreachable from
+    ``getChatDocumentsFromDocumentList`` -- the symptom is
+    ``ai_summarizeDocument`` etc. running with 0 ContentParts.
+
+    Args:
+        services: agent-tool services container (must expose ``.chat``).
+        fileItem: persisted FileItem (Pydantic obj or dict) returned
+            from ``saveUploadedFile`` / ``createFile`` /
+            ``saveGeneratedFile``.
+        label: ``documentsLabel`` for the carrier ChatMessage --
+            picked up by ``docList:<label>`` references.
+        userMessage: optional human-readable message text.
+        role: ``"assistant"`` (default) or ``"tool"``; affects only
+            display semantics, not resolution.
+
+    Returns:
+        The new ``ChatDocument.id`` on success, or ``None`` when no
+        active workflow is bound to the chat service (e.g. standalone
+        agent calls outside a chat workflow). Never raises.
+    """
+    try:
+        chatService = services.chat
+        workflow = getattr(chatService, "_workflow", None)
+        if not workflow or not getattr(workflow, "id", None):
+            return None
+
+        if isinstance(fileItem, dict):
+            fileId = fileItem.get("id")
+            fileName = fileItem.get("fileName")
+            fileSize = fileItem.get("fileSize") or 0
+            mimeType = fileItem.get("mimeType") or "application/octet-stream"
+        else:
+            fileId = getattr(fileItem, "id", None)
+            fileName = getattr(fileItem, "fileName", None)
+            fileSize = getattr(fileItem, "fileSize", None) or 0
+            mimeType = getattr(fileItem, "mimeType", None) or "application/octet-stream"
+
+        if not fileId:
+            logger.warning("_attachFileAsChatDocument: fileItem has no id, skipping bind.")
+            return None
+
+        chatDoc: Dict[str, Any] = {
+            "id": str(uuid.uuid4()),
+            "fileId": fileId,
+            "fileName": fileName or fileId,
+            "fileSize": fileSize,
+            "mimeType": mimeType,
+            "roundNumber": getattr(workflow, "currentRound", None),
+            "taskNumber": getattr(workflow, "currentTask", None),
+            "actionNumber": getattr(workflow, "currentAction", None),
+        }
+        messageData: Dict[str, Any] = {
+            "id": f"msg_tool_{uuid.uuid4().hex[:12]}",
+            "role": role,
+            "status": "step",
+            "message": userMessage or f"Tool result: {fileName or fileId}",
+            "documentsLabel": label,
+        }
+
+        createdMessage = chatService.storeMessageWithDocuments(
+            workflow, messageData, [chatDoc],
+        )
+        if not createdMessage or not getattr(createdMessage, "documents", None):
+            return None
+        return createdMessage.documents[0].id
+    except Exception as e:
+        logger.warning(f"_attachFileAsChatDocument failed (fileItem id={getattr(fileItem, 'id', None) or (fileItem.get('id') if isinstance(fileItem, dict) else '?')}): {e}")
+        return None
+
+
+def _formatToolFileResult(
+    *,
+    fileItem: Any,
+    chatDocId: Optional[str],
+    actionLabel: str = "Created",
+    extraInfo: str = "",
+) -> str:
+    """Render the canonical agent-tool file result message.
+
+    Always presents BOTH ids the agent needs:
+    * ``docItem:<chatDocId>`` -- use as ``documentList`` entry for
+      tools like ``ai_process`` / ``ai_summarizeDocument`` /
+      ``context_extractContent`` (resolved through ChatDocument).
+    * ``file id: <fileItem.id>`` -- use as ``fileId`` for direct
+      reads via ``readFile`` / ``downloadFile`` / image embedding
+      (``file:<fileItem.id>``).
+
+    When no active workflow is bound, ``chatDocId`` is ``None`` and
+    only the file-id line is shown -- the file is still usable for
+    direct reads, just not for ``documentList`` references (those
+    require a workflow context anyway).
+    """
+    if isinstance(fileItem, dict):
+        fileId = fileItem.get("id", "?")
+        fileName = fileItem.get("fileName", "")
+        fileSize = fileItem.get("fileSize", 0)
+    else:
+        fileId = getattr(fileItem, "id", "?")
+        fileName = getattr(fileItem, "fileName", "")
+        fileSize = getattr(fileItem, "fileSize", 0)
+
+    head = f"{actionLabel} '{fileName}' ({fileSize} bytes)" if fileName else f"{actionLabel} file ({fileSize} bytes)"
+    parts = [head]
+    if chatDocId:
+        parts.append(f" documentList ref: docItem:{chatDocId}")
+    parts.append(f" file id: {fileId}")
+    if extraInfo:
+        parts.append(extraInfo)
+    return "\n".join(parts)
+
+
 def _buildResolverDbFromServices(services: Any):
     """DB adapter for ConnectorResolver: load UserConnections by id.
|
|
||||||
|
|
|
||||||
|
|
@@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResult
 from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry

 from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
+    _attachFileAsChatDocument,
+    _formatToolFileResult,
     _getOrCreateTempFolder,
     _looksLikeBinary,
     _resolveFileScope,

@@ -316,7 +318,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
             tempFolderId = _getOrCreateTempFolder(chatService)
             if tempFolderId:
                 chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
-            savedFiles.append(f"- {docName} (id: {fid})")
+            chatDocId = _attachFileAsChatDocument(
+                services, fileItem,
+                label=f"renderDocument:{docName}",
+                userMessage=f"Rendered document {docName}",
+            )
+            refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
+            savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
             sideEvents.append({
                 "type": "fileCreated",
                 "data": {

@@ -340,7 +348,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
             "Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
             "For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
             "`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
-            "Images:  in the markdown."
+            "Images:  in the markdown. "
+            "Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
+            "`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
+            "`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
         ),
         parameters={
             "type": "object",
@@ -588,7 +599,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
             tempFolderId = _getOrCreateTempFolder(chatService)
             if tempFolderId:
                 chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
-            savedFiles.append(f"- {docName} (id: {fid})")
+            chatDocId = _attachFileAsChatDocument(
+                services, fileItem,
+                label=f"generateImage:{docName}",
+                userMessage=f"Generated image {docName}",
+            )
+            refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
+            savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
             sideEvents.append({
                 "type": "fileCreated",
                 "data": {

@@ -612,7 +629,9 @@ def _registerMediaTools(registry: ToolRegistry, services):
             "Generate an image from a text description using AI (DALL-E). "
             "The generated image is saved as a file in the workspace. "
             "Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. "
-            "Provide a detailed, descriptive prompt for best results."
+            "Provide a detailed, descriptive prompt for best results. "
+            "Each image's result line carries `file id: <fileId>` (for embeds / readFile) and "
+            "`doc ref: docItem:<chatDocId>` (use inside `documentList` for downstream AI tools)."
         ),
         parameters={
             "type": "object",

@@ -743,14 +762,24 @@ def _registerMediaTools(registry: ToolRegistry, services):
             if tempFolderId and fid != "?":
                 chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})

+            chatDocId = _attachFileAsChatDocument(
+                services, fileItem,
+                label=f"createChart:{fileName}",
+                userMessage=f"Created chart {fileName}",
+            )
+
             sideEvents = [{"type": "fileCreated", "data": {
                 "fileId": fid, "fileName": fileName,
                 "mimeType": "image/png", "fileSize": len(pngData),
             }}]
             return ToolResult(
                 toolCallId="", toolName="createChart", success=True,
-                data=f"Chart saved as '{fileName}' (id: {fid}, {len(pngData)} bytes). "
-                     f"Embed in documents with: ",
+                data=_formatToolFileResult(
+                    fileItem=fileItem,
+                    chatDocId=chatDocId,
+                    actionLabel="Chart saved as",
+                    extraInfo=f"Embed in documents with: ",
+                ),
                 sideEvents=sideEvents,
             )

@@ -764,7 +793,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
             "Create a data chart/graph as a PNG image using matplotlib. "
             "Supported types: bar, horizontalBar, line, area, scatter, pie, donut. "
             "The chart is saved as a file in the workspace. "
-            "Use the returned fileId to embed in documents via renderDocument: . "
+            "Use the returned `file id: <fileId>` to embed in documents via "
+            "renderDocument: . The result line also carries "
+            "`doc ref: docItem:<chatDocId>` -- use it inside `documentList` for "
+            "downstream AI tools that need the chart as a data source. "
             "Provide structured data with labels and datasets."
         ),
         parameters={
             "type": "object",
@@ -9,10 +9,11 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResult
 from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry

 from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
+    _attachFileAsChatDocument,
+    _formatToolFileResult,
     _getOrCreateInstanceFolder,
     _getOrCreateTempFolder,
     _looksLikeBinary,
-    _resolveFileScope,
     _MAX_TOOL_RESULT_CHARS,
 )

@@ -48,6 +49,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
             return ToolResult(toolCallId="", toolName="readFile", success=False, error="fileId is required")
         try:
             knowledgeService = services.getService("knowledge") if hasattr(services, "getService") else None
+            fileStatus = None

             # 1) Knowledge Store: return already-extracted text chunks
             if knowledgeService:

@@ -75,7 +77,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
                         data=f"[File {fileId} is currently being processed (status: {fileStatus}). Try again shortly.]",
                     )

-            # 2) Not indexed yet: try on-demand extraction
+            # 2) Not indexed yet: inspect file type to decide how to serve the agent
+            #    (binary -> instruct agent to wait / re-upload; text -> decode raw bytes inline)
             chatService = services.chat
             fileInfo = chatService.getFileInfo(fileId)
             if not fileInfo:
@@ -98,83 +101,14 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
             isBinary = _looksLikeBinary(rawBytes)

             if isBinary:
-                try:
-                    from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
-                    from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
-                    from modules.datamodels.datamodelExtraction import ExtractionOptions
-
-                    extracted = runExtraction(
-                        ExtractorRegistry(), ChunkerRegistry(),
-                        rawBytes, fileName, mimeType, ExtractionOptions(),
-                    )
-
-                    contentObjects = []
-                    for part in extracted.parts:
-                        tg = (part.typeGroup or "").lower()
-                        ct = "image" if tg == "image" else "text"
-                        if not part.data or not part.data.strip():
-                            continue
-                        contentObjects.append({
-                            "contentObjectId": part.id,
-                            "contentType": ct,
-                            "data": part.data,
-                            "contextRef": {
-                                "containerPath": fileName,
-                                "location": part.label or "file",
-                                **(part.metadata or {}),
-                            },
-                        })
-
-                    if contentObjects:
-                        if knowledgeService:
-                            try:
-                                userId = context.get("userId", "")
-                                _fiId, _mId = _resolveFileScope(fileId, context)
-                                await knowledgeService.indexFile(
-                                    fileId=fileId, fileName=fileName, mimeType=mimeType,
-                                    userId=userId, contentObjects=contentObjects,
-                                    featureInstanceId=_fiId,
-                                    mandateId=_mId,
-                                )
-                            except Exception as e:
-                                logger.warning(f"readFile: knowledge indexing failed for {fileId}: {e}")
-
-                        joined = ""
-                        if knowledgeService:
-                            _chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
-                            _textChunks = [
-                                c for c in (_chunks or [])
-                                if c.get("contentType") != "image" and c.get("data")
-                            ]
-                            if _textChunks:
-                                joined = "\n\n".join(c["data"] for c in _textChunks)
-                        if not joined:
-                            textParts = [o["data"] for o in contentObjects if o["contentType"] != "image"]
-                            joined = "\n\n".join(textParts) if textParts else ""
-                        if joined:
-                            chunked = _applyOffsetLimit(joined, offset, limit)
-                            if chunked is not None:
-                                return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
-                            if len(joined) > _MAX_TOOL_RESULT_CHARS:
-                                joined = joined[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(joined)}. Use offset/limit to read specific sections.]"
-                            return ToolResult(
-                                toolCallId="", toolName="readFile", success=True,
-                                data=joined,
-                            )
-                        imgCount = sum(1 for o in contentObjects if o["contentType"] == "image")
-                        return ToolResult(
-                            toolCallId="", toolName="readFile", success=True,
-                            data=f"[Extracted {len(contentObjects)} content objects from '{fileName}' "
-                                 f"({imgCount} images, no readable text). "
-                                 f"Use describeImage(fileId='{fileId}') to analyze visual content.]",
-                        )
-                except Exception as extractErr:
-                    logger.warning(f"readFile extraction failed for {fileId} ({fileName}): {extractErr}")
-
                 return ToolResult(
                     toolCallId="", toolName="readFile", success=True,
-                    data=f"[Binary file: '{fileName}', type={mimeType}, size={len(rawBytes)} bytes. "
-                         f"Text extraction not available. Use describeImage for images.]",
+                    data=(
+                        f"[File '{fileName}' ({mimeType}) is not yet indexed "
+                        f"(status: {fileStatus or 'unknown'}). Indexing runs automatically "
+                        f"on upload. Please wait a few seconds and retry, or re-upload the file. "
+                        f"For visual content use describeImage(fileId='{fileId}').]"
+                    ),
                 )

             # 3) Text file: decode raw bytes
@@ -428,9 +362,19 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
                 dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
             if args.get("tags"):
                 dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
+
+            chatDocId = _attachFileAsChatDocument(
+                services, fileItem,
+                label=f"writeFile:{name}",
+                userMessage=f"Created {name} via writeFile",
+            )
             return ToolResult(
                 toolCallId="", toolName="writeFile", success=True,
-                data=f"File '{name}' created (id: {fileItem.id})",
+                data=_formatToolFileResult(
+                    fileItem=fileItem,
+                    chatDocId=chatDocId,
+                    actionLabel="Created",
+                ),
                 sideEvents=[{
                     "type": "fileCreated",
                     "data": {

@@ -573,7 +517,11 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
             "- create (default): create a new file (name required).\n"
             "- append: append content to an existing file (fileId required). "
             "Use for large content that exceeds a single tool call (~8000 chars per call).\n"
-            "- overwrite: replace entire file content (fileId required)."
+            "- overwrite: replace entire file content (fileId required).\n"
+            "On `mode=create` the result line contains BOTH a `documentList ref: docItem:<chatDocId>` "
+            "(use this for documentList parameters of `ai_process` / `ai_summarizeDocument` / "
+            "`context_extractContent` etc., always as the literal string `docItem:<id>`) AND a "
+            "`file id: <fileId>` (use this for `readFile`, `writeFile mode=append`, image embeds)."
         ),
         parameters={
             "type": "object",
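To make the two-id contract concrete, a hedged sketch of the argument shapes an agent might send; the exact tool-call JSON schema is not shown in this diff, so parameter names other than `mode`, `name`, `content`, `fileId`, and `documentList` are assumptions.

# Step 1 (assumed argument shape): create the file with writeFile.
writeFileArgs = {"mode": "create", "name": "summary.md", "content": "# Draft\n..."}
# The result line then carries, per the description above, something like:
#   documentList ref: docItem:3f6c...    and    file id: a1b2...

# Step 2: reference the same content downstream.
aiProcessArgs = {
    "documentList": ["docItem:3f6c..."],  # literal "docItem:<chatDocId>" string
    "prompt": "Summarize the draft",      # assumed parameter name
}
appendArgs = {"mode": "append", "fileId": "a1b2...", "content": "more markdown"}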
@@ -178,6 +178,33 @@ class AgentService:
         if workflowId is None:
             workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown"

+        # Propagate the active workflow into every service's request
+        # context so agent-tool side effects (e.g. _attachFileAsChatDocument
+        # for downloadFromDataSource / writeFile / renderDocument) can
+        # bind their FileItem outputs to the workflow as ChatDocuments.
+        # Without this, chatService._workflow (= chatService._context.workflow)
+        # stays None and the documentList resolver finds zero docs --
+        # which is exactly the "Building structure prompt with 0 valid
+        # ContentParts" symptom we see when the workspace route calls
+        # runAgent for an attached single-file data source.
+        # Mirrors workflowManager._propagateWorkflowToContext.
+        if workflowId and workflowId != "unknown":
+            try:
+                workflow = getattr(self.services, "workflow", None)
+                if workflow is None or getattr(workflow, "id", None) != workflowId:
+                    workflow = self.services.chat.getWorkflow(workflowId)
+                if workflow is not None:
+                    self.services.workflow = workflow
+                    ctx = getattr(self.services, "_service_context", None)
+                    if ctx is not None:
+                        ctx.workflow = workflow
+                    for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
+                        svc = getattr(self.services, attr, None)
+                        if svc is not None and hasattr(svc, "_context") and svc._context is not None:
+                            svc._context.workflow = workflow
+            except Exception as e:
+                logger.warning(f"runAgent: could not propagate workflow {workflowId} into service contexts: {e}")
+
         resolvedLanguage = userLanguage or ""

         enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds)
@@ -164,12 +164,29 @@ class AiService:
         # SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection
         if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS:
             return await self._handleSpeechTeams(request)

-        # FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check)
-        self._preflightBillingCheck()
-
-        # Balance & provider permission checks
-        await self._checkBillingBeforeAiCall()
+        _opType = request.options.operationType if request.options else None
+        _isNeutralizationCall = _opType in (
+            OperationTypeEnum.NEUTRALIZATION_TEXT,
+            OperationTypeEnum.NEUTRALIZATION_IMAGE,
+        )
+
+        if not _isNeutralizationCall:
+            # FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check)
+            self._preflightBillingCheck()
+            # Balance & provider permission checks
+            await self._checkBillingBeforeAiCall()
+        else:
+            # Neutralization calls are system-level operations (connector anonymization).
+            # They run without a mandate context (e.g. personal-scope connections) and
+            # are billed the same way as embedding calls: best-effort, skipped when no
+            # billing settings exist for an empty mandate.
+            logger.debug(
+                "callAi: skipping billing preflight for neutralization call "
+                "(operationType=%s, user=%s)",
+                _opType,
+                getattr(getattr(self.services, 'user', None), 'id', 'unknown'),
+            )

         # Calculate effective allowedProviders: RBAC ∩ Workflow
         effectiveProviders = self._calculateEffectiveProviders()

@@ -218,8 +235,15 @@ class AiService:
         Rehydration happens on the final AiCallResponse (not on individual str deltas).
         """
         await self.ensureAiObjectsInitialized()
-        self._preflightBillingCheck()
-        await self._checkBillingBeforeAiCall()
+
+        _streamOpType = request.options.operationType if request.options else None
+        _isNeutralizationStream = _streamOpType in (
+            OperationTypeEnum.NEUTRALIZATION_TEXT,
+            OperationTypeEnum.NEUTRALIZATION_IMAGE,
+        )
+        if not _isNeutralizationStream:
+            self._preflightBillingCheck()
+            await self._checkBillingBeforeAiCall()

         effectiveProviders = self._calculateEffectiveProviders()
         if effectiveProviders and request.options:
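Both call paths above repeat the same two-member operation-type check. A minimal sketch of how it could be expressed as one predicate follows; this refactor is not part of the diff, and `OperationTypeEnum` is assumed to be imported as in the hunks above.

def _isNeutralizationOp(options) -> bool:
    """True for connector-anonymization calls that skip the billing preflight (sketch)."""
    opType = getattr(options, "operationType", None) if options else None
    return opType in (
        OperationTypeEnum.NEUTRALIZATION_TEXT,
        OperationTypeEnum.NEUTRALIZATION_IMAGE,
    )

# e.g. in callAi / the streaming variant:
#   if not _isNeutralizationOp(request.options):
#       self._preflightBillingCheck()
#       await self._checkBillingBeforeAiCall()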
@@ -463,36 +463,38 @@ class ChatService:
         Returns:
             List of file info dicts.
         """
+        # `getAllFiles` returns `List[dict]` (each entry is a
+        # `FileItem.model_dump()` enriched with label columns) -- not
+        # Pydantic objects -- so we use dict-access throughout.
         allFiles = self.interfaceDbComponent.getAllFiles()
         results = []
         for fileItem in allFiles:
             if folderId is not None:
-                itemFolderId = getattr(fileItem, "folderId", None)
-                if itemFolderId != folderId:
+                if fileItem.get("folderId") != folderId:
                     continue

             if tags:
-                itemTags = getattr(fileItem, "tags", None) or []
+                itemTags = fileItem.get("tags") or []
                 if not any(t in itemTags for t in tags):
                     continue

             if search:
                 searchLower = search.lower()
-                nameMatch = searchLower in (fileItem.fileName or "").lower()
-                descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower()
+                nameMatch = searchLower in (fileItem.get("fileName") or "").lower()
+                descMatch = searchLower in (fileItem.get("description") or "").lower()
                 if not nameMatch and not descMatch:
                     continue

             results.append({
-                "id": fileItem.id,
-                "fileName": fileItem.fileName,
-                "mimeType": fileItem.mimeType,
-                "fileSize": fileItem.fileSize,
-                "creationDate": fileItem.sysCreatedAt,
-                "tags": getattr(fileItem, "tags", None),
-                "folderId": getattr(fileItem, "folderId", None),
-                "description": getattr(fileItem, "description", None),
-                "status": getattr(fileItem, "status", None),
+                "id": fileItem.get("id"),
+                "fileName": fileItem.get("fileName"),
+                "mimeType": fileItem.get("mimeType"),
+                "fileSize": fileItem.get("fileSize"),
+                "creationDate": fileItem.get("sysCreatedAt"),
+                "tags": fileItem.get("tags"),
+                "folderId": fileItem.get("folderId"),
+                "description": fileItem.get("description"),
+                "status": fileItem.get("status"),
             })
         return results
@@ -2,9 +2,13 @@
 # All rights reserved.
 """Knowledge service: 3-tier RAG with indexing, semantic search, and context building."""

+import hashlib
+import json
 import logging
 import re
-from typing import Any, Callable, Dict, List, Optional
+import time
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List, Optional, Union

 from modules.datamodels.datamodelKnowledge import (
     FileContentIndex, ContentChunk, WorkflowMemory,
@@ -20,6 +24,68 @@ DEFAULT_CHUNK_TOKENS = 400
DEFAULT_CONTEXT_BUDGET = 12000


# =============================================================================
# Ingestion façade (P0 of unified-knowledge-indexing concept)
# =============================================================================

@dataclass
class IngestionJob:
    """One request to add or refresh content in the unified knowledge store.

    Callers from any lane (routes, feature hooks, agent tools, connector sync)
    describe the work they want done via this object; idempotency, scope
    resolution, and embedding are handled by KnowledgeService.requestIngestion.
    """
    sourceKind: str
    sourceId: str
    fileName: str
    mimeType: str
    userId: str
    contentObjects: List[Dict[str, Any]] = field(default_factory=list)
    featureInstanceId: str = ""
    mandateId: str = ""
    structure: Optional[Dict[str, Any]] = None
    containerPath: Optional[str] = None
    contentVersion: Optional[str] = None
    provenance: Optional[Dict[str, Any]] = None
    # Connector-driven neutralization: True when the user opted in via §2.6 preferences.
    # For sourceKind == "file", _indexFileInternal resolves this from FileItem.neutralize instead.
    neutralize: bool = False
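For orientation, a minimal sketch of how a connector lane might describe one item to the façade. The concrete field values (the `sourceKind` string, ids, provenance keys other than `connectionId`) are illustrative assumptions; only the field names come from the dataclass above, and `provenance["connectionId"]` is the key relied on by the purge path further down.

# Hypothetical example: one connector-sourced message handed to the façade.
job = IngestionJob(
    sourceKind="outlook_message",          # connector-defined kind (assumption)
    sourceId="msg-AAMkAD...",              # stable id of the source item (assumption)
    fileName="Re: Q3 forecast.eml",
    mimeType="message/rfc822",
    userId="user-123",
    contentObjects=[
        {"contentType": "text", "data": "Hi team, the Q3 numbers are ..."},
    ],
    provenance={"connectionId": "conn-456"},  # enables purge on connection.revoked
    neutralize=True,                          # user opted in to anonymization
)
# handle = await knowledgeService.requestIngestion(job)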
@dataclass
class IngestionHandle:
    """Result of requestIngestion. Stable across in-process and future queue impls."""
    jobId: str
    status: str
    contentHash: str
    fileId: str
    index: Optional[FileContentIndex] = None
    error: Optional[str] = None


def _computeIngestionHash(contentObjects: List[Dict[str, Any]]) -> str:
    """Deterministic SHA256 over (contentType, data) tuples in extractor order.

    `contentObjectId` is intentionally excluded because extractors generate
    fresh UUIDs per run (`uuid.uuid4()`), which would make the hash unstable
    across re-extractions of the same source — defeating idempotency.
    Order is preserved (no sort) because two different documents can share the
    same multiset of parts but differ in arrangement (e.g. swapped pages).
    Text whitespace is preserved intentionally because chunk boundaries
    depend on it.
    """
    normalized = [
        (
            str(o.get("contentType", "text") or "text"),
            o.get("data", "") or "",
        )
        for o in (contentObjects or [])
    ]
    payload = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
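A small usage sketch illustrating the idempotency property the docstring describes; the literal data strings are made up.

# Same parts, same order -> same hash (contentObjectId is ignored).
a = _computeIngestionHash([
    {"contentObjectId": "uuid-1", "contentType": "text", "data": "page one"},
    {"contentObjectId": "uuid-2", "contentType": "text", "data": "page two"},
])
b = _computeIngestionHash([
    {"contentObjectId": "uuid-9", "contentType": "text", "data": "page one"},
    {"contentObjectId": "uuid-8", "contentType": "text", "data": "page two"},
])
assert a == b

# Swapped order -> different hash, so rearranged documents re-index.
c = _computeIngestionHash([
    {"contentType": "text", "data": "page two"},
    {"contentType": "text", "data": "page one"},
])
assert c != a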
class KnowledgeService:
    """Service for Knowledge Store operations: indexing, retrieval, and context building."""

@@ -46,6 +112,224 @@ class KnowledgeService:
        results = await self._embed([text])
        return results[0] if results else []

    # =========================================================================
    # Ingestion façade (single entry point for all lanes)
    # =========================================================================

    async def requestIngestion(self, job: IngestionJob) -> IngestionHandle:
        """Unified entry point for filling the knowledge corpus.

        Applies idempotency based on a content hash (or caller-supplied
        `contentVersion`) persisted in `FileContentIndex.structure._ingestion`.
        Re-runs indexing only when the hash differs or the previous run did
        not reach `indexed` state. Runs embedding synchronously for now
        (callers already schedule background tasks where needed).
        """
        jobId = f"{job.sourceKind}:{job.sourceId}"
        startMs = time.time()
        contentHash = job.contentVersion or _computeIngestionHash(job.contentObjects)

        # 1. Check for duplicate via existing FileContentIndex row.
        existing = None
        try:
            existing = self._knowledgeDb.getFileContentIndex(job.sourceId)
        except Exception:
            existing = None

        if existing:
            existingStructure = (
                existing.get("structure") if isinstance(existing, dict)
                else getattr(existing, "structure", {})
            ) or {}
            existingMeta = existingStructure.get("_ingestion", {}) or {}
            existingStatus = (
                existing.get("status") if isinstance(existing, dict)
                else getattr(existing, "status", "")
            ) or ""
            if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
                logger.info(
                    "ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
                    job.sourceKind, job.sourceId, contentHash[:12],
                    extra={"event": "ingestion.skipped.duplicate", "jobId": jobId,
                           "sourceKind": job.sourceKind, "sourceId": job.sourceId,
                           "hash": contentHash,
                           "durationMs": int((time.time() - startMs) * 1000)},
                )
                return IngestionHandle(
                    jobId=jobId,
                    status="duplicate",
                    contentHash=contentHash,
                    fileId=job.sourceId,
                    index=None,
                )

        # 2. Prepare ingestion metadata; stays in structure._ingestion so
        #    later connector revoke/purge can filter chunks by sourceKind /
        #    provenance.connectionId without a schema migration.
        ingestionMeta = {
            "hash": contentHash,
            "sourceKind": job.sourceKind,
            "sourceId": job.sourceId,
            "contentVersion": job.contentVersion,
            "indexedAt": getUtcTimestamp(),
            "provenance": dict(job.provenance or {}),
        }
        structure = dict(job.structure or {})
        structure["_ingestion"] = ingestionMeta

        logger.info(
            "ingestion.queued sourceKind=%s sourceId=%s objects=%d hash=%s",
            job.sourceKind, job.sourceId, len(job.contentObjects or []), contentHash[:12],
            extra={"event": "ingestion.queued", "jobId": jobId,
                   "sourceKind": job.sourceKind, "sourceId": job.sourceId,
                   "hash": contentHash, "objectCount": len(job.contentObjects or [])},
        )

        # 3. Run real indexing.
        try:
            index = await self._indexFileInternal(
                fileId=job.sourceId,
                fileName=job.fileName,
                mimeType=job.mimeType,
                userId=job.userId,
                featureInstanceId=job.featureInstanceId,
                mandateId=job.mandateId,
                contentObjects=job.contentObjects or [],
                structure=structure,
                containerPath=job.containerPath,
                sourceKind=job.sourceKind,
                connectionId=(job.provenance or {}).get("connectionId"),
                neutralize=job.neutralize,
            )
        except Exception as exc:
            logger.error(
                "ingestion.failed sourceKind=%s sourceId=%s error=%s",
                job.sourceKind, job.sourceId, exc,
                exc_info=True,
                extra={"event": "ingestion.failed", "jobId": jobId,
                       "sourceKind": job.sourceKind, "sourceId": job.sourceId,
                       "hash": contentHash, "error": str(exc),
                       "durationMs": int((time.time() - startMs) * 1000)},
            )
            try:
                self._knowledgeDb.updateFileStatus(job.sourceId, "failed")
            except Exception:
                pass
            return IngestionHandle(
                jobId=jobId,
                status="failed",
                contentHash=contentHash,
                fileId=job.sourceId,
                index=None,
                error=str(exc),
            )

        logger.info(
            "ingestion.indexed sourceKind=%s sourceId=%s objects=%d durationMs=%d",
            job.sourceKind, job.sourceId, len(job.contentObjects or []),
            int((time.time() - startMs) * 1000),
            extra={"event": "ingestion.indexed", "jobId": jobId,
                   "sourceKind": job.sourceKind, "sourceId": job.sourceId,
                   "hash": contentHash, "objectCount": len(job.contentObjects or []),
                   "durationMs": int((time.time() - startMs) * 1000)},
        )
        return IngestionHandle(
            jobId=jobId,
            status="indexed",
            contentHash=contentHash,
            fileId=job.sourceId,
            index=index,
        )
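A short sketch of the idempotent behaviour this buys callers; `knowledgeService` and `job` stand for any service instance and IngestionJob as illustrated earlier.

# Submitting the same job twice: the second call short-circuits on the
# persisted content hash and returns status "duplicate" without re-indexing.
first = await knowledgeService.requestIngestion(job)
assert first.status in ("indexed", "failed")

second = await knowledgeService.requestIngestion(job)
if first.status == "indexed":
    assert second.status == "duplicate"
    assert second.contentHash == first.contentHash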
    def purgeConnection(self, connectionId: str) -> Dict[str, int]:
        """Delete every FileContentIndex + ContentChunk linked to a UserConnection.

        Called on `connection.revoked` events so the knowledge corpus never
        holds chunks the user has withdrawn access to. Returns deletion counts
        for observability.
        """
        if not connectionId:
            return {"indexRows": 0, "chunks": 0}
        startMs = time.time()
        result = self._knowledgeDb.deleteFileContentIndexByConnectionId(connectionId)
        logger.info(
            "ingestion.connection.purged connectionId=%s rows=%d chunks=%d durationMs=%d",
            connectionId, result["indexRows"], result["chunks"],
            int((time.time() - startMs) * 1000),
            extra={"event": "ingestion.connection.purged", "connectionId": connectionId,
                   "indexRows": result["indexRows"], "chunks": result["chunks"],
                   "durationMs": int((time.time() - startMs) * 1000)},
        )
        return result
    def getIngestionStatus(
        self, handleOrJobId: Union[IngestionHandle, str]
    ) -> Dict[str, Any]:
        """Map a handle or `sourceKind:sourceId` jobId to a status snapshot."""
        if isinstance(handleOrJobId, IngestionHandle):
            sourceId = handleOrJobId.fileId
            jobId = handleOrJobId.jobId
        elif isinstance(handleOrJobId, str) and ":" in handleOrJobId:
            jobId = handleOrJobId
            sourceId = handleOrJobId.split(":", 1)[1]
        else:
            jobId = str(handleOrJobId)
            sourceId = str(handleOrJobId)

        row = None
        try:
            row = self._knowledgeDb.getFileContentIndex(sourceId)
        except Exception:
            row = None
        if not row:
            return {
                "jobId": jobId,
                "sourceId": sourceId,
                "status": "unknown",
                "contentHash": None,
            }

        structure = (
            row.get("structure") if isinstance(row, dict)
            else getattr(row, "structure", {})
        ) or {}
        meta = structure.get("_ingestion", {}) or {}
        status = (
            row.get("status") if isinstance(row, dict)
            else getattr(row, "status", "")
        ) or "unknown"
        return {
            "jobId": jobId,
            "sourceId": sourceId,
            "status": status,
            "contentHash": meta.get("hash"),
            "sourceKind": meta.get("sourceKind"),
            "indexedAt": meta.get("indexedAt"),
        }
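Status can be polled either with the returned handle or with the "sourceKind:sourceId" jobId string; the ids in this sketch are illustrative.

snapshot = knowledgeService.getIngestionStatus("outlook_message:msg-AAMkAD...")
print(snapshot["status"], snapshot["contentHash"])  # e.g. "indexed", "3f2a..."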

    # =========================================================================
    # File Indexing (called after extraction, before embedding)
    # =========================================================================
@@ -61,6 +345,57 @@ class KnowledgeService:
        contentObjects: List[Dict[str, Any]] = None,
        structure: Dict[str, Any] = None,
        containerPath: str = None,
    ) -> Optional[FileContentIndex]:
        """Backward-compatible wrapper delegating to requestIngestion.

        Existing callers that still invoke `indexFile` directly automatically
        participate in the idempotency/metrics layer. New callers should
        prefer `requestIngestion` so they can pass `sourceKind` and
        `provenance` for connector revoke/purge later.
        """
        job = IngestionJob(
            sourceKind="file",
            sourceId=fileId,
            fileName=fileName,
            mimeType=mimeType,
            userId=userId,
            featureInstanceId=featureInstanceId,
            mandateId=mandateId,
            contentObjects=list(contentObjects or []),
            structure=structure,
            containerPath=containerPath,
        )
        handle = await self.requestIngestion(job)
        if handle.index is not None:
            return handle.index
        if handle.status == "duplicate":
            row = None
            try:
                row = self._knowledgeDb.getFileContentIndex(fileId)
            except Exception:
                row = None
            if isinstance(row, dict):
                try:
                    return FileContentIndex(**row)
                except Exception:
                    return None
            return row
        return None

    async def _indexFileInternal(
        self,
        fileId: str,
        fileName: str,
        mimeType: str,
        userId: str,
        featureInstanceId: str = "",
        mandateId: str = "",
        contentObjects: List[Dict[str, Any]] = None,
        structure: Dict[str, Any] = None,
        containerPath: str = None,
        sourceKind: str = "file",
        connectionId: Optional[str] = None,
        neutralize: bool = False,
    ) -> FileContentIndex:
        """Index a file's content objects and create embeddings for text chunks.
@@ -83,39 +418,41 @@ class KnowledgeService:
         """
         contentObjects = contentObjects or []

-        # 1. Resolve scope fields from FileItem (Single Source of Truth)
-        # FileItem lives in poweron_management; its scope/mandateId/featureInstanceId
-        # are authoritative and must be mirrored onto the FileContentIndex.
+        # 1. Resolve scope fields from FileItem (Single Source of Truth) for
+        #    uploaded files. Connector-sourced ingestion (sharepoint_item,
+        #    outlook_message, ...) has no FileItem row — trust the caller's
+        #    scope + ids directly.
         resolvedScope = "personal"
         resolvedMandateId = mandateId
         resolvedFeatureInstanceId = featureInstanceId
         resolvedUserId = userId
-        _shouldNeutralize = False
-        try:
-            from modules.datamodels.datamodelFiles import FileItem as _FileItem
-            _dbComponent = getattr(self._context, "interfaceDbComponent", None)
-            _fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
-            if not _fileRecords:
-                from modules.interfaces.interfaceDbManagement import ComponentObjects
-                _row = ComponentObjects().db._loadRecord(_FileItem, fileId)
-                if _row:
-                    _fileRecords = [_row]
-            if _fileRecords:
-                _fileRecord = _fileRecords[0]
-                _get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
-                _shouldNeutralize = bool(_get("neutralize", False))
-                _fileScope = _get("scope")
-                if _fileScope:
-                    resolvedScope = _fileScope
-                if not resolvedMandateId:
-                    resolvedMandateId = str(_get("mandateId", "") or "")
-                if not resolvedFeatureInstanceId:
-                    resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
-                _fileCreatedBy = _get("sysCreatedBy")
-                if _fileCreatedBy:
-                    resolvedUserId = str(_fileCreatedBy)
-        except Exception:
-            pass
+        _shouldNeutralize = neutralize  # caller-supplied flag (connector prefs / IngestionJob)
+        if sourceKind == "file":
+            try:
+                from modules.datamodels.datamodelFiles import FileItem as _FileItem
+                _dbComponent = getattr(self._context, "interfaceDbComponent", None)
+                _fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
+                if not _fileRecords:
+                    from modules.interfaces.interfaceDbManagement import ComponentObjects
+                    _row = ComponentObjects().db._loadRecord(_FileItem, fileId)
+                    if _row:
+                        _fileRecords = [_row]
+                if _fileRecords:
+                    _fileRecord = _fileRecords[0]
+                    _get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
+                    _shouldNeutralize = bool(_get("neutralize", False))  # FileItem is authoritative for uploads
+                    _fileScope = _get("scope")
+                    if _fileScope:
+                        resolvedScope = _fileScope
+                    if not resolvedMandateId:
+                        resolvedMandateId = str(_get("mandateId", "") or "")
+                    if not resolvedFeatureInstanceId:
+                        resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
+                    _fileCreatedBy = _get("sysCreatedBy")
+                    if _fileCreatedBy:
+                        resolvedUserId = str(_fileCreatedBy)
+            except Exception:
+                pass

         # 2. Create FileContentIndex with correct scope from the start
         index = FileContentIndex(
@@ -124,6 +461,8 @@ class KnowledgeService:
             featureInstanceId=resolvedFeatureInstanceId,
             mandateId=resolvedMandateId,
             scope=resolvedScope,
+            sourceKind=sourceKind,
+            connectionId=connectionId,
             fileName=fileName,
             mimeType=mimeType,
             containerPath=containerPath,
@@ -300,7 +639,12 @@ class KnowledgeService:
             Formatted context string for injection into the agent's system prompt.
         """
         queryVector = await self._embedSingle(currentPrompt)
+        logger.debug(
+            "buildAgentContext.start userId=%s featureInstanceId=%s mandateId=%s isSysAdmin=%s prompt=%r",
+            userId, featureInstanceId, mandateId, isSysAdmin, (currentPrompt or "")[:120],
+        )
         if not queryVector:
+            logger.debug("buildAgentContext.abort reason=no_query_vector")
             return ""

         builder = _ContextBuilder(budget=contextBudget)

@@ -327,9 +671,14 @@ class KnowledgeService:
             featureInstanceId=featureInstanceId,
             mandateId=mandateId,
             limit=15,
-            minScore=0.65,
+            minScore=0.35,
             isSysAdmin=isSysAdmin,
         )
+        logger.debug(
+            "buildAgentContext.layer1 instanceChunks=%d top_scores=%s",
+            len(instanceChunks),
+            [round(float(c.get("_score", 0) or 0), 3) for c in (instanceChunks or [])[:3]],
+        )
         if instanceChunks:
             builder.add(priority=1, label="Relevant Documents", items=instanceChunks, maxChars=4000)

@@ -338,7 +687,7 @@ class KnowledgeService:
             queryVector=queryVector,
             workflowId=workflowId,
             limit=10,
-            minScore=0.55,
+            minScore=0.35,
         )
         if roundMemories:
             memItems = []

@@ -376,7 +725,7 @@ class KnowledgeService:
             scope="mandate",
             mandateId=mandateId,
             limit=10,
-            minScore=0.7,
+            minScore=0.35,
             isSysAdmin=isSysAdmin,
         )
         if mandateChunks:

@@ -392,7 +741,12 @@ class KnowledgeService:
                 maxChars=500,
             )

-        return builder.build()
+        _result = builder.build()
+        logger.debug(
+            "buildAgentContext.done totalChars=%d userId=%s",
+            len(_result), userId,
+        )
+        return _result

     # =========================================================================
     # Workflow Memory
@@ -0,0 +1,334 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Connection-lifecycle consumer bridging OAuth events to ingestion jobs.

Subscribes to `connection.established` and `connection.revoked` callbacks
emitted by the OAuth callbacks / connection management routes and dispatches:

- `connection.established` -> enqueue a `connection.bootstrap` BackgroundJob
  that walks the connector and ingests all reachable items via
  KnowledgeService.requestIngestion (file-like or virtual documents).
- `connection.revoked` -> run `KnowledgeService.purgeConnection` synchronously
  so the knowledge corpus releases the data before the UI confirms the revoke.

The consumer is registered once at process boot (see `app.py` lifespan).
It intentionally does NOT hold a per-user service context; each callback
creates whatever context it needs from the UserConnection row itself.
"""

from __future__ import annotations

import asyncio
import logging
from typing import Any, Dict, Optional

from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.shared.callbackRegistry import callbackRegistry
from modules.serviceCenter.services.serviceBackgroundJobs import (
    registerJobHandler,
    startJob,
)

logger = logging.getLogger(__name__)

BOOTSTRAP_JOB_TYPE = "connection.bootstrap"

_registered = False
def _onConnectionEstablished(
    *,
    connectionId: str,
    authority: str,
    userId: Optional[str] = None,
    **kwargs: Any,
) -> None:
    """Fire-and-forget bootstrap enqueue for a freshly connected UserConnection."""
    if not connectionId:
        logger.warning("connection.established without connectionId; ignoring")
        return
    payload: Dict[str, Any] = {
        "connectionId": connectionId,
        "authority": (authority or "").lower(),
        "userId": userId,
    }
    logger.info(
        "ingestion.connection.bootstrap.queued connectionId=%s authority=%s",
        connectionId, authority,
        extra={"event": "ingestion.connection.bootstrap.queued",
               "connectionId": connectionId, "authority": authority},
    )

    async def _enqueue() -> None:
        try:
            await startJob(
                BOOTSTRAP_JOB_TYPE,
                payload,
                triggeredBy=userId,
            )
        except Exception as exc:
            logger.error(
                "ingestion.connection.bootstrap.enqueue_failed connectionId=%s error=%s",
                connectionId, exc, exc_info=True,
            )

    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            loop.create_task(_enqueue())
        else:
            loop.run_until_complete(_enqueue())
    except RuntimeError:
        asyncio.run(_enqueue())
def _onConnectionRevoked(
    *,
    connectionId: str,
    authority: Optional[str] = None,
    userId: Optional[str] = None,
    reason: Optional[str] = None,
    **kwargs: Any,
) -> None:
    """Run the knowledge purge synchronously so UI feedback is authoritative."""
    if not connectionId:
        logger.warning("connection.revoked without connectionId; ignoring")
        return
    try:
        # Purge lives on the DB interface to avoid ServiceCenter/user-context
        # plumbing here; the service method is a thin wrapper on top of this.
        result = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
    except Exception as exc:
        logger.error(
            "ingestion.connection.purged.failed connectionId=%s error=%s",
            connectionId, exc, exc_info=True,
        )
        return
    logger.info(
        "ingestion.connection.purged connectionId=%s authority=%s reason=%s rows=%d chunks=%d",
        connectionId, authority, reason,
        result.get("indexRows", 0), result.get("chunks", 0),
        extra={"event": "ingestion.connection.purged", "connectionId": connectionId,
               "authority": authority, "reason": reason,
               "indexRows": result.get("indexRows", 0), "chunks": result.get("chunks", 0)},
    )
async def _bootstrapJobHandler(
    job: Dict[str, Any],
    progressCb,
) -> Dict[str, Any]:
    """Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
    payload = job.get("payload") or {}
    connectionId = payload.get("connectionId")
    authority = (payload.get("authority") or "").lower()
    if not connectionId:
        raise ValueError("connection.bootstrap requires payload.connectionId")

    progressCb(5, f"resolving {authority} connection")

    # Defensive consent check: if the connection has since disabled knowledge ingestion
    # (e.g. user toggled setting after the job was enqueued), skip all walkers.
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        _root = getRootInterface()
        _conn = _root.getUserConnectionById(connectionId)
        if _conn and not getattr(_conn, "knowledgeIngestionEnabled", True):
            logger.info(
                "ingestion.connection.bootstrap.skipped — consent disabled connectionId=%s",
                connectionId,
                extra={"event": "ingestion.connection.bootstrap.skipped",
                       "connectionId": connectionId, "authority": authority,
                       "reason": "consent_disabled"},
            )
            return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "consent_disabled"}
    except Exception as _guardErr:
        logger.debug("Could not load connection for consent guard: %s", _guardErr)

    def _normalize(res: Any, label: str) -> Dict[str, Any]:
        if isinstance(res, Exception):
            logger.error(
                "ingestion.connection.bootstrap.failed part=%s connectionId=%s error=%s",
                label, connectionId, res, exc_info=res,
            )
            return {"error": str(res)}
        return res or {}

    if authority == "msft":
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
            bootstrapSharepoint,
        )
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
            bootstrapOutlook,
        )

        progressCb(10, "sharepoint + outlook")
        spResult, olResult = await asyncio.gather(
            bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
            bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
            return_exceptions=True,
        )
        return {
            "connectionId": connectionId,
            "authority": authority,
            "sharepoint": _normalize(spResult, "sharepoint"),
            "outlook": _normalize(olResult, "outlook"),
        }

    if authority == "google":
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
            bootstrapGdrive,
        )
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
            bootstrapGmail,
        )

        progressCb(10, "drive + gmail")
        gdResult, gmResult = await asyncio.gather(
            bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
            bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
            return_exceptions=True,
        )
        return {
            "connectionId": connectionId,
            "authority": authority,
            "drive": _normalize(gdResult, "gdrive"),
            "gmail": _normalize(gmResult, "gmail"),
        }

    if authority == "clickup":
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
            bootstrapClickup,
        )

        progressCb(10, "clickup tasks")
        cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
        return {
            "connectionId": connectionId,
            "authority": authority,
            "clickup": _normalize(cuResult, "clickup"),
        }

    logger.info(
        "ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s",
        authority, connectionId,
        extra={"event": "ingestion.connection.bootstrap.skipped",
               "authority": authority, "connectionId": connectionId,
               "reason": "unsupported_authority"},
    )
    return {
        "connectionId": connectionId,
        "authority": authority,
        "skipped": True,
        "reason": "unsupported_authority",
    }
async def _scheduledDailyResync() -> None:
    """Enqueue a connection.bootstrap job for every active knowledge connection.

    Runs once per day (default 2 AM Europe/Zurich). Each job re-walks the
    connector and hands new / changed items to KnowledgeService.requestIngestion.
    Unchanged items are deduplicated by content-hash and skipped automatically.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        rootInterface = getRootInterface()
        connections = rootInterface.getActiveKnowledgeConnections()
    except Exception as exc:
        logger.error("knowledge.daily_resync: could not load connections: %s", exc, exc_info=True)
        return

    if not connections:
        logger.info("knowledge.daily_resync: no active knowledge connections — nothing to do")
        return

    logger.info(
        "knowledge.daily_resync: enqueuing bootstrap for %d connection(s)",
        len(connections),
        extra={"event": "knowledge.daily_resync.started", "count": len(connections)},
    )

    enqueued = 0
    skipped = 0
    for conn in connections:
        connectionId = str(conn.id)
        authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority)
        userId = str(conn.userId)
        payload: Dict[str, Any] = {
            "connectionId": connectionId,
            "authority": authority.lower(),
            "userId": userId,
        }
        try:
            await startJob(
                BOOTSTRAP_JOB_TYPE,
                payload,
                triggeredBy="scheduler.daily_resync",
            )
            enqueued += 1
            logger.debug(
                "knowledge.daily_resync: queued connectionId=%s authority=%s",
                connectionId, authority,
            )
        except Exception as exc:
            skipped += 1
            logger.error(
                "knowledge.daily_resync: failed to enqueue connectionId=%s: %s",
                connectionId, exc,
            )

    logger.info(
        "knowledge.daily_resync: done — enqueued=%d skipped=%d",
        enqueued, skipped,
        extra={"event": "knowledge.daily_resync.done", "enqueued": enqueued, "skipped": skipped},
    )
def registerDailyResyncScheduler(*, hour: int = 2, minute: int = 0) -> None:
|
||||||
|
"""Register the daily knowledge re-sync cron job. Idempotent.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hour: Hour of day to run (0–23, default 2 → 2 AM Europe/Zurich).
|
||||||
|
minute: Minute within the hour (default 0).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from modules.shared.eventManagement import eventManager
|
||||||
|
eventManager.registerCron(
|
||||||
|
jobId="knowledge.daily_resync",
|
||||||
|
func=_scheduledDailyResync,
|
||||||
|
cronKwargs={"hour": str(hour), "minute": str(minute)},
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"knowledge.daily_resync scheduler registered (daily %02d:%02d Europe/Zurich)",
|
||||||
|
hour, minute,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("knowledge.daily_resync scheduler registration failed (non-critical): %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def registerKnowledgeIngestionConsumer() -> None:
|
||||||
|
"""Register callback subscribers + background job handler. Idempotent."""
|
||||||
|
global _registered
|
||||||
|
if _registered:
|
||||||
|
return
|
||||||
|
callbackRegistry.register("connection.established", _onConnectionEstablished)
|
||||||
|
callbackRegistry.register("connection.revoked", _onConnectionRevoked)
|
||||||
|
registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
|
||||||
|
registerDailyResyncScheduler()
|
||||||
|
_registered = True
|
||||||
|
logger.info("KnowledgeIngestionConsumer registered (established/revoked + %s handler + daily resync)", BOOTSTRAP_JOB_TYPE)
|
||||||
|
|
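For orientation, a minimal sketch of a one-off manual backfill that reuses the same job type and payload shape the daily scheduler enqueues above. The `backfillOne` helper name, the entry point, and the assumption that `startJob` and `BOOTSTRAP_JOB_TYPE` are reachable from this module's namespace are illustrative, not part of the change set.

# Hypothetical helper; assumes startJob / BOOTSTRAP_JOB_TYPE from this module's namespace.
import asyncio

async def backfillOne(connectionId: str, authority: str, userId: str) -> None:
    payload = {
        "connectionId": connectionId,
        "authority": authority.lower(),  # handler branches on "msft" / "google" / "clickup"
        "userId": userId,
    }
    await startJob(BOOTSTRAP_JOB_TYPE, payload, triggeredBy="manual.backfill")

# asyncio.run(backfillOne("<connection-uuid>", "google", "<user-uuid>"))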
@@ -0,0 +1,101 @@
"""Per-connection knowledge ingestion preference helpers.

Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and
receive a `ConnectionIngestionPrefs` dataclass they can pass down into their
inner loops. All fields have safe defaults so walkers stay backward-compatible
with connections that predate the §2.6 preference schema (knowledgePreferences
is None).
"""
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

_DEFAULT_MAX_AGE_DAYS = 90
_DEFAULT_MAIL_DEPTH = "full"
_DEFAULT_CLICKUP_SCOPE = "title_description"


@dataclass
class ConnectionIngestionPrefs:
    """Parsed per-connection preferences for knowledge ingestion walkers."""

    # PII
    neutralizeBeforeEmbed: bool = False

    # Mail (Outlook + Gmail)
    mailContentDepth: str = _DEFAULT_MAIL_DEPTH  # "metadata" | "snippet" | "full"
    mailIndexAttachments: bool = False

    # Files (Drive / SharePoint / OneDrive)
    filesIndexBinaries: bool = True
    mimeAllowlist: List[str] = field(default_factory=list)  # empty = all allowed

    # ClickUp
    clickupScope: str = _DEFAULT_CLICKUP_SCOPE  # "titles" | "title_description" | "with_comments"
    clickupIndexAttachments: bool = False

    # Per-authority surface toggles (default everything on)
    gmailEnabled: bool = True
    driveEnabled: bool = True
    sharepointEnabled: bool = True
    outlookEnabled: bool = True

    # Time window
    maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS  # 0 = no limit


def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
    """Load and parse per-connection preferences from the database.

    Returns safe defaults for any missing or unparseable values so walkers
    never fail due to missing preference data.
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        root = getRootInterface()
        conn = root.getUserConnectionById(connectionId)
        if not conn:
            logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId)
            return ConnectionIngestionPrefs()

        raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None)
        if not raw or not isinstance(raw, dict):
            return ConnectionIngestionPrefs()

        def _bool(key: str, default: bool) -> bool:
            v = raw.get(key)
            return bool(v) if isinstance(v, bool) else default

        def _str(key: str, allowed: List[str], default: str) -> str:
            v = raw.get(key)
            return v if v in allowed else default

        def _int(key: str, default: int) -> int:
            v = raw.get(key)
            return int(v) if isinstance(v, int) else default

        surface = raw.get("surfaceToggles") or {}
        google_surf = surface.get("google") or {}
        msft_surf = surface.get("msft") or {}

        return ConnectionIngestionPrefs(
            neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
            mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
            mailIndexAttachments=_bool("mailIndexAttachments", False),
            filesIndexBinaries=_bool("filesIndexBinaries", True),
            mimeAllowlist=list(raw.get("mimeAllowlist") or []),
            clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
            clickupIndexAttachments=_bool("clickupIndexAttachments", False),
            gmailEnabled=bool(google_surf.get("gmail", True)),
            driveEnabled=bool(google_surf.get("drive", True)),
            sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
            outlookEnabled=bool(msft_surf.get("outlook", True)),
            maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
        )
    except Exception as exc:
        logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc)
        return ConnectionIngestionPrefs()
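For reference, a sketch of the knowledgePreferences document this parser expects on a connection. The concrete values are illustrative; any missing or out-of-range key simply falls back to the defaults defined above.

examplePreferences = {
    "neutralizeBeforeEmbed": True,
    "mailContentDepth": "snippet",                 # "metadata" | "snippet" | "full"
    "mailIndexAttachments": False,
    "filesIndexBinaries": True,
    "mimeAllowlist": ["application/pdf"],          # empty list = every MIME type allowed
    "clickupScope": "with_comments",
    "clickupIndexAttachments": False,
    "surfaceToggles": {
        "google": {"gmail": True, "drive": False},
        "msft": {"sharepoint": True, "outlook": True},
    },
    "maxAgeDays": 30,                              # 0 = no age limit
}
# A connection carrying this payload yields prefs.mailContentDepth == "snippet"
# and prefs.driveEnabled == False; unknown keys are ignored.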
@@ -0,0 +1,512 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""ClickUp bootstrap for the unified knowledge ingestion lane.

ClickUp tasks are ingested as *virtual documents* — we never download file
bytes. Each task becomes a `sourceKind="clickup_task"` IngestionJob whose
`contentObjects` carry a summary header (name + status + metadata) and the
task description / text content so retrieval finds them without a live API
call.

Hierarchy traversal: workspace (team) → spaces → folders / folderless lists →
tasks. We cap the fan-out with `maxWorkspaces` / `maxListsPerWorkspace` /
`maxTasks` and skip tasks older than `maxAgeDays` (default 180 d).

Idempotency: `date_updated` from the ClickUp task payload is a millisecond
timestamp and strictly monotonic per revision — used as `contentVersion`.
"""

from __future__ import annotations

import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional

logger = logging.getLogger(__name__)

MAX_TASKS_DEFAULT = 500
MAX_WORKSPACES_DEFAULT = 3
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
MAX_AGE_DAYS_DEFAULT = 180


@dataclass
class ClickupBootstrapLimits:
    maxTasks: int = MAX_TASKS_DEFAULT
    maxWorkspaces: int = MAX_WORKSPACES_DEFAULT
    maxListsPerWorkspace: int = MAX_LISTS_PER_WORKSPACE_DEFAULT
    maxDescriptionChars: int = MAX_DESCRIPTION_CHARS_DEFAULT
    # Only ingest tasks updated within the last N days. None disables filter.
    maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
    # Include closed/archived tasks if they still meet the recency filter.
    # ClickUp `closed` tasks often carry the most useful RAG context
    # ("why was this shipped the way it was?").
    includeClosed: bool = True
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False
    # Content scope: "titles" | "title_description" | "with_comments"
    clickupScope: str = "title_description"


@dataclass
class ClickupBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    workspaces: int = 0
    lists: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticTaskId(connectionId: str, taskId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{taskId}".encode("utf-8")).hexdigest()[:16]
    return f"cu:{connectionId[:8]}:{token}"


def _truncate(value: Any, limit: int) -> str:
    text = str(value or "").strip()
    if not text:
        return ""
    if len(text) <= limit:
        return text
    return text[:limit].rstrip() + "\n[truncated]"


def _isRecent(dateUpdatedMs: Any, maxAgeDays: Optional[int]) -> bool:
    if not maxAgeDays:
        return True
    if not dateUpdatedMs:
        return True
    try:
        ts = datetime.fromtimestamp(int(dateUpdatedMs) / 1000.0, tz=timezone.utc)
    except Exception:
        return True
    cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
    return ts >= cutoff


def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -> List[Dict[str, Any]]:
    """Header (name/status/metadata) + optional description + text_content.

    `limits.clickupScope` controls how much is embedded:
    - "titles": task name + status metadata only
    - "title_description": header + description / text_content (default)
    - "with_comments": header + description + text_content
      (comments themselves are not yet fetched in v1)
    """
    name = task.get("name") or f"Task {task.get('id', '')}"
    status = ((task.get("status") or {}).get("status")) or ""
    assignees = ", ".join(
        filter(None, [
            (a.get("username") or a.get("email") or "")
            for a in (task.get("assignees") or [])
        ])
    )
    tags = ", ".join(filter(None, [t.get("name", "") for t in (task.get("tags") or [])]))
    listInfo = task.get("list") or {}
    folderInfo = task.get("folder") or {}
    spaceInfo = task.get("space") or {}
    dueMs = task.get("due_date")
    dueIso = ""
    if dueMs:
        try:
            dueIso = datetime.fromtimestamp(int(dueMs) / 1000.0, tz=timezone.utc).strftime("%Y-%m-%d")
        except Exception:
            dueIso = ""

    headerLines = [
        f"Task: {name}",
        f"Status: {status}" if status else "",
        f"List: {listInfo.get('name', '')}" if listInfo else "",
        f"Folder: {folderInfo.get('name', '')}" if folderInfo else "",
        f"Space: {spaceInfo.get('name', '')}" if spaceInfo else "",
        f"Assignees: {assignees}" if assignees else "",
        f"Tags: {tags}" if tags else "",
        f"Due: {dueIso}" if dueIso else "",
        f"Url: {task.get('url', '')}" if task.get("url") else "",
    ]
    header = "\n".join(line for line in headerLines if line)

    parts: List[Dict[str, Any]] = [{
        "contentObjectId": "header",
        "contentType": "text",
        "data": header,
        "contextRef": {"part": "header"},
    }]

    scope = getattr(limits, "clickupScope", "title_description")
    if scope in ("title_description", "with_comments"):
        description = _truncate(task.get("description"), limits.maxDescriptionChars)
        if description:
            parts.append({
                "contentObjectId": "description",
                "contentType": "text",
                "data": description,
                "contextRef": {"part": "description"},
            })
        # text_content is ClickUp's rendered-markdown version; include if it adds
        # something beyond the plain description (common for bullet lists, checklists).
        textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
        if textContent and textContent != description:
            parts.append({
                "contentObjectId": "text_content",
                "contentType": "text",
                "data": textContent,
                "contextRef": {"part": "text_content"},
            })
    return parts


async def bootstrapClickup(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
    """Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = ClickupBootstrapLimits(
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            neutralize=prefs.neutralizeBeforeEmbed,
            clickupScope=prefs.clickupScope,
        )

    startMs = time.time()
    result = ClickupBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=clickup connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "clickup",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    svc = getattr(adapter, "_svc", None)
    if svc is None:
        result.errors.append("adapter missing _svc instance")
        return _finalizeResult(connectionId, result, startMs)

    try:
        teamsResp = await svc.getAuthorizedTeams()
    except Exception as exc:
        logger.error("clickup team discovery failed for %s: %s", connectionId, exc, exc_info=True)
        result.errors.append(f"teams: {exc}")
        return _finalizeResult(connectionId, result, startMs)

    teams = (teamsResp or {}).get("teams") or []
    for team in teams[: limits.maxWorkspaces]:
        if result.indexed + result.skippedDuplicate >= limits.maxTasks:
            break
        teamId = str(team.get("id", "") or "")
        if not teamId:
            continue
        result.workspaces += 1
        try:
            await _walkTeam(
                svc=svc,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                team=team,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
            result.errors.append(f"team({teamId}): {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerClickup.connectorClickup import ClickupConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = ClickupConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("clickup")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _walkTeam(
    *,
    svc,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    team: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    teamId = str(team.get("id", "") or "")
    spacesResp = await svc.getSpaces(teamId)
    spaces = (spacesResp or {}).get("spaces") or []

    listsCollected: List[Dict[str, Any]] = []
    for space in spaces:
        if len(listsCollected) >= limits.maxListsPerWorkspace:
            break
        spaceId = str(space.get("id", "") or "")
        if not spaceId:
            continue

        # Folderless lists directly under the space
        folderless = await svc.getFolderlessLists(spaceId)
        for lst in (folderless or {}).get("lists") or []:
            if len(listsCollected) >= limits.maxListsPerWorkspace:
                break
            listsCollected.append({**lst, "_space": space})

        # Lists inside folders
        foldersResp = await svc.getFolders(spaceId)
        for folder in (foldersResp or {}).get("folders") or []:
            if len(listsCollected) >= limits.maxListsPerWorkspace:
                break
            folderId = str(folder.get("id", "") or "")
            if not folderId:
                continue
            folderLists = await svc.getListsInFolder(folderId)
            for lst in (folderLists or {}).get("lists") or []:
                if len(listsCollected) >= limits.maxListsPerWorkspace:
                    break
                listsCollected.append({**lst, "_space": space, "_folder": folder})

    for lst in listsCollected:
        if result.indexed + result.skippedDuplicate >= limits.maxTasks:
            return
        result.lists += 1
        await _walkList(
            svc=svc,
            knowledgeService=knowledgeService,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            teamId=teamId,
            lst=lst,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )


async def _walkList(
    *,
    svc,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    teamId: str,
    lst: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    listId = str(lst.get("id", "") or "")
    if not listId:
        return
    page = 0
    while result.indexed + result.skippedDuplicate < limits.maxTasks:
        resp = await svc.getTasksInList(
            listId,
            page=page,
            include_closed=limits.includeClosed,
            subtasks=True,
        )
        if isinstance(resp, dict) and resp.get("error"):
            logger.warning("clickup tasks list=%s page=%d error: %s", listId, page, resp.get("error"))
            result.errors.append(f"list({listId}): {resp.get('error')}")
            return
        tasks = (resp or {}).get("tasks") or []
        if not tasks:
            return

        for task in tasks:
            if result.indexed + result.skippedDuplicate >= limits.maxTasks:
                return
            if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
                result.skippedPolicy += 1
                continue
            # Inject the list/folder/space metadata we already loaded.
            task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
            task["folder"] = task.get("folder") or lst.get("_folder") or {}
            task["space"] = task.get("space") or lst.get("_space") or {}
            await _ingestTask(
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                teamId=teamId,
                task=task,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )

        if len(tasks) < 100:  # ClickUp page-size hint: fewer than 100 => last page
            return
        page += 1


async def _ingestTask(
    *,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    teamId: str,
    task: Dict[str, Any],
    limits: ClickupBootstrapLimits,
    result: ClickupBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    taskId = str(task.get("id", "") or "")
    if not taskId:
        result.skippedPolicy += 1
        return
    revision = str(task.get("date_updated") or task.get("date_created") or "")
    name = task.get("name") or f"Task {taskId}"
    syntheticId = _syntheticTaskId(connectionId, taskId)
    fileName = f"{name[:80].strip() or taskId}.task.json"

    contentObjects = _buildContentObjects(task, limits)

    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="clickup_task",
                sourceId=syntheticId,
                fileName=fileName,
                mimeType="application/vnd.clickup.task+json",
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision or None,
                neutralize=limits.neutralize,
                provenance={
                    "connectionId": connectionId,
                    "authority": "clickup",
                    "service": "clickup",
                    "externalItemId": taskId,
                    "teamId": teamId,
                    "listId": ((task.get("list") or {}).get("id")),
                    "spaceId": ((task.get("space") or {}).get("id")),
                    "url": task.get("url"),
                    "status": ((task.get("status") or {}).get("status")),
                    "tier": limits.clickupScope,
                },
            )
        )
    except Exception as exc:
        logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({taskId}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1

    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
                f"clickup processed={processed}",
            )
        except Exception:
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "clickup",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )


def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy,
        result.failed, result.workspaces, result.lists, durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "clickup",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "failed": result.failed,
            "workspaces": result.workspaces,
            "lists": result.lists,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "workspaces": result.workspaces,
        "lists": result.lists,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
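As a usage reference, a small sketch of calling this walker directly with tighter caps, for instance from a maintenance shell. The `smokeTestClickup` helper and its limit values are illustrative only; adapter, connection and knowledge service resolution is left to `_resolveDependencies` as above.

import asyncio

async def smokeTestClickup(connectionId: str) -> dict:
    # Illustrative limits: one workspace, 25 tasks, titles only.
    limits = ClickupBootstrapLimits(maxTasks=25, maxWorkspaces=1, clickupScope="titles")
    return await bootstrapClickup(connectionId, limits=limits)

# summary = asyncio.run(smokeTestClickup("<connection-uuid>"))
# summary["indexed"], summary["skippedDuplicate"], summary["errors"][:3]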
@@ -0,0 +1,443 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Google Drive bootstrap for the unified knowledge ingestion lane.

Mirrors the SharePoint pilot (see subConnectorSyncSharepoint.py). Walks the
user's *My Drive* tree from the virtual `root` folder, downloads each
file-like item via `DriveAdapter.download` (which handles native Google docs
via export), runs the standard extraction pipeline and routes results through
`KnowledgeService.requestIngestion` with `sourceKind="gdrive_item"` and
`contentVersion = modifiedTime` (monotonic per-revision).
"""

from __future__ import annotations

import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional

from modules.datamodels.datamodelExtraction import ExtractionOptions

logger = logging.getLogger(__name__)

MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365

# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder"


@dataclass
class GdriveBootstrapLimits:
    maxItems: int = MAX_ITEMS_DEFAULT
    maxBytes: int = MAX_BYTES_DEFAULT
    maxFileSize: int = MAX_FILE_SIZE_DEFAULT
    skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
    maxDepth: int = MAX_DEPTH_DEFAULT
    # Only ingest files modified within the last N days. None disables filter.
    maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False
    # Whether to skip binary/non-text files
    filesIndexBinaries: bool = True


@dataclass
class GdriveBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    bytesProcessed: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
    return f"gd:{connectionId[:8]}:{token}"


def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
    parts = getattr(extracted, "parts", None) or []
    out: List[Dict[str, Any]] = []
    for part in parts:
        data = getattr(part, "data", None) or ""
        if not data or not str(data).strip():
            continue
        typeGroup = getattr(part, "typeGroup", "text") or "text"
        contentType = "text"
        if typeGroup == "image":
            contentType = "image"
        elif typeGroup in ("binary", "container"):
            contentType = "other"
        out.append({
            "contentObjectId": getattr(part, "id", ""),
            "contentType": contentType,
            "data": data,
            "contextRef": {
                "containerPath": fileName,
                "location": getattr(part, "label", None) or "file",
                **(getattr(part, "metadata", None) or {}),
            },
        })
    return out


def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
    if not maxAgeDays:
        return True
    if not modifiedIso:
        # No timestamp -> be permissive (Drive native docs sometimes omit it on export).
        return True
    try:
        # Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
        ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
    except Exception:
        return True
    cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    return ts >= cutoff


async def bootstrapGdrive(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[GdriveBootstrapLimits] = None,
    runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Walk My Drive starting from the virtual root folder."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = GdriveBootstrapLimits(
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            neutralize=prefs.neutralizeBeforeEmbed,
            filesIndexBinaries=prefs.filesIndexBinaries,
        )

    startMs = time.time()
    result = GdriveBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "gdrive",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
    if runExtractionFn is None:
        from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
        from modules.serviceCenter.services.serviceExtraction.subRegistry import (
            ExtractorRegistry, ChunkerRegistry,
        )
        extractorRegistry = ExtractorRegistry()
        chunkerRegistry = ChunkerRegistry()

        def runExtractionFn(bytesData, name, mime, options):  # type: ignore[no-redef]
            return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    try:
        await _walkFolder(
            adapter=adapter,
            knowledgeService=knowledgeService,
            runExtractionFn=runExtractionFn,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            folderPath="/",  # DriveAdapter.browse maps "" / "/" -> "root"
            depth=0,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )
    except Exception as exc:
        logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
        result.errors.append(f"walk: {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = GoogleConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("drive")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _walkFolder(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    folderPath: str,
    depth: int,
    limits: GdriveBootstrapLimits,
    result: GdriveBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    if depth > limits.maxDepth:
        return
    try:
        entries = await adapter.browse(folderPath)
    except Exception as exc:
        logger.warning("gdrive browse %s failed: %s", folderPath, exc)
        result.errors.append(f"browse({folderPath}): {exc}")
        return

    for entry in entries:
        if result.indexed + result.skippedDuplicate >= limits.maxItems:
            return
        if result.bytesProcessed >= limits.maxBytes:
            return

        entryPath = getattr(entry, "path", "") or ""
        metadata = getattr(entry, "metadata", {}) or {}
        mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")

        if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
            await _walkFolder(
                adapter=adapter,
                knowledgeService=knowledgeService,
                runExtractionFn=runExtractionFn,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                folderPath=entryPath,
                depth=depth + 1,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
            continue

        effectiveMime = mimeType or "application/octet-stream"
        if any(effectiveMime.startswith(prefix) for prefix in limits.skipMimePrefixes):
            result.skippedPolicy += 1
            continue
        size = int(getattr(entry, "size", 0) or 0)
        if size and size > limits.maxFileSize:
            result.skippedPolicy += 1
            continue
        modifiedTime = metadata.get("modifiedTime")
        if not _isRecent(modifiedTime, limits.maxAgeDays):
            result.skippedPolicy += 1
            continue

        externalItemId = metadata.get("id") or entryPath
        revision = modifiedTime

        await _ingestOne(
            adapter=adapter,
            knowledgeService=knowledgeService,
            runExtractionFn=runExtractionFn,
            connectionId=connectionId,
            mandateId=mandateId,
            userId=userId,
            entry=entry,
            entryPath=entryPath,
            mimeType=effectiveMime,
            externalItemId=externalItemId,
            revision=revision,
            limits=limits,
            result=result,
            progressCb=progressCb,
        )


async def _ingestOne(
    *,
    adapter,
    knowledgeService,
    runExtractionFn,
    connectionId: str,
    mandateId: str,
    userId: str,
    entry,
    entryPath: str,
    mimeType: str,
    externalItemId: str,
    revision: Optional[str],
    limits: GdriveBootstrapLimits,
    result: GdriveBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

    syntheticFileId = _syntheticFileId(connectionId, externalItemId)
    fileName = getattr(entry, "name", "") or externalItemId

    try:
        downloaded = await adapter.download(entryPath)
    except Exception as exc:
        logger.warning("gdrive download %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"download({entryPath}): {exc}")
        return

    # Adapter.download returns raw bytes today; guard DownloadResult shape too.
    fileBytes: bytes
    if isinstance(downloaded, (bytes, bytearray)):
        fileBytes = bytes(downloaded)
    else:
        fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
        if getattr(downloaded, "mimeType", None):
            mimeType = downloaded.mimeType  # export may have changed the type
    if not fileBytes:
        result.failed += 1
        return
    if len(fileBytes) > limits.maxFileSize:
        result.skippedPolicy += 1
        return

    result.bytesProcessed += len(fileBytes)

    try:
        extracted = runExtractionFn(
            fileBytes, fileName, mimeType,
            ExtractionOptions(mergeStrategy=None),
        )
    except Exception as exc:
        logger.warning("gdrive extraction %s failed: %s", entryPath, exc)
        result.failed += 1
        result.errors.append(f"extract({entryPath}): {exc}")
        return

    contentObjects = _toContentObjects(extracted, fileName)
    if not contentObjects:
        result.skippedPolicy += 1
        return

    try:
        handle = await knowledgeService.requestIngestion(
            IngestionJob(
                sourceKind="gdrive_item",
                sourceId=syntheticFileId,
                fileName=fileName,
                mimeType=mimeType,
                userId=userId,
                mandateId=mandateId,
                contentObjects=contentObjects,
                contentVersion=revision,
                neutralize=limits.neutralize,
                provenance={
                    "connectionId": connectionId,
                    "authority": "google",
                    "service": "drive",
                    "externalItemId": externalItemId,
                    "entryPath": entryPath,
                    "tier": "body",
                },
            )
        )
    except Exception as exc:
        logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
        result.failed += 1
        result.errors.append(f"ingest({entryPath}): {exc}")
        return

    if handle.status == "duplicate":
        result.skippedDuplicate += 1
    elif handle.status == "indexed":
        result.indexed += 1
    else:
        result.failed += 1

    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
        processed = result.indexed + result.skippedDuplicate
        try:
            progressCb(
                min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
                f"gdrive processed={processed}",
            )
        except Exception:
            pass
        logger.info(
            "ingestion.connection.bootstrap.progress part=gdrive processed=%d skippedDup=%d failed=%d",
            processed, result.skippedDuplicate, result.failed,
            extra={
                "event": "ingestion.connection.bootstrap.progress",
                "part": "gdrive",
                "connectionId": connectionId,
                "processed": processed,
                "skippedDup": result.skippedDuplicate,
                "failed": result.failed,
            },
        )


def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
    durationMs = int((time.time() - startMs) * 1000)
    logger.info(
        "ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d",
        connectionId,
        result.indexed, result.skippedDuplicate, result.skippedPolicy,
        result.failed, result.bytesProcessed, durationMs,
        extra={
            "event": "ingestion.connection.bootstrap.done",
            "part": "gdrive",
            "connectionId": connectionId,
            "indexed": result.indexed,
            "skippedDup": result.skippedDuplicate,
            "skippedPolicy": result.skippedPolicy,
            "failed": result.failed,
            "bytes": result.bytesProcessed,
            "durationMs": durationMs,
        },
    )
    return {
        "connectionId": result.connectionId,
        "indexed": result.indexed,
        "skippedDuplicate": result.skippedDuplicate,
        "skippedPolicy": result.skippedPolicy,
        "failed": result.failed,
        "bytesProcessed": result.bytesProcessed,
        "durationMs": durationMs,
        "errors": result.errors[:20],
    }
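A quick illustration of the recency gate used by this walker: RFC 3339 timestamps with a trailing Z are normalised before comparison, while missing or unparseable timestamps are treated permissively. The sample timestamps below are made up for illustration.

from datetime import datetime, timedelta, timezone

recentIso = (datetime.now(timezone.utc) - timedelta(days=10)).isoformat()
oldIso = "2019-01-01T00:00:00Z"

assert _isRecent(recentIso, maxAgeDays=365) is True
assert _isRecent(oldIso, maxAgeDays=365) is False
assert _isRecent(None, maxAgeDays=365) is True     # no timestamp -> keep
assert _isRecent(oldIso, maxAgeDays=None) is True  # filter disabled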
@@ -0,0 +1,606 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Gmail bootstrap for the unified knowledge ingestion lane.

Mirrors the Outlook pilot (see subConnectorSyncOutlook.py) but talks to Google
Mail's REST API. Messages become `sourceKind="gmail_message"` virtual documents
with header / snippet / cleaned body content-objects; attachments are optional
child jobs with `sourceKind="gmail_attachment"`.

Idempotency: Gmail's stable `historyId` (or `internalDate` as fallback) is
passed as `contentVersion`, so rerunning the bootstrap yields
`ingestion.skipped.duplicate` for unchanged messages.
"""

from __future__ import annotations

import asyncio
import base64
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional

from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody

logger = logging.getLogger(__name__)

MAX_MESSAGES_DEFAULT = 500
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
DEFAULT_LABELS = ("INBOX", "SENT")


@dataclass
class GmailBootstrapLimits:
    maxMessages: int = MAX_MESSAGES_DEFAULT
    labels: tuple = DEFAULT_LABELS
    maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
    includeAttachments: bool = False
    maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
    # Only fetch messages newer than N days. None disables filter.
    maxAgeDays: Optional[int] = 90
    # Content depth: "metadata" | "snippet" | "full"
    mailContentDepth: str = "full"
    # Pass-through to IngestionJob.neutralize
    neutralize: bool = False


@dataclass
class GmailBootstrapResult:
    connectionId: str
    indexed: int = 0
    skippedDuplicate: int = 0
    skippedPolicy: int = 0
    failed: int = 0
    attachmentsIndexed: int = 0
    errors: List[str] = field(default_factory=list)


def _syntheticMessageId(connectionId: str, messageId: str) -> str:
    token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
    return f"gm:{connectionId[:8]}:{token}"


def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
    token = hashlib.sha256(
        f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
    ).hexdigest()[:16]
    return f"ga:{connectionId[:8]}:{token}"


def _decodeBase64Url(data: str) -> bytes:
    if not data:
        return b""
    # Gmail uses URL-safe base64 without padding.
    padding = 4 - (len(data) % 4)
    if padding != 4:
        data = data + ("=" * padding)
    try:
        return base64.urlsafe_b64decode(data)
    except Exception:
        return b""


def _walkPayloadForBody(payload: Dict[str, Any]) -> Dict[str, str]:
    """Return {"text": ..., "html": ...} by walking MIME parts.

    Gmail `payload` is a tree of parts. We prefer `text/plain` for the cleaned
    body, but capture `text/html` as a fallback so `cleanEmailBody` can strip
    markup if plain is missing.
    """
    found: Dict[str, str] = {"text": "", "html": ""}

    def _walk(part: Dict[str, Any]) -> None:
        mime = (part.get("mimeType") or "").lower()
        body = part.get("body") or {}
        raw = body.get("data") or ""
        if raw and mime.startswith("text/"):
            decoded = _decodeBase64Url(raw).decode("utf-8", errors="replace")
            key = "text" if mime == "text/plain" else ("html" if mime == "text/html" else "")
            if key and not found[key]:
                found[key] = decoded
        for sub in part.get("parts") or []:
            _walk(sub)

    _walk(payload or {})
    return found


def _headerMap(payload: Dict[str, Any]) -> Dict[str, str]:
    return {
        (h.get("name") or "").lower(): (h.get("value") or "")
        for h in (payload.get("headers") or [])
    }


def _buildContentObjects(
    message: Dict[str, Any],
    maxBodyChars: int,
    mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
    """Build content objects for a Gmail message.

    `mailContentDepth` controls how much is embedded:
    - "metadata": header only (subject, from, to, date)
    - "snippet": header + Gmail snippet (~155 chars, no full body)
    - "full": header + snippet + cleaned full body (default)
    """
    payload = message.get("payload") or {}
    headers = _headerMap(payload)
    subject = headers.get("subject") or "(no subject)"
    fromAddr = headers.get("from") or ""
    toAddr = headers.get("to") or ""
    ccAddr = headers.get("cc") or ""
    date = headers.get("date") or ""
    snippet = message.get("snippet") or ""

    parts: List[Dict[str, Any]] = []
    header = (
        f"Subject: {subject}\n"
        f"From: {fromAddr}\n"
        f"To: {toAddr}\n"
        + (f"Cc: {ccAddr}\n" if ccAddr else "")
        + f"Date: {date}"
    )
    parts.append({
        "contentObjectId": "header",
        "contentType": "text",
        "data": header,
        "contextRef": {"part": "header"},
    })
    if mailContentDepth in ("snippet", "full") and snippet:
        parts.append({
            "contentObjectId": "snippet",
            "contentType": "text",
            "data": snippet,
            "contextRef": {"part": "snippet"},
        })
    if mailContentDepth == "full":
        bodies = _walkPayloadForBody(payload)
        rawBody = bodies["text"] or bodies["html"]
        cleanedBody = cleanEmailBody(rawBody, maxChars=maxBodyChars) if rawBody else ""
        if cleanedBody:
            parts.append({
                "contentObjectId": "body",
                "contentType": "text",
                "data": cleanedBody,
                "contextRef": {"part": "body"},
            })
    return parts


async def bootstrapGmail(
    connectionId: str,
    *,
    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
    adapter: Any = None,
    connection: Any = None,
    knowledgeService: Any = None,
    limits: Optional[GmailBootstrapLimits] = None,
    googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
    """Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
    prefs = loadConnectionPrefs(connectionId)

    if not limits:
        limits = GmailBootstrapLimits(
            includeAttachments=prefs.mailIndexAttachments,
            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
            mailContentDepth=prefs.mailContentDepth,
            neutralize=prefs.neutralizeBeforeEmbed,
        )

    startMs = time.time()
    result = GmailBootstrapResult(connectionId=connectionId)

    logger.info(
        "ingestion.connection.bootstrap.started part=gmail connectionId=%s",
        connectionId,
        extra={
            "event": "ingestion.connection.bootstrap.started",
            "part": "gmail",
            "connectionId": connectionId,
        },
    )

    if adapter is None or knowledgeService is None or connection is None:
        adapter, connection, knowledgeService = await _resolveDependencies(connectionId)

    if googleGetFn is None:
        from modules.connectors.providerGoogle.connectorGoogle import _googleGet as _defaultGet

        token = getattr(adapter, "_token", "")

        async def googleGetFn(url: str) -> Dict[str, Any]:  # type: ignore[no-redef]
            return await _defaultGet(token, url)

    mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
    userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""

    for labelId in limits.labels:
        if result.indexed + result.skippedDuplicate >= limits.maxMessages:
            break
        try:
            await _ingestLabel(
                googleGetFn=googleGetFn,
                knowledgeService=knowledgeService,
                connectionId=connectionId,
                mandateId=mandateId,
                userId=userId,
                labelId=labelId,
                limits=limits,
                result=result,
                progressCb=progressCb,
            )
        except Exception as exc:
            logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
            result.errors.append(f"label({labelId}): {exc}")

    return _finalizeResult(connectionId, result, startMs)


async def _resolveDependencies(connectionId: str):
    from modules.interfaces.interfaceDbApp import getRootInterface
    from modules.auth import TokenManager
    from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.security.rootAccess import getRootUser

    rootInterface = getRootInterface()
    connection = rootInterface.getUserConnectionById(connectionId)
    if connection is None:
        raise ValueError(f"UserConnection not found: {connectionId}")

    token = TokenManager().getFreshToken(connectionId)
    if not token or not token.tokenAccess:
        raise ValueError(f"No valid token for connection {connectionId}")

    provider = GoogleConnector(connection, token.tokenAccess)
    adapter = provider.getServiceAdapter("gmail")

    rootUser = getRootUser()
    ctx = ServiceCenterContext(
        user=rootUser,
        mandate_id=str(getattr(connection, "mandateId", "") or ""),
    )
    knowledgeService = getService("knowledge", ctx)
    return adapter, connection, knowledgeService


async def _ingestLabel(
    *,
    googleGetFn,
    knowledgeService,
    connectionId: str,
    mandateId: str,
    userId: str,
    labelId: str,
    limits: GmailBootstrapLimits,
    result: GmailBootstrapResult,
    progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
    remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
    if remaining <= 0:
        return

    pageSize = min(100, remaining)
    query = ""
    if limits.maxAgeDays:
        cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
        # Gmail uses YYYY/MM/DD.
        query = f"after:{cutoff.strftime('%Y/%m/%d')}"

    baseUrl = (
        "https://gmail.googleapis.com/gmail/v1/users/me/messages"
        f"?labelIds={labelId}&maxResults={pageSize}"
    )
    if query:
        baseUrl = f"{baseUrl}&q={query}"

    nextPageToken: Optional[str] = None
    while (result.indexed + result.skippedDuplicate) < limits.maxMessages:
        url = baseUrl if not nextPageToken else f"{baseUrl}&pageToken={nextPageToken}"
        page = await googleGetFn(url)
        if not isinstance(page, dict) or "error" in page:
            err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
            logger.warning("gmail list page error for label %s: %s", labelId, err)
            result.errors.append(f"list({labelId}): {err}")
            return

        messageStubs = page.get("messages") or []
        for stub in messageStubs:
            if result.indexed + result.skippedDuplicate >= limits.maxMessages:
                break
            msgId = stub.get("id")
            if not msgId:
                continue
            detailUrl = (
                f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{msgId}?format=full"
            )
            detail = await googleGetFn(detailUrl)
            if not isinstance(detail, dict) or "error" in detail:
|
||||||
|
result.failed += 1
|
||||||
|
continue
|
||||||
|
await _ingestMessage(
|
||||||
|
googleGetFn=googleGetFn,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
labelId=labelId,
|
||||||
|
message=detail,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
)
|
||||||
|
|
||||||
|
nextPageToken = page.get("nextPageToken")
|
||||||
|
if not nextPageToken:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
async def _ingestMessage(
|
||||||
|
*,
|
||||||
|
googleGetFn,
|
||||||
|
knowledgeService,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
labelId: str,
|
||||||
|
message: Dict[str, Any],
|
||||||
|
limits: GmailBootstrapLimits,
|
||||||
|
result: GmailBootstrapResult,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||||
|
) -> None:
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
messageId = message.get("id")
|
||||||
|
if not messageId:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
return
|
||||||
|
revision = message.get("historyId") or message.get("internalDate")
|
||||||
|
headers = _headerMap(message.get("payload") or {})
|
||||||
|
subject = headers.get("subject") or "(no subject)"
|
||||||
|
syntheticId = _syntheticMessageId(connectionId, messageId)
|
||||||
|
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
|
||||||
|
|
||||||
|
contentObjects = _buildContentObjects(
|
||||||
|
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
handle = await knowledgeService.requestIngestion(
|
||||||
|
IngestionJob(
|
||||||
|
sourceKind="gmail_message",
|
||||||
|
sourceId=syntheticId,
|
||||||
|
fileName=fileName,
|
||||||
|
mimeType="message/rfc822",
|
||||||
|
userId=userId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
contentObjects=contentObjects,
|
||||||
|
contentVersion=str(revision) if revision else None,
|
||||||
|
neutralize=limits.neutralize,
|
||||||
|
provenance={
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"authority": "google",
|
||||||
|
"service": "gmail",
|
||||||
|
"externalItemId": messageId,
|
||||||
|
"label": labelId,
|
||||||
|
"threadId": message.get("threadId"),
|
||||||
|
"tier": limits.mailContentDepth,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True)
|
||||||
|
result.failed += 1
|
||||||
|
result.errors.append(f"ingest({messageId}): {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if handle.status == "duplicate":
|
||||||
|
result.skippedDuplicate += 1
|
||||||
|
elif handle.status == "indexed":
|
||||||
|
result.indexed += 1
|
||||||
|
else:
|
||||||
|
result.failed += 1
|
||||||
|
|
||||||
|
if limits.includeAttachments:
|
||||||
|
try:
|
||||||
|
await _ingestAttachments(
|
||||||
|
googleGetFn=googleGetFn,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
message=message,
|
||||||
|
parentSyntheticId=syntheticId,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("gmail attachments %s failed: %s", messageId, exc)
|
||||||
|
result.errors.append(f"attachments({messageId}): {exc}")
|
||||||
|
|
||||||
|
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||||
|
processed = result.indexed + result.skippedDuplicate
|
||||||
|
try:
|
||||||
|
progressCb(
|
||||||
|
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
|
||||||
|
f"gmail processed={processed}",
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
|
||||||
|
processed, result.skippedDuplicate, result.failed,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.progress",
|
||||||
|
"part": "gmail",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"processed": processed,
|
||||||
|
"skippedDup": result.skippedDuplicate,
|
||||||
|
"failed": result.failed,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
async def _ingestAttachments(
|
||||||
|
*,
|
||||||
|
googleGetFn,
|
||||||
|
knowledgeService,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
message: Dict[str, Any],
|
||||||
|
parentSyntheticId: str,
|
||||||
|
limits: GmailBootstrapLimits,
|
||||||
|
result: GmailBootstrapResult,
|
||||||
|
) -> None:
|
||||||
|
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
|
||||||
|
ExtractorRegistry, ChunkerRegistry,
|
||||||
|
)
|
||||||
|
|
||||||
|
messageId = message.get("id") or ""
|
||||||
|
|
||||||
|
def _collectAttachmentStubs(part: Dict[str, Any], acc: List[Dict[str, Any]]) -> None:
|
||||||
|
filename = part.get("filename") or ""
|
||||||
|
body = part.get("body") or {}
|
||||||
|
attId = body.get("attachmentId")
|
||||||
|
if filename and attId:
|
||||||
|
acc.append({
|
||||||
|
"filename": filename,
|
||||||
|
"mimeType": part.get("mimeType") or "application/octet-stream",
|
||||||
|
"attachmentId": attId,
|
||||||
|
"size": int(body.get("size") or 0),
|
||||||
|
})
|
||||||
|
for sub in part.get("parts") or []:
|
||||||
|
_collectAttachmentStubs(sub, acc)
|
||||||
|
|
||||||
|
stubs: List[Dict[str, Any]] = []
|
||||||
|
_collectAttachmentStubs(message.get("payload") or {}, stubs)
|
||||||
|
if not stubs:
|
||||||
|
return
|
||||||
|
|
||||||
|
extractorRegistry = ExtractorRegistry()
|
||||||
|
chunkerRegistry = ChunkerRegistry()
|
||||||
|
|
||||||
|
for stub in stubs:
|
||||||
|
if stub["size"] and stub["size"] > limits.maxAttachmentBytes:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
attUrl = (
|
||||||
|
f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{messageId}"
|
||||||
|
f"/attachments/{stub['attachmentId']}"
|
||||||
|
)
|
||||||
|
detail = await googleGetFn(attUrl)
|
||||||
|
if not isinstance(detail, dict) or "error" in detail:
|
||||||
|
result.failed += 1
|
||||||
|
continue
|
||||||
|
rawBytes = _decodeBase64Url(detail.get("data") or "")
|
||||||
|
if not rawBytes:
|
||||||
|
continue
|
||||||
|
fileName = stub["filename"]
|
||||||
|
mimeType = stub["mimeType"]
|
||||||
|
syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"])
|
||||||
|
|
||||||
|
try:
|
||||||
|
extracted = runExtraction(
|
||||||
|
extractorRegistry, chunkerRegistry,
|
||||||
|
rawBytes, fileName, mimeType,
|
||||||
|
ExtractionOptions(mergeStrategy=None),
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc)
|
||||||
|
result.failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
contentObjects: List[Dict[str, Any]] = []
|
||||||
|
for part in getattr(extracted, "parts", None) or []:
|
||||||
|
data = getattr(part, "data", None) or ""
|
||||||
|
if not data or not str(data).strip():
|
||||||
|
continue
|
||||||
|
typeGroup = getattr(part, "typeGroup", "text") or "text"
|
||||||
|
contentType = "text"
|
||||||
|
if typeGroup == "image":
|
||||||
|
contentType = "image"
|
||||||
|
elif typeGroup in ("binary", "container"):
|
||||||
|
contentType = "other"
|
||||||
|
contentObjects.append({
|
||||||
|
"contentObjectId": getattr(part, "id", ""),
|
||||||
|
"contentType": contentType,
|
||||||
|
"data": data,
|
||||||
|
"contextRef": {
|
||||||
|
"containerPath": fileName,
|
||||||
|
"location": getattr(part, "label", None) or "attachment",
|
||||||
|
**(getattr(part, "metadata", None) or {}),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if not contentObjects:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
await knowledgeService.requestIngestion(
|
||||||
|
IngestionJob(
|
||||||
|
sourceKind="gmail_attachment",
|
||||||
|
sourceId=syntheticId,
|
||||||
|
fileName=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
userId=userId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
contentObjects=contentObjects,
|
||||||
|
provenance={
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"authority": "google",
|
||||||
|
"service": "gmail",
|
||||||
|
"parentId": parentSyntheticId,
|
||||||
|
"externalItemId": stub["attachmentId"],
|
||||||
|
"parentMessageId": messageId,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
result.attachmentsIndexed += 1
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc)
|
||||||
|
result.failed += 1
|
||||||
|
|
||||||
|
|
||||||
|
def _finalizeResult(connectionId: str, result: GmailBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.done part=gmail connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
|
||||||
|
connectionId,
|
||||||
|
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
||||||
|
result.attachmentsIndexed, result.failed, durationMs,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.done",
|
||||||
|
"part": "gmail",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"indexed": result.indexed,
|
||||||
|
"skippedDup": result.skippedDuplicate,
|
||||||
|
"skippedPolicy": result.skippedPolicy,
|
||||||
|
"attachmentsIndexed": result.attachmentsIndexed,
|
||||||
|
"failed": result.failed,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"connectionId": result.connectionId,
|
||||||
|
"indexed": result.indexed,
|
||||||
|
"skippedDuplicate": result.skippedDuplicate,
|
||||||
|
"skippedPolicy": result.skippedPolicy,
|
||||||
|
"attachmentsIndexed": result.attachmentsIndexed,
|
||||||
|
"failed": result.failed,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
"errors": result.errors[:20],
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,576 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Outlook bootstrap for the unified knowledge ingestion lane.
|
||||||
|
|
||||||
|
Unlike SharePoint, Outlook messages are "virtual documents" — we never persist
|
||||||
|
file bytes in the store. Each message becomes a `sourceKind="outlook_message"`
|
||||||
|
IngestionJob whose `contentObjects` carry the header, snippet and cleaned body
|
||||||
|
so retrieval can show a compact answer without fetching Graph again.
|
||||||
|
|
||||||
|
Attachments are optional (`includeAttachments` limit flag) and enqueued as
|
||||||
|
child jobs with `sourceKind="outlook_attachment"` + `provenance.parentId`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
MAX_MESSAGES_DEFAULT = 500
|
||||||
|
MAX_FOLDERS_DEFAULT = 5
|
||||||
|
MAX_BODY_CHARS_DEFAULT = 8000
|
||||||
|
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
|
||||||
|
WELL_KNOWN_FOLDERS = ("inbox", "sentitems")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class OutlookBootstrapLimits:
|
||||||
|
maxMessages: int = MAX_MESSAGES_DEFAULT
|
||||||
|
maxFolders: int = MAX_FOLDERS_DEFAULT
|
||||||
|
maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
|
||||||
|
includeAttachments: bool = False
|
||||||
|
maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
|
||||||
|
# Only fetch messages newer than N days. None disables filter.
|
||||||
|
maxAgeDays: Optional[int] = 90
|
||||||
|
# Content depth: "metadata" | "snippet" | "full"
|
||||||
|
mailContentDepth: str = "full"
|
||||||
|
# Pass-through to IngestionJob.neutralize
|
||||||
|
neutralize: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class OutlookBootstrapResult:
|
||||||
|
connectionId: str
|
||||||
|
indexed: int = 0
|
||||||
|
skippedDuplicate: int = 0
|
||||||
|
skippedPolicy: int = 0
|
||||||
|
failed: int = 0
|
||||||
|
attachmentsIndexed: int = 0
|
||||||
|
errors: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _syntheticMessageId(connectionId: str, messageId: str) -> str:
|
||||||
|
token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
|
||||||
|
return f"om:{connectionId[:8]}:{token}"
|
||||||
|
|
||||||
|
|
||||||
|
def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
|
||||||
|
token = hashlib.sha256(
|
||||||
|
f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
|
||||||
|
).hexdigest()[:16]
|
||||||
|
return f"oa:{connectionId[:8]}:{token}"
|
||||||
|
|
||||||
|
|
||||||
|
def _extractRecipient(recipient: Dict[str, Any]) -> str:
|
||||||
|
email = (recipient or {}).get("emailAddress") or {}
|
||||||
|
name = email.get("name") or ""
|
||||||
|
addr = email.get("address") or ""
|
||||||
|
if name and addr:
|
||||||
|
return f"{name} <{addr}>"
|
||||||
|
return addr or name
|
||||||
|
|
||||||
|
|
||||||
|
def _joinRecipients(recipients: List[Dict[str, Any]]) -> str:
|
||||||
|
return ", ".join(filter(None, [_extractRecipient(r) for r in recipients or []]))
|
||||||
|
|
||||||
|
|
||||||
|
def _buildContentObjects(
|
||||||
|
message: Dict[str, Any],
|
||||||
|
maxBodyChars: int,
|
||||||
|
mailContentDepth: str = "full",
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Build content objects for an Outlook message.
|
||||||
|
|
||||||
|
`mailContentDepth` mirrors the Gmail walker:
|
||||||
|
- "metadata": header only
|
||||||
|
- "snippet": header + bodyPreview (~255 chars)
|
||||||
|
- "full": header + snippet + cleaned body (default)
|
||||||
|
"""
|
||||||
|
subject = message.get("subject") or "(no subject)"
|
||||||
|
fromAddr = _extractRecipient(message.get("from") or {})
|
||||||
|
toAddr = _joinRecipients(message.get("toRecipients") or [])
|
||||||
|
ccAddr = _joinRecipients(message.get("ccRecipients") or [])
|
||||||
|
received = message.get("receivedDateTime") or ""
|
||||||
|
snippet = message.get("bodyPreview") or ""
|
||||||
|
|
||||||
|
parts: List[Dict[str, Any]] = []
|
||||||
|
header = (
|
||||||
|
f"Subject: {subject}\n"
|
||||||
|
f"From: {fromAddr}\n"
|
||||||
|
f"To: {toAddr}\n"
|
||||||
|
+ (f"Cc: {ccAddr}\n" if ccAddr else "")
|
||||||
|
+ f"Date: {received}"
|
||||||
|
)
|
||||||
|
parts.append({
|
||||||
|
"contentObjectId": "header",
|
||||||
|
"contentType": "text",
|
||||||
|
"data": header,
|
||||||
|
"contextRef": {"part": "header"},
|
||||||
|
})
|
||||||
|
if mailContentDepth in ("snippet", "full") and snippet:
|
||||||
|
parts.append({
|
||||||
|
"contentObjectId": "snippet",
|
||||||
|
"contentType": "text",
|
||||||
|
"data": snippet,
|
||||||
|
"contextRef": {"part": "snippet"},
|
||||||
|
})
|
||||||
|
if mailContentDepth == "full":
|
||||||
|
body = message.get("body") or {}
|
||||||
|
bodyContent = body.get("content") or ""
|
||||||
|
cleanedBody = cleanEmailBody(bodyContent, maxChars=maxBodyChars) if bodyContent else ""
|
||||||
|
if cleanedBody:
|
||||||
|
parts.append({
|
||||||
|
"contentObjectId": "body",
|
||||||
|
"contentType": "text",
|
||||||
|
"data": cleanedBody,
|
||||||
|
"contextRef": {"part": "body"},
|
||||||
|
})
|
||||||
|
return parts
|
||||||
|
|
||||||
|
|
||||||
|
async def bootstrapOutlook(
|
||||||
|
connectionId: str,
|
||||||
|
*,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||||
|
adapter: Any = None,
|
||||||
|
connection: Any = None,
|
||||||
|
knowledgeService: Any = None,
|
||||||
|
limits: Optional[OutlookBootstrapLimits] = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||||
|
prefs = loadConnectionPrefs(connectionId)
|
||||||
|
|
||||||
|
if not limits:
|
||||||
|
limits = OutlookBootstrapLimits(
|
||||||
|
includeAttachments=prefs.mailIndexAttachments,
|
||||||
|
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
|
||||||
|
mailContentDepth=prefs.mailContentDepth,
|
||||||
|
neutralize=prefs.neutralizeBeforeEmbed,
|
||||||
|
)
|
||||||
|
|
||||||
|
startMs = time.time()
|
||||||
|
result = OutlookBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
|
||||||
|
connectionId,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
|
"part": "outlook",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if adapter is None or knowledgeService is None or connection is None:
|
||||||
|
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
|
||||||
|
|
||||||
|
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||||
|
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||||
|
|
||||||
|
folderIds = await _selectFolderIds(adapter, limits)
|
||||||
|
for folderId in folderIds:
|
||||||
|
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
await _ingestFolder(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
folderId=folderId,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
|
||||||
|
result.errors.append(f"folder({folderId}): {exc}")
|
||||||
|
|
||||||
|
return _finalizeResult(connectionId, result, startMs)
|
||||||
|
|
||||||
|
|
||||||
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.auth import TokenManager
|
||||||
|
from modules.connectors.providerMsft.connectorMsft import MsftConnector
|
||||||
|
from modules.serviceCenter import getService
|
||||||
|
from modules.serviceCenter.context import ServiceCenterContext
|
||||||
|
from modules.security.rootAccess import getRootUser
|
||||||
|
|
||||||
|
rootInterface = getRootInterface()
|
||||||
|
connection = rootInterface.getUserConnectionById(connectionId)
|
||||||
|
if connection is None:
|
||||||
|
raise ValueError(f"UserConnection not found: {connectionId}")
|
||||||
|
|
||||||
|
token = TokenManager().getFreshToken(connectionId)
|
||||||
|
if not token or not token.tokenAccess:
|
||||||
|
raise ValueError(f"No valid token for connection {connectionId}")
|
||||||
|
|
||||||
|
provider = MsftConnector(connection, token.tokenAccess)
|
||||||
|
adapter = provider.getServiceAdapter("outlook")
|
||||||
|
|
||||||
|
rootUser = getRootUser()
|
||||||
|
ctx = ServiceCenterContext(
|
||||||
|
user=rootUser,
|
||||||
|
mandate_id=str(getattr(connection, "mandateId", "") or ""),
|
||||||
|
)
|
||||||
|
knowledgeService = getService("knowledge", ctx)
|
||||||
|
return adapter, connection, knowledgeService
|
||||||
|
|
||||||
|
|
||||||
|
async def _selectFolderIds(adapter, limits: OutlookBootstrapLimits) -> List[str]:
|
||||||
|
"""Prefer well-known folders (inbox, sentitems); fall back to browse()."""
|
||||||
|
folderIds: List[str] = []
|
||||||
|
for wellKnown in WELL_KNOWN_FOLDERS:
|
||||||
|
if len(folderIds) >= limits.maxFolders:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
row = await adapter._graphGet(f"me/mailFolders/{wellKnown}")
|
||||||
|
except Exception:
|
||||||
|
row = None
|
||||||
|
if isinstance(row, dict) and "error" not in row and row.get("id"):
|
||||||
|
folderIds.append(row["id"])
|
||||||
|
|
||||||
|
if len(folderIds) < limits.maxFolders:
|
||||||
|
try:
|
||||||
|
entries = await adapter.browse("/")
|
||||||
|
except Exception:
|
||||||
|
entries = []
|
||||||
|
for entry in entries:
|
||||||
|
metadata = getattr(entry, "metadata", {}) or {}
|
||||||
|
fid = metadata.get("id")
|
||||||
|
if fid and fid not in folderIds:
|
||||||
|
folderIds.append(fid)
|
||||||
|
if len(folderIds) >= limits.maxFolders:
|
||||||
|
break
|
||||||
|
return folderIds
|
||||||
|
|
||||||
|
|
||||||
|
async def _ingestFolder(
|
||||||
|
*,
|
||||||
|
adapter,
|
||||||
|
knowledgeService,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
folderId: str,
|
||||||
|
limits: OutlookBootstrapLimits,
|
||||||
|
result: OutlookBootstrapResult,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||||
|
) -> None:
|
||||||
|
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
|
||||||
|
if remaining <= 0:
|
||||||
|
return
|
||||||
|
|
||||||
|
pageSize = min(100, remaining)
|
||||||
|
select = (
|
||||||
|
"id,subject,from,toRecipients,ccRecipients,receivedDateTime,"
|
||||||
|
"bodyPreview,body,internetMessageId,hasAttachments,changeKey"
|
||||||
|
)
|
||||||
|
endpoint: Optional[str] = (
|
||||||
|
f"me/mailFolders/{folderId}/messages"
|
||||||
|
f"?$top={pageSize}&$orderby=receivedDateTime desc&$select={select}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Keep header-based age filter in Graph itself to avoid shipping ancient
|
||||||
|
# messages we'd discard client-side.
|
||||||
|
if limits.maxAgeDays:
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
|
||||||
|
cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
|
||||||
|
cutoffIso = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
endpoint = f"{endpoint}&$filter=receivedDateTime ge {cutoffIso}"
|
||||||
|
|
||||||
|
while endpoint and (result.indexed + result.skippedDuplicate) < limits.maxMessages:
|
||||||
|
try:
|
||||||
|
page = await adapter._graphGet(endpoint)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("outlook graph page failed for folder %s: %s", folderId, exc)
|
||||||
|
result.errors.append(f"graph({folderId}): {exc}")
|
||||||
|
return
|
||||||
|
if not isinstance(page, dict) or "error" in page:
|
||||||
|
err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
|
||||||
|
logger.warning("outlook graph page error for folder %s: %s", folderId, err)
|
||||||
|
result.errors.append(f"graph({folderId}): {err}")
|
||||||
|
return
|
||||||
|
|
||||||
|
for message in page.get("value", []) or []:
|
||||||
|
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
|
||||||
|
break
|
||||||
|
await _ingestMessage(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
message=message,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
)
|
||||||
|
|
||||||
|
nextLink = page.get("@odata.nextLink")
|
||||||
|
if not nextLink:
|
||||||
|
break
|
||||||
|
# Strip Graph base so adapter._graphGet accepts the relative path.
|
||||||
|
from modules.connectors.providerMsft.connectorMsft import _stripGraphBase
|
||||||
|
|
||||||
|
endpoint = _stripGraphBase(nextLink)
|
||||||
|
|
||||||
|
|
||||||
|
async def _ingestMessage(
|
||||||
|
*,
|
||||||
|
adapter,
|
||||||
|
knowledgeService,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
message: Dict[str, Any],
|
||||||
|
limits: OutlookBootstrapLimits,
|
||||||
|
result: OutlookBootstrapResult,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||||
|
) -> None:
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
messageId = message.get("id")
|
||||||
|
if not messageId:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
return
|
||||||
|
revision = message.get("changeKey") or message.get("internetMessageId")
|
||||||
|
subject = message.get("subject") or "(no subject)"
|
||||||
|
syntheticId = _syntheticMessageId(connectionId, messageId)
|
||||||
|
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
|
||||||
|
|
||||||
|
contentObjects = _buildContentObjects(
|
||||||
|
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
|
||||||
|
)
|
||||||
|
# Always at least the header is emitted, so `contentObjects` is non-empty.
|
||||||
|
try:
|
||||||
|
handle = await knowledgeService.requestIngestion(
|
||||||
|
IngestionJob(
|
||||||
|
sourceKind="outlook_message",
|
||||||
|
sourceId=syntheticId,
|
||||||
|
fileName=fileName,
|
||||||
|
mimeType="message/rfc822",
|
||||||
|
userId=userId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
contentObjects=contentObjects,
|
||||||
|
contentVersion=revision,
|
||||||
|
neutralize=limits.neutralize,
|
||||||
|
provenance={
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"authority": "msft",
|
||||||
|
"service": "outlook",
|
||||||
|
"externalItemId": messageId,
|
||||||
|
"internetMessageId": message.get("internetMessageId"),
|
||||||
|
"tier": limits.mailContentDepth,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True)
|
||||||
|
result.failed += 1
|
||||||
|
result.errors.append(f"ingest({messageId}): {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if handle.status == "duplicate":
|
||||||
|
result.skippedDuplicate += 1
|
||||||
|
elif handle.status == "indexed":
|
||||||
|
result.indexed += 1
|
||||||
|
else:
|
||||||
|
result.failed += 1
|
||||||
|
|
||||||
|
if limits.includeAttachments and message.get("hasAttachments"):
|
||||||
|
try:
|
||||||
|
await _ingestAttachments(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
messageId=messageId,
|
||||||
|
parentSyntheticId=syntheticId,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("outlook attachments %s failed: %s", messageId, exc)
|
||||||
|
result.errors.append(f"attachments({messageId}): {exc}")
|
||||||
|
|
||||||
|
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||||
|
processed = result.indexed + result.skippedDuplicate
|
||||||
|
try:
|
||||||
|
progressCb(
|
||||||
|
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
|
||||||
|
f"outlook processed={processed}",
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
|
||||||
|
processed, result.skippedDuplicate, result.failed,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.progress",
|
||||||
|
"part": "outlook",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"processed": processed,
|
||||||
|
"skippedDup": result.skippedDuplicate,
|
||||||
|
"failed": result.failed,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
async def _ingestAttachments(
|
||||||
|
*,
|
||||||
|
adapter,
|
||||||
|
knowledgeService,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
messageId: str,
|
||||||
|
parentSyntheticId: str,
|
||||||
|
limits: OutlookBootstrapLimits,
|
||||||
|
result: OutlookBootstrapResult,
|
||||||
|
) -> None:
|
||||||
|
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
|
||||||
|
ExtractorRegistry, ChunkerRegistry,
|
||||||
|
)
|
||||||
|
import base64
|
||||||
|
|
||||||
|
page = await adapter._graphGet(f"me/messages/{messageId}/attachments")
|
||||||
|
if not isinstance(page, dict) or "error" in page:
|
||||||
|
return
|
||||||
|
|
||||||
|
extractorRegistry = ExtractorRegistry()
|
||||||
|
chunkerRegistry = ChunkerRegistry()
|
||||||
|
|
||||||
|
for attachment in page.get("value", []) or []:
|
||||||
|
if attachment.get("@odata.type") != "#microsoft.graph.fileAttachment":
|
||||||
|
continue
|
||||||
|
if attachment.get("isInline"):
|
||||||
|
continue
|
||||||
|
size = int(attachment.get("size") or 0)
|
||||||
|
if size and size > limits.maxAttachmentBytes:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
contentBytesB64 = attachment.get("contentBytes")
|
||||||
|
if not contentBytesB64:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
rawBytes = base64.b64decode(contentBytesB64)
|
||||||
|
except Exception:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
fileName = attachment.get("name") or "attachment"
|
||||||
|
mimeType = attachment.get("contentType") or "application/octet-stream"
|
||||||
|
attachmentId = attachment.get("id") or fileName
|
||||||
|
syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId)
|
||||||
|
|
||||||
|
try:
|
||||||
|
extracted = runExtraction(
|
||||||
|
extractorRegistry, chunkerRegistry,
|
||||||
|
rawBytes, fileName, mimeType,
|
||||||
|
ExtractionOptions(mergeStrategy=None),
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc)
|
||||||
|
result.failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
contentObjects: List[Dict[str, Any]] = []
|
||||||
|
for part in getattr(extracted, "parts", None) or []:
|
||||||
|
data = getattr(part, "data", None) or ""
|
||||||
|
if not data or not str(data).strip():
|
||||||
|
continue
|
||||||
|
typeGroup = getattr(part, "typeGroup", "text") or "text"
|
||||||
|
contentType = "text"
|
||||||
|
if typeGroup == "image":
|
||||||
|
contentType = "image"
|
||||||
|
elif typeGroup in ("binary", "container"):
|
||||||
|
contentType = "other"
|
||||||
|
contentObjects.append({
|
||||||
|
"contentObjectId": getattr(part, "id", ""),
|
||||||
|
"contentType": contentType,
|
||||||
|
"data": data,
|
||||||
|
"contextRef": {
|
||||||
|
"containerPath": fileName,
|
||||||
|
"location": getattr(part, "label", None) or "attachment",
|
||||||
|
**(getattr(part, "metadata", None) or {}),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if not contentObjects:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
await knowledgeService.requestIngestion(
|
||||||
|
IngestionJob(
|
||||||
|
sourceKind="outlook_attachment",
|
||||||
|
sourceId=syntheticId,
|
||||||
|
fileName=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
userId=userId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
contentObjects=contentObjects,
|
||||||
|
neutralize=limits.neutralize,
|
||||||
|
provenance={
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"authority": "msft",
|
||||||
|
"service": "outlook",
|
||||||
|
"parentId": parentSyntheticId,
|
||||||
|
"externalItemId": attachmentId,
|
||||||
|
"parentMessageId": messageId,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
result.attachmentsIndexed += 1
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc)
|
||||||
|
result.failed += 1
|
||||||
|
|
||||||
|
|
||||||
|
def _finalizeResult(connectionId: str, result: OutlookBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.done part=outlook connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
|
||||||
|
connectionId,
|
||||||
|
result.indexed, result.skippedDuplicate, result.skippedPolicy,
|
||||||
|
result.attachmentsIndexed, result.failed, durationMs,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.done",
|
||||||
|
"part": "outlook",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"indexed": result.indexed,
|
||||||
|
"skippedDup": result.skippedDuplicate,
|
||||||
|
"skippedPolicy": result.skippedPolicy,
|
||||||
|
"attachmentsIndexed": result.attachmentsIndexed,
|
||||||
|
"failed": result.failed,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"connectionId": result.connectionId,
|
||||||
|
"indexed": result.indexed,
|
||||||
|
"skippedDuplicate": result.skippedDuplicate,
|
||||||
|
"skippedPolicy": result.skippedPolicy,
|
||||||
|
"attachmentsIndexed": result.attachmentsIndexed,
|
||||||
|
"failed": result.failed,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
"errors": result.errors[:20],
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,433 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""SharePoint bootstrap for the unified knowledge ingestion lane.
|
||||||
|
|
||||||
|
Walks the SharePoint drive(s) reachable via a UserConnection, downloads each
|
||||||
|
file-like item, runs the standard content extraction pipeline and hands the
|
||||||
|
result to `KnowledgeService.requestIngestion`. Idempotency is provided by the
|
||||||
|
ingestion façade itself; repeat bootstraps therefore produce
|
||||||
|
`ingestion.skipped.duplicate` for every unchanged item because we pass the
|
||||||
|
Graph `eTag` as `contentVersion`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelExtraction import ExtractionOptions
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
MAX_ITEMS_DEFAULT = 500
|
||||||
|
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
|
||||||
|
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
|
||||||
|
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
|
||||||
|
MAX_DEPTH_DEFAULT = 4
|
||||||
|
MAX_SITES_DEFAULT = 3
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SharepointBootstrapLimits:
|
||||||
|
maxItems: int = MAX_ITEMS_DEFAULT
|
||||||
|
maxBytes: int = MAX_BYTES_DEFAULT
|
||||||
|
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
|
||||||
|
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
|
||||||
|
maxDepth: int = MAX_DEPTH_DEFAULT
|
||||||
|
maxSites: int = MAX_SITES_DEFAULT
|
||||||
|
# Pass-through to IngestionJob.neutralize
|
||||||
|
neutralize: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SharepointBootstrapResult:
|
||||||
|
connectionId: str
|
||||||
|
indexed: int = 0
|
||||||
|
skippedDuplicate: int = 0
|
||||||
|
skippedPolicy: int = 0
|
||||||
|
failed: int = 0
|
||||||
|
bytesProcessed: int = 0
|
||||||
|
errors: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
|
||||||
|
"""Deterministic synthetic FileContentIndex id for a SharePoint item.
|
||||||
|
|
||||||
|
Stable across bootstraps → idempotency works; independent of file name so
|
||||||
|
moves/renames don't duplicate chunks.
|
||||||
|
"""
|
||||||
|
token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
|
||||||
|
return f"sp:{connectionId[:8]}:{token}"
|
||||||
|
|
||||||
|
|
||||||
|
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
|
||||||
|
"""Translate ExtractionResult → content objects accepted by requestIngestion."""
|
||||||
|
parts = getattr(extracted, "parts", None) or []
|
||||||
|
out: List[Dict[str, Any]] = []
|
||||||
|
for part in parts:
|
||||||
|
data = getattr(part, "data", None) or ""
|
||||||
|
if not data or not str(data).strip():
|
||||||
|
continue
|
||||||
|
typeGroup = getattr(part, "typeGroup", "text") or "text"
|
||||||
|
contentType = "text"
|
||||||
|
if typeGroup == "image":
|
||||||
|
contentType = "image"
|
||||||
|
elif typeGroup in ("binary", "container"):
|
||||||
|
contentType = "other"
|
||||||
|
out.append({
|
||||||
|
"contentObjectId": getattr(part, "id", ""),
|
||||||
|
"contentType": contentType,
|
||||||
|
"data": data,
|
||||||
|
"contextRef": {
|
||||||
|
"containerPath": fileName,
|
||||||
|
"location": getattr(part, "label", None) or "file",
|
||||||
|
**(getattr(part, "metadata", None) or {}),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
async def bootstrapSharepoint(
|
||||||
|
connectionId: str,
|
||||||
|
*,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
|
||||||
|
adapter: Any = None,
|
||||||
|
connection: Any = None,
|
||||||
|
knowledgeService: Any = None,
|
||||||
|
limits: Optional[SharepointBootstrapLimits] = None,
|
||||||
|
runExtractionFn: Optional[Callable[..., Any]] = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
|
||||||
|
|
||||||
|
Parameters allow injection for tests; production callers pass only
|
||||||
|
`connectionId` (and optionally a progressCb) and everything else is
|
||||||
|
resolved against the registered services.
|
||||||
|
"""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
|
||||||
|
prefs = loadConnectionPrefs(connectionId)
|
||||||
|
|
||||||
|
if not limits:
|
||||||
|
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
|
||||||
|
|
||||||
|
startMs = time.time()
|
||||||
|
result = SharepointBootstrapResult(connectionId=connectionId)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
|
||||||
|
connectionId,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.started",
|
||||||
|
"part": "sharepoint",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if adapter is None or knowledgeService is None or connection is None:
|
||||||
|
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
|
||||||
|
if runExtractionFn is None:
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
|
||||||
|
ExtractorRegistry, ChunkerRegistry,
|
||||||
|
)
|
||||||
|
extractorRegistry = ExtractorRegistry()
|
||||||
|
chunkerRegistry = ChunkerRegistry()
|
||||||
|
|
||||||
|
def runExtractionFn(bytesData, name, mime, options): # type: ignore[no-redef]
|
||||||
|
return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)
|
||||||
|
|
||||||
|
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
|
||||||
|
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
sites = await adapter.browse("/", limit=limits.maxSites)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
|
||||||
|
result.errors.append(f"site_discovery: {exc}")
|
||||||
|
return _finalizeResult(connectionId, result, startMs)
|
||||||
|
|
||||||
|
for site in sites[: limits.maxSites]:
|
||||||
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
|
break
|
||||||
|
sitePath = getattr(site, "path", "") or ""
|
||||||
|
try:
|
||||||
|
await _walkFolder(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
runExtractionFn=runExtractionFn,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
folderPath=sitePath,
|
||||||
|
depth=0,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
|
||||||
|
result.errors.append(f"walk({sitePath}): {exc}")
|
||||||
|
|
||||||
|
return _finalizeResult(connectionId, result, startMs)
|
||||||
|
|
||||||
|
|
||||||
|
async def _resolveDependencies(connectionId: str):
|
||||||
|
"""Load connection, instantiate SharepointAdapter, and build a KnowledgeService.
|
||||||
|
|
||||||
|
Runs with root privileges: bootstrap is a system operation triggered by an
|
||||||
|
authenticated user via callback; it must not be gated by a per-user
|
||||||
|
service-center context.
|
||||||
|
"""
|
||||||
|
from modules.interfaces.interfaceDbApp import getRootInterface
|
||||||
|
from modules.auth import TokenManager
|
||||||
|
from modules.connectors.providerMsft.connectorMsft import MsftConnector
|
||||||
|
from modules.serviceCenter import getService
|
||||||
|
from modules.serviceCenter.context import ServiceCenterContext
|
||||||
|
from modules.security.rootAccess import getRootUser
|
||||||
|
|
||||||
|
rootInterface = getRootInterface()
|
||||||
|
connection = rootInterface.getUserConnectionById(connectionId)
|
||||||
|
if connection is None:
|
||||||
|
raise ValueError(f"UserConnection not found: {connectionId}")
|
||||||
|
|
||||||
|
token = TokenManager().getFreshToken(connectionId)
|
||||||
|
if not token or not token.tokenAccess:
|
||||||
|
raise ValueError(f"No valid token for connection {connectionId}")
|
||||||
|
|
||||||
|
provider = MsftConnector(connection, token.tokenAccess)
|
||||||
|
adapter = provider.getServiceAdapter("sharepoint")
|
||||||
|
|
||||||
|
rootUser = getRootUser()
|
||||||
|
ctx = ServiceCenterContext(
|
||||||
|
user=rootUser,
|
||||||
|
mandate_id=str(getattr(connection, "mandateId", "") or ""),
|
||||||
|
)
|
||||||
|
knowledgeService = getService("knowledge", ctx)
|
||||||
|
return adapter, connection, knowledgeService
|
||||||
|
|
||||||
|
|
||||||
|
async def _walkFolder(
|
||||||
|
*,
|
||||||
|
adapter,
|
||||||
|
knowledgeService,
|
||||||
|
runExtractionFn,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
folderPath: str,
|
||||||
|
depth: int,
|
||||||
|
limits: SharepointBootstrapLimits,
|
||||||
|
result: SharepointBootstrapResult,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||||
|
) -> None:
|
||||||
|
if depth > limits.maxDepth:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
entries = await adapter.browse(folderPath)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("sharepoint browse %s failed: %s", folderPath, exc)
|
||||||
|
result.errors.append(f"browse({folderPath}): {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
for entry in entries:
|
||||||
|
if result.indexed + result.skippedDuplicate >= limits.maxItems:
|
||||||
|
return
|
||||||
|
if result.bytesProcessed >= limits.maxBytes:
|
||||||
|
return
|
||||||
|
|
||||||
|
entryPath = getattr(entry, "path", "") or ""
|
||||||
|
if getattr(entry, "isFolder", False):
|
||||||
|
await _walkFolder(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
runExtractionFn=runExtractionFn,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
folderPath=entryPath,
|
||||||
|
depth=depth + 1,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
mimeType = getattr(entry, "mimeType", None) or "application/octet-stream"
|
||||||
|
if any(mimeType.startswith(prefix) for prefix in limits.skipMimePrefixes):
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
size = int(getattr(entry, "size", 0) or 0)
|
||||||
|
if size and size > limits.maxFileSize:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
metadata = getattr(entry, "metadata", {}) or {}
|
||||||
|
externalItemId = metadata.get("id") or entryPath
|
||||||
|
revision = metadata.get("revision") or metadata.get("lastModifiedDateTime")
|
||||||
|
|
||||||
|
await _ingestOne(
|
||||||
|
adapter=adapter,
|
||||||
|
knowledgeService=knowledgeService,
|
||||||
|
runExtractionFn=runExtractionFn,
|
||||||
|
connectionId=connectionId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
userId=userId,
|
||||||
|
entry=entry,
|
||||||
|
entryPath=entryPath,
|
||||||
|
mimeType=mimeType,
|
||||||
|
externalItemId=externalItemId,
|
||||||
|
revision=revision,
|
||||||
|
limits=limits,
|
||||||
|
result=result,
|
||||||
|
progressCb=progressCb,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _ingestOne(
|
||||||
|
*,
|
||||||
|
adapter,
|
||||||
|
knowledgeService,
|
||||||
|
runExtractionFn,
|
||||||
|
connectionId: str,
|
||||||
|
mandateId: str,
|
||||||
|
userId: str,
|
||||||
|
entry,
|
||||||
|
entryPath: str,
|
||||||
|
mimeType: str,
|
||||||
|
externalItemId: str,
|
||||||
|
revision: Optional[str],
|
||||||
|
limits: SharepointBootstrapLimits,
|
||||||
|
result: SharepointBootstrapResult,
|
||||||
|
progressCb: Optional[Callable[[int, Optional[str]], None]],
|
||||||
|
) -> None:
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
|
||||||
|
|
||||||
|
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
|
||||||
|
fileName = getattr(entry, "name", "") or externalItemId
|
||||||
|
|
||||||
|
try:
|
||||||
|
fileBytes = await adapter.download(entryPath)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("sharepoint download %s failed: %s", entryPath, exc)
|
||||||
|
result.failed += 1
|
||||||
|
result.errors.append(f"download({entryPath}): {exc}")
|
||||||
|
return
|
||||||
|
if not fileBytes:
|
||||||
|
result.failed += 1
|
||||||
|
return
|
||||||
|
|
||||||
|
result.bytesProcessed += len(fileBytes)
|
||||||
|
|
||||||
|
try:
|
||||||
|
extracted = runExtractionFn(
|
||||||
|
fileBytes, fileName, mimeType,
|
||||||
|
ExtractionOptions(mergeStrategy=None),
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("sharepoint extraction %s failed: %s", entryPath, exc)
|
||||||
|
result.failed += 1
|
||||||
|
result.errors.append(f"extract({entryPath}): {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
contentObjects = _toContentObjects(extracted, fileName)
|
||||||
|
if not contentObjects:
|
||||||
|
result.skippedPolicy += 1
|
||||||
|
return
|
||||||
|
|
||||||
|
provenance: Dict[str, Any] = {
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"authority": "msft",
|
||||||
|
"service": "sharepoint",
|
||||||
|
"externalItemId": externalItemId,
|
||||||
|
"externalPath": entryPath,
|
||||||
|
"revision": revision,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
handle = await knowledgeService.requestIngestion(
|
||||||
|
IngestionJob(
|
||||||
|
sourceKind="sharepoint_item",
|
||||||
|
sourceId=syntheticFileId,
|
||||||
|
fileName=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
userId=userId,
|
||||||
|
mandateId=mandateId,
|
||||||
|
contentObjects=contentObjects,
|
||||||
|
contentVersion=revision,
|
||||||
|
neutralize=limits.neutralize,
|
||||||
|
provenance=provenance,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True)
|
||||||
|
result.failed += 1
|
||||||
|
result.errors.append(f"ingest({entryPath}): {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if handle.status == "duplicate":
|
||||||
|
result.skippedDuplicate += 1
|
||||||
|
elif handle.status == "indexed":
|
||||||
|
result.indexed += 1
|
||||||
|
else:
|
||||||
|
result.failed += 1
|
||||||
|
if handle.error:
|
||||||
|
result.errors.append(f"ingest({entryPath}): {handle.error}")
|
||||||
|
|
||||||
|
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
|
||||||
|
processed = result.indexed + result.skippedDuplicate
|
||||||
|
try:
|
||||||
|
progressCb(
|
||||||
|
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
|
||||||
|
f"sharepoint processed={processed}",
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d",
|
||||||
|
processed, result.skippedDuplicate, result.failed,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.progress",
|
||||||
|
"part": "sharepoint",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"processed": processed,
|
||||||
|
"skippedDup": result.skippedDuplicate,
|
||||||
|
"failed": result.failed,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Yield so the event loop can interleave other tasks (download/extract are
|
||||||
|
# CPU-ish and extraction uses sync libs; cooperative scheduling prevents
|
||||||
|
# starving other workers).
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
|
||||||
|
durationMs = int((time.time() - startMs) * 1000)
|
||||||
|
logger.info(
|
||||||
|
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
|
||||||
|
connectionId,
|
||||||
|
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
|
||||||
|
durationMs,
|
||||||
|
extra={
|
||||||
|
"event": "ingestion.connection.bootstrap.done",
|
||||||
|
"part": "sharepoint",
|
||||||
|
"connectionId": connectionId,
|
||||||
|
"indexed": result.indexed,
|
||||||
|
"skippedDup": result.skippedDuplicate,
|
||||||
|
"skippedPolicy": result.skippedPolicy,
|
||||||
|
"failed": result.failed,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"connectionId": result.connectionId,
|
||||||
|
"indexed": result.indexed,
|
||||||
|
"skippedDuplicate": result.skippedDuplicate,
|
||||||
|
"skippedPolicy": result.skippedPolicy,
|
||||||
|
"failed": result.failed,
|
||||||
|
"bytesProcessed": result.bytesProcessed,
|
||||||
|
"durationMs": durationMs,
|
||||||
|
"errors": result.errors[:20],
|
||||||
|
}
|
||||||
107
modules/serviceCenter/services/serviceKnowledge/subTextClean.py
Normal file
107
modules/serviceCenter/services/serviceKnowledge/subTextClean.py
Normal file
|
|
@ -0,0 +1,107 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Text normalisation utilities used by knowledge ingestion.

The email body cleaning logic is intentionally regex-based and works on plain
text after an HTML→text pass so we never store unsanitised HTML/JS in the
knowledge store and retrieval stays robust (no extraneous markup tokens
eating embedding budget).
"""

from __future__ import annotations

import re
from typing import Optional

DEFAULT_MAX_CHARS = 8000


_QUOTE_MARKER_PATTERNS = [
    re.compile(r"^\s*(?:On\s.+?\swrote:)\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*(?:Am\s.+?\sschrieb.+?:)\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*-{2,}\s*Original\s*Message\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*-{2,}\s*Urspr.+Nachricht\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*From:\s+.+$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Von:\s+.+$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Sent:\s+.+$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Gesendet:\s+.+$", re.MULTILINE | re.IGNORECASE),
]

_SIGNATURE_MARKERS = [
    re.compile(r"^\s*-{2,}\s*$", re.MULTILINE),
    re.compile(r"^\s*—\s*$", re.MULTILINE),
    re.compile(r"^\s*Best regards\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Kind regards\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Mit freundlichen Gr[üu]ßen\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Viele Gr[üu]ße\b.*$", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^\s*Best,\s*$", re.MULTILINE | re.IGNORECASE),
]


def _htmlToText(html: str) -> str:
    """Prefer BeautifulSoup when available, fall back to regex."""
    try:
        from bs4 import BeautifulSoup  # type: ignore

        soup = BeautifulSoup(html, "html.parser")
        for tag in soup(["script", "style", "head"]):
            tag.decompose()
        for br in soup.find_all(["br"]):
            br.replace_with("\n")
        for p in soup.find_all(["p", "div", "li", "tr"]):
            p.append("\n")
        text = soup.get_text()
    except Exception:
        # Minimal fallback: strip tags crudely.
        text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
        text = re.sub(r"</(?:p|div|li|tr)>", "\n", text, flags=re.IGNORECASE)
        text = re.sub(r"<[^>]+>", "", text)
    # Collapse non-breaking + zero-width whitespace.
    text = text.replace("\u00a0", " ").replace("\u200b", "")
    return text


def _stripQuotedThread(text: str) -> str:
    """Remove reply-chain content so only the author's own contribution remains."""
    earliest = len(text)
    for pattern in _QUOTE_MARKER_PATTERNS:
        match = pattern.search(text)
        if match and match.start() < earliest:
            earliest = match.start()
    # Drop any block starting with "> " quoted lines (often Gmail/Thunderbird).
    quotedBlock = re.search(r"^(?:\s*>.*\n?)+", text, re.MULTILINE)
    if quotedBlock and quotedBlock.start() < earliest:
        earliest = quotedBlock.start()
    return text[:earliest].rstrip()


def _stripSignature(text: str) -> str:
    earliest = len(text)
    for pattern in _SIGNATURE_MARKERS:
        match = pattern.search(text)
        if match and match.start() < earliest:
            earliest = match.start()
    return text[:earliest].rstrip()


def _collapseWhitespace(text: str) -> str:
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


def cleanEmailBody(html: str, maxChars: Optional[int] = DEFAULT_MAX_CHARS) -> str:
    """Return a compact plain-text view of an email body suitable for embedding.

    Steps: HTML → text, remove quoted reply chain, remove signature, collapse
    whitespace, truncate to maxChars. Always returns a string (possibly empty).
    """
    if not html:
        return ""
    text = _htmlToText(html) if "<" in html and ">" in html else html
    text = _stripQuotedThread(text)
    text = _stripSignature(text)
    text = _collapseWhitespace(text)
    if maxChars and len(text) > maxChars:
        text = text[:maxChars].rstrip() + "…"
    return text
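For orientation only (not part of the commits above): a minimal usage sketch of cleanEmailBody, assuming the module is importable under the path just shown. The sample HTML is illustrative and the exact output depends on whether BeautifulSoup is installed.

from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody

# Illustrative input: the author's own text, a signature block, and a quoted reply chain.
rawHtml = (
    "<div><p>Hi team,</p><p>the invoice is attached.</p>"
    "<p>Best regards<br>Alice</p>"
    "<p>On Mon, Alice wrote:</p><blockquote>older thread</blockquote></div>"
)

cleaned = cleanEmailBody(rawHtml, maxChars=200)
# Expected shape: "Hi team,\nthe invoice is attached." (signature and quoted thread removed).
print(cleaned)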
@@ -100,12 +100,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
         # Update progress - preparing parameters
         self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

-        from modules.datamodels.datamodelDocref import DocumentReferenceList
+        from modules.datamodels.datamodelDocref import (
+            DocumentReferenceList,
+            coerceDocumentReferenceList,
+        )

         documentListParam = parameters.get("documentList")
         inline_content_parts: Optional[List[ContentPart]] = None

-        # Handle inline ActionDocuments (e.g. from SharePoint/email in automation2 – no persistence)
+        # Inline ActionDocuments (SharePoint/email in automation2, no
+        # persistence) are list[ActionDocument-like dict] -- handled
+        # separately because they carry pre-extracted content. Everything
+        # else is normalised through the tolerant coercer.
         is_inline = (
             isinstance(documentListParam, list)
             and len(documentListParam) > 0
@@ -117,28 +123,12 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
             logger.info(
                 f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)"
             )
-        elif documentListParam is None:
-            documentList = DocumentReferenceList(references=[])
-            logger.debug(f"ai.process: documentList is None, using empty DocumentReferenceList")
-        elif isinstance(documentListParam, DocumentReferenceList):
-            documentList = documentListParam
-            logger.info(f"ai.process: Received DocumentReferenceList with {len(documentList.references)} references")
-            for idx, ref in enumerate(documentList.references):
-                logger.info(f"  Reference {idx + 1}: documentId={ref.documentId}, type={type(ref).__name__}")
-        elif isinstance(documentListParam, str):
-            documentList = DocumentReferenceList.from_string_list([documentListParam])
-            logger.info(f"ai.process: Converted string to DocumentReferenceList with {len(documentList.references)} references")
-        elif isinstance(documentListParam, list):
-            first = documentListParam[0] if documentListParam else None
-            logger.info(
-                f"ai.process: documentList is list of {len(documentListParam)} items, "
-                f"first type={type(first).__name__}, has_documentData={_is_action_document_like(first) if first else False}"
-            )
-            documentList = DocumentReferenceList.from_string_list(documentListParam)
-            logger.info(f"ai.process: Converted list to DocumentReferenceList with {len(documentList.references)} references")
         else:
-            logger.error(f"Invalid documentList type: {type(documentListParam)}")
-            documentList = DocumentReferenceList(references=[])
+            documentList = coerceDocumentReferenceList(documentListParam)
+            logger.info(
+                f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
+                f"to DocumentReferenceList with {len(documentList.references)} references"
+            )

         # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
         resultType = parameters.get("resultType")
@@ -5,7 +5,10 @@ import logging
 import time
 from typing import Dict, Any
 from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelDocref import DocumentReferenceList
+from modules.datamodels.datamodelDocref import (
+    DocumentReferenceList,
+    coerceDocumentReferenceList,
+)
 from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy

 logger = logging.getLogger(__name__)

@@ -16,20 +19,17 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
     workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
     operationId = f"context_extract_{workflowId}_{int(time.time())}"

-    # Extract documentList from parameters dict
     documentListParam = parameters.get("documentList")
     if not documentListParam:
         return ActionResult.isFailure(error="documentList is required")

-    # Convert to DocumentReferenceList if needed
-    if isinstance(documentListParam, DocumentReferenceList):
-        documentList = documentListParam
-    elif isinstance(documentListParam, str):
-        documentList = DocumentReferenceList.from_string_list([documentListParam])
-    elif isinstance(documentListParam, list):
-        documentList = DocumentReferenceList.from_string_list(documentListParam)
-    else:
-        return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+    documentList = coerceDocumentReferenceList(documentListParam)
+    if not documentList.references:
+        return ActionResult.isFailure(
+            error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
+            f"expected DocumentReferenceList, list of strings/dicts, or "
+            f"a wrapper dict like {{'documents': [...]}}"
+        )

     # Start progress tracking
     parentOperationId = parameters.get('parentOperationId')
@@ -5,7 +5,10 @@ import logging
 import time
 from typing import Dict, Any
 from modules.datamodels.datamodelChat import ActionResult, ActionDocument
-from modules.datamodels.datamodelDocref import DocumentReferenceList
+from modules.datamodels.datamodelDocref import (
+    DocumentReferenceList,
+    coerceDocumentReferenceList,
+)
 from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart

 logger = logging.getLogger(__name__)

@@ -26,20 +29,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
     if not neutralizationEnabled:
         logger.info("Neutralization is not enabled, returning documents unchanged")
         # Return original documents if neutralization is disabled
-        # Get documents from documentList
         documentListParam = parameters.get("documentList")
         if not documentListParam:
             return ActionResult.isFailure(error="documentList is required")

-        # Convert to DocumentReferenceList if needed
-        if isinstance(documentListParam, DocumentReferenceList):
-            documentList = documentListParam
-        elif isinstance(documentListParam, str):
-            documentList = DocumentReferenceList.from_string_list([documentListParam])
-        elif isinstance(documentListParam, list):
-            documentList = DocumentReferenceList.from_string_list(documentListParam)
-        else:
-            return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+        documentList = coerceDocumentReferenceList(documentListParam)
+        if not documentList.references:
+            return ActionResult.isFailure(
+                error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
+            )

         # Get ChatDocuments from documentList
         chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)

@@ -65,20 +63,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:

         return ActionResult.isSuccess(documents=actionDocuments)

-    # Extract documentList from parameters dict
     documentListParam = parameters.get("documentList")
     if not documentListParam:
         return ActionResult.isFailure(error="documentList is required")

-    # Convert to DocumentReferenceList if needed
-    if isinstance(documentListParam, DocumentReferenceList):
-        documentList = documentListParam
-    elif isinstance(documentListParam, str):
-        documentList = DocumentReferenceList.from_string_list([documentListParam])
-    elif isinstance(documentListParam, list):
-        documentList = DocumentReferenceList.from_string_list(documentListParam)
-    else:
-        return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+    documentList = coerceDocumentReferenceList(documentListParam)
+    if not documentList.references:
+        return ActionResult.isFailure(
+            error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
+        )

     # Start progress tracking
     parentOperationId = parameters.get('parentOperationId')
@@ -9,6 +9,9 @@ from modules.datamodels.datamodelChat import ActionResult, ActionItem, TaskStep
 from modules.datamodels.datamodelChat import ChatWorkflow
 from modules.workflows.processing.shared.methodDiscovery import methods
 from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
+from modules.workflows.processing.shared.parameterValidation import (
+    InvalidActionParameterError, validateAndCoerceParameters,
+)

 logger = logging.getLogger(__name__)

@@ -20,20 +23,32 @@ class ActionExecutor:


     async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
-        """Execute a method action"""
+        """Execute a method action with validated/coerced parameters.
+
+        Parameter validation is centralised here so the contract holds for
+        every execution path (agent tool calls, workflow graph nodes,
+        REST routes) — actions can rely on declared types without
+        defensive isinstance branches.
+        """
         try:
             if methodName not in methods:
                 raise ValueError(f"Unknown method: {methodName}")

             method = methods[methodName]
             if actionName not in method['actions']:
                 raise ValueError(f"Unknown action: {actionName} for method {methodName}")

             action = method['actions'][actionName]

-            # Execute the action
+            actionDef = method['instance']._actions.get(actionName)
+            if actionDef is not None:
+                parameters = validateAndCoerceParameters(actionDef, parameters or {})
+
             return await action['method'](parameters)

+        except InvalidActionParameterError as e:
+            logger.error(f"Invalid parameters for {methodName}.{actionName}: {e}")
+            raise
         except Exception as e:
             logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
             raise
198 modules/workflows/processing/shared/parameterValidation.py Normal file
@@ -0,0 +1,198 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Universal parameter validation + coercion for workflow actions.

Workflow actions historically received their ``parameters`` as a raw
``Dict[str, Any]`` with no enforcement of the declared parameter schema.
That implicit contract masked two whole classes of bugs:

1. **Type confusion at the agent boundary.** The agent's tool schema
   (Phase-3 Typed Action Architecture) exposes ``FeatureInstanceRef`` /
   ``ConnectionRef`` etc. as typed *objects* with ``id`` plus a
   discriminator (``featureCode`` / ``authority``) so the LLM can pick
   the right instance among several. The action implementations, however,
   use the value as a bare UUID string in ``recordFilter={"col": <value>}``.
   Without normalization Postgres fails with "can't adapt type 'dict'",
   the connector's previous swallow-and-return-[] hid the failure, and the
   action returned the misleading "no record found" error.

2. **Unchecked optional flags.** ``forceRefresh`` arriving as the string
   ``"true"`` instead of a real bool, ``periodMonth`` arriving as ``"12"``
   instead of ``12``, etc. Every action grew its own ad-hoc coercion code.

This module centralises validation and coercion at exactly one boundary:
``ActionExecutor.executeAction``. By the time the action body runs, the
``parameters`` dict is guaranteed to satisfy the declared schema.

Unknown extra keys (e.g. ``parentOperationId`` injected by the executor,
``expectedDocumentFormats`` from action items) are passed through
untouched — the schema only constrains *declared* parameters.
"""
from __future__ import annotations

import logging
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


class InvalidActionParameterError(ValueError):
    """Raised when a declared action parameter is missing, malformed, or
    cannot be coerced into the declared type.

    The message identifies the action and parameter so the agent and
    workflow log can pinpoint the offending call instead of getting an
    opaque downstream "no record found" or "can't adapt type 'X'".
    """

    def __init__(self, actionId: str, paramName: str, reason: str):
        super().__init__(f"{actionId}.{paramName}: {reason}")
        self.actionId = actionId
        self.paramName = paramName
        self.reason = reason


_TRUE_STRINGS = {"true", "1", "yes", "on"}
_FALSE_STRINGS = {"false", "0", "no", "off", ""}


def _isRefSchema(typeStr: str) -> bool:
    """A declared type is a Ref-Schema iff its name ends with ``Ref`` AND it
    resolves to a PORT_TYPE_CATALOG schema with an ``id`` field.

    The catalog is imported lazily to keep this module light at startup.
    """
    if not typeStr or not typeStr.endswith("Ref"):
        return False
    from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG
    schema = PORT_TYPE_CATALOG.get(typeStr)
    if schema is None:
        return False
    return any(f.name == "id" for f in schema.fields)


def _coerceRef(actionId: str, paramName: str, value: Any) -> Optional[str]:
    """Collapse a Ref payload to its ``id`` string.

    Accepts:
    * already a string → returned as-is (workflow execution path),
    * dict with non-empty ``id`` field → returns the id (agent path),
    * ``None`` → returned as-is so optional Ref params stay optional.
    """
    if value is None or isinstance(value, str):
        return value
    if isinstance(value, dict):
        refId = value.get("id")
        if isinstance(refId, str) and refId:
            return refId
        raise InvalidActionParameterError(
            actionId, paramName,
            f"Ref payload missing or empty 'id' field: {value!r}",
        )
    raise InvalidActionParameterError(
        actionId, paramName,
        f"Ref must be a string id or {{'id': ...}} dict, got {type(value).__name__}",
    )


def _coercePrimitive(actionId: str, paramName: str, value: Any, typeStr: str) -> Any:
    """Best-effort coercion of primitive types from string-form payloads.

    The agent's JSON tool calls deliver everything as strings/numbers; the
    workflow executor passes through raw template values which are also
    often strings. Coercing here removes ad-hoc ``isinstance(x, str)``
    branches inside every action.
    """
    if value is None:
        return None
    if typeStr == "bool":
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            lower = value.strip().lower()
            if lower in _TRUE_STRINGS:
                return True
            if lower in _FALSE_STRINGS:
                return False
        if isinstance(value, (int, float)):
            return bool(value)
        raise InvalidActionParameterError(
            actionId, paramName, f"cannot coerce {value!r} to bool",
        )
    if typeStr == "int":
        if isinstance(value, bool):
            return int(value)
        if isinstance(value, int):
            return value
        if isinstance(value, str) and value.strip():
            try:
                return int(value.strip(), 10)
            except ValueError:
                pass
        if isinstance(value, float) and value.is_integer():
            return int(value)
        raise InvalidActionParameterError(
            actionId, paramName, f"cannot coerce {value!r} to int",
        )
    if typeStr == "float":
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str) and value.strip():
            try:
                return float(value.strip())
            except ValueError:
                pass
        raise InvalidActionParameterError(
            actionId, paramName, f"cannot coerce {value!r} to float",
        )
    return value


def validateAndCoerceParameters(actionDef, parameters: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and coerce ``parameters`` against ``actionDef.parameters``.

    Behaviour per declared parameter:

    * **Missing + required** → raises ``InvalidActionParameterError``.
    * **Missing + optional** → left absent (action uses its own default).
    * **Present + Ref-Schema (e.g. FeatureInstanceRef)** → ``{id: ..., ...}``
      collapsed to the bare id string; pass-through if already a string.
    * **Present + primitive (bool/int/float)** → coerced from common
      string forms (e.g. ``"true"`` → ``True``).
    * **Present + other types** (catalog objects, ``str``, ``Any``,
      containers) → passed through untouched.

    Unknown keys (e.g. ``parentOperationId``, ``expectedDocumentFormats``,
    ad-hoc fields injected by the executor) are passed through unchanged.

    Returns a new dict (does not mutate the caller's parameters).
    """
    if not parameters:
        parameters = {}
    actionId = getattr(actionDef, "actionId", None) or "<unknown.action>"
    declared = getattr(actionDef, "parameters", {}) or {}

    coerced: Dict[str, Any] = dict(parameters)

    for paramName, paramSchema in declared.items():
        typeStr = getattr(paramSchema, "type", None) or "Any"
        required = bool(getattr(paramSchema, "required", False))

        if paramName not in coerced or coerced[paramName] is None:
            if required:
                raise InvalidActionParameterError(
                    actionId, paramName, "required parameter missing",
                )
            continue

        rawValue = coerced[paramName]

        if _isRefSchema(typeStr):
            coerced[paramName] = _coerceRef(actionId, paramName, rawValue)
            continue

        if typeStr in ("bool", "int", "float"):
            coerced[paramName] = _coercePrimitive(actionId, paramName, rawValue, typeStr)
            continue

    return coerced
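For orientation only (not part of the commits above): a minimal sketch of the validator exercised in isolation. The SimpleNamespace stand-ins for an action definition are hypothetical and only mimic the attributes the validator reads; validateAndCoerceParameters and InvalidActionParameterError come from the new module.

from types import SimpleNamespace

from modules.workflows.processing.shared.parameterValidation import (
    InvalidActionParameterError,
    validateAndCoerceParameters,
)

# Hypothetical action definition: actionId plus a mapping of parameter schemas
# carrying `type` and `required`, which is all the validator looks at.
fakeActionDef = SimpleNamespace(
    actionId="demo.action",
    parameters={
        "forceRefresh": SimpleNamespace(type="bool", required=False),
        "periodMonth": SimpleNamespace(type="int", required=True),
    },
)

coerced = validateAndCoerceParameters(fakeActionDef, {"forceRefresh": "true", "periodMonth": "12"})
assert coerced == {"forceRefresh": True, "periodMonth": 12}

try:
    validateAndCoerceParameters(fakeActionDef, {"forceRefresh": "yes"})
except InvalidActionParameterError as err:
    print(err)  # demo.action.periodMonth: required parameter missing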
0 tests/unit/aicore/__init__.py Normal file
66 tests/unit/aicore/test_aicorePluginOpenai_temperature.py Normal file
@@ -0,0 +1,66 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: temperature handling for OpenAI chat-completions models.

Historical regression: every payload sent ``temperature=0.2``. After the
GPT-5 launch OpenAI rejects any non-default temperature for the GPT-5.x
and o-series (o1/o3/o4) reasoning models with HTTP 400::

    "Unsupported value: 'temperature' does not support 0.2 with this
    model. Only the default (1) value is supported."

The fix is a single helper, ``_supportsCustomTemperature``, that is
consulted before adding the field to the outgoing payload. These tests
pin the contract:

* legacy chat models (gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) keep
  honoring custom temperatures,
* every gpt-5.x and o1/o3/o4 variant must omit the field entirely.
"""
from __future__ import annotations

import pytest

from modules.aicore.aicorePluginOpenai import _supportsCustomTemperature


class TestSupportsCustomTemperature:
    """Pure model-name classification - no network, no payload assembly."""

    @pytest.mark.parametrize(
        "modelName",
        [
            "gpt-4o",
            "gpt-4o-mini",
            "gpt-4.1",
            "gpt-3.5-turbo",
            "text-embedding-3-small",
            "dall-e-3",
        ],
    )
    def testLegacyModelsAcceptCustomTemperature(self, modelName):
        assert _supportsCustomTemperature(modelName) is True

    @pytest.mark.parametrize(
        "modelName",
        [
            "gpt-5",
            "gpt-5.4",
            "gpt-5.4-mini",
            "gpt-5.4-nano",
            "gpt-5.5",
            "GPT-5.5",
            "o1",
            "o1-mini",
            "o3",
            "o3-mini",
            "o4-mini",
        ],
    )
    def testReasoningModelsRejectCustomTemperature(self, modelName):
        assert _supportsCustomTemperature(modelName) is False

    def testEmptyOrNoneModelDefaultsToSupported(self):
        # Defensive: unknown/empty names should not silently break legacy paths.
        assert _supportsCustomTemperature("") is True
        assert _supportsCustomTemperature(None) is True
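For orientation only (not part of the commits above): a sketch of the gating pattern these tests pin down. buildChatPayload and its signature are hypothetical illustrations; only _supportsCustomTemperature and the model names come from the test file.

from typing import Any, Dict, List, Optional

from modules.aicore.aicorePluginOpenai import _supportsCustomTemperature


def buildChatPayload(modelName: str, messages: List[dict], temperature: Optional[float] = 0.2) -> Dict[str, Any]:
    # Hypothetical helper: only adds `temperature` when the model accepts a custom value.
    payload: Dict[str, Any] = {"model": modelName, "messages": messages}
    if temperature is not None and _supportsCustomTemperature(modelName):
        payload["temperature"] = temperature
    return payload


assert "temperature" in buildChatPayload("gpt-4o", [])        # legacy model keeps the field
assert "temperature" not in buildChatPayload("o3-mini", [])   # reasoning model omits it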
0 tests/unit/connectors/__init__.py Normal file
158 tests/unit/connectors/test_connectorDbPostgre_failLoud.py Normal file
@@ -0,0 +1,158 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: PostgreSQL connector raises DatabaseQueryError on real failures.

Historical regression: ``getRecordset`` and friends used to swallow every
exception (``except Exception: log; return []``), which turned every kind of
broken query into "no rows found". That hid bugs like:

* dict passed where Postgres expected a UUID string ("can't adapt type 'dict'"),
* missing/renamed columns after an incomplete schema migration,
* dropped tables, lost connections, etc.

These tests pin the new contract: empty result sets still return ``[]`` /
``None`` (normal), but any exception inside the query path propagates as
``DatabaseQueryError`` with the table name attached. The transaction is
rolled back so the connection is usable for subsequent queries.
"""
from __future__ import annotations

from unittest.mock import MagicMock

import pytest
import psycopg2.errors

from modules.connectors.connectorDbPostgre import (
    DatabaseConnector,
    DatabaseQueryError,
    _rollbackQuietly,
)


class DummyTable:
    """Stand-in for a Pydantic model so we can drive the connector without a real DB.

    The connector reads ``model_class.__name__`` to derive the SQL table name,
    so the class name itself becomes the asserted table name in tests.
    """
    model_fields = {}


def _makeConnector(cursorBehavior):
    """Build a ``DatabaseConnector`` skeleton with mocked connection/cursor.

    ``cursorBehavior`` is a callable invoked with the cursor mock so the test
    can configure ``execute``/``fetchall``/``fetchone`` per scenario.
    """
    connector = DatabaseConnector.__new__(DatabaseConnector)
    cursor = MagicMock()
    cursorContext = MagicMock()
    cursorContext.__enter__ = MagicMock(return_value=cursor)
    cursorContext.__exit__ = MagicMock(return_value=False)

    connection = MagicMock()
    connection.cursor.return_value = cursorContext
    connector.connection = connection

    connector._ensureTableExists = MagicMock(return_value=True)
    connector._systemTableName = "_system"

    cursorBehavior(cursor)
    return connector, connection, cursor


class TestGetRecordsetFailLoud:
    def test_emptyResultStillReturnsList(self):
        """No rows → []; this is the normal happy path, not a failure."""
        def behavior(cursor):
            cursor.execute.return_value = None
            cursor.fetchall.return_value = []
        connector, connection, _ = _makeConnector(behavior)

        result = connector.getRecordset(DummyTable)
        assert result == []
        connection.rollback.assert_not_called()

    def test_dictAdaptErrorRaisesDatabaseQueryError(self):
        """Reproduces the Trustee bug: passing a dict in WHERE → can't adapt → raise."""
        def behavior(cursor):
            cursor.execute.side_effect = psycopg2.ProgrammingError(
                "can't adapt type 'dict'"
            )
        connector, connection, _ = _makeConnector(behavior)

        with pytest.raises(DatabaseQueryError) as excinfo:
            connector.getRecordset(
                DummyTable,
                recordFilter={"featureInstanceId": {"id": "uuid", "featureCode": "trustee"}},
            )

        assert excinfo.value.table == "DummyTable"
        assert "can't adapt type 'dict'" in str(excinfo.value)
        assert isinstance(excinfo.value.original, psycopg2.ProgrammingError)
        connection.rollback.assert_called_once()

    def test_missingColumnRaisesDatabaseQueryError(self):
        def behavior(cursor):
            cursor.execute.side_effect = psycopg2.errors.UndefinedColumn(
                'column "wat" does not exist'
            )
        connector, connection, _ = _makeConnector(behavior)

        with pytest.raises(DatabaseQueryError) as excinfo:
            connector.getRecordset(DummyTable, recordFilter={"wat": "x"})

        assert "wat" in str(excinfo.value)
        connection.rollback.assert_called_once()

    def test_operationalErrorRaisesDatabaseQueryError(self):
        """Connection lost mid-query is also a real failure that must propagate."""
        def behavior(cursor):
            cursor.execute.side_effect = psycopg2.OperationalError("connection lost")
        connector, connection, _ = _makeConnector(behavior)

        with pytest.raises(DatabaseQueryError):
            connector.getRecordset(DummyTable)
        connection.rollback.assert_called_once()


class TestGetRecordFailLoud:
    def test_recordNotFoundReturnsNone(self):
        """`fetchone()` returning None is "row missing", not an error."""
        def behavior(cursor):
            cursor.execute.return_value = None
            cursor.fetchone.return_value = None
        connector, connection, _ = _makeConnector(behavior)

        result = connector.getRecord(DummyTable, "missing-id")
        assert result is None
        connection.rollback.assert_not_called()

    def test_queryErrorRaisesDatabaseQueryError(self):
        def behavior(cursor):
            cursor.execute.side_effect = psycopg2.errors.UndefinedTable(
                'relation "DummyTable" does not exist'
            )
        connector, connection, _ = _makeConnector(behavior)

        with pytest.raises(DatabaseQueryError) as excinfo:
            connector.getRecord(DummyTable, "any-id")

        assert excinfo.value.table == "DummyTable"
        connection.rollback.assert_called_once()


class TestRollbackQuietly:
    def test_rollsBackOnLiveConnection(self):
        connection = MagicMock()
        _rollbackQuietly(connection)
        connection.rollback.assert_called_once()

    def test_swallowsRollbackError(self):
        """Rollback failure must not mask the original query error."""
        connection = MagicMock()
        connection.rollback.side_effect = RuntimeError("rollback failed")
        _rollbackQuietly(connection)

    def test_noopOnNoneConnection(self):
        _rollbackQuietly(None)
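For orientation only (not part of the commits above): a call-site sketch of the fail-loud contract these tests pin down. loadTrusteeRecords is a hypothetical caller; DatabaseQueryError, its table/original attributes, and getRecordset come from the connector under test.

from modules.connectors.connectorDbPostgre import DatabaseConnector, DatabaseQueryError


def loadTrusteeRecords(connector: DatabaseConnector, model, featureInstanceId: str):
    # Hypothetical caller: the filter value must already be a bare id string, not a Ref dict.
    try:
        return connector.getRecordset(model, recordFilter={"featureInstanceId": featureInstanceId})
    except DatabaseQueryError as err:
        # err.table names the failing table; err.original carries the psycopg2 cause.
        raise RuntimeError(f"query against {err.table} failed: {err}") from err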
@@ -125,3 +125,10 @@ class TestConvertParameterSchema:
         schema = _convertParameterSchema(actionParams)
         assert schema["properties"]["connection"]["type"] == "object"
         assert "id" in schema["properties"]["connection"]["properties"]
+
+
+# Ref-payload normalization (collapsing `{id: ..., featureCode: ...}` to the
+# bare id string) is no longer the adapter's job — it moved to the central
+# `parameterValidation.validateAndCoerceParameters` invoked by
+# `ActionExecutor.executeAction`. Tests for that contract live in
+# `tests/unit/workflows/test_parameterValidation.py`.
203 tests/unit/services/test_bootstrap_clickup.py Normal file
@@ -0,0 +1,203 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap ClickUp tests with a fake service + knowledge service.

Verifies:
- Teams → spaces → lists (folderless + folder-based) → tasks traversal.
- Each task produces a `requestIngestion` call with `sourceKind="clickup_task"`
  and header + description content-objects.
- `date_updated` is forwarded as contentVersion → idempotency.
- Recency filter drops tasks older than `maxAgeDays`.
- maxWorkspaces / maxListsPerWorkspace / maxTasks caps are respected.
"""

import asyncio
import os
import sys
import time
from types import SimpleNamespace

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))

from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
    bootstrapClickup,
    ClickupBootstrapLimits,
    _syntheticTaskId,
)


def _nowMs(offsetDays: int = 0) -> str:
    return str(int((time.time() + offsetDays * 86400) * 1000))


class _FakeClickupService:
    """Records API calls; serves a canned 1-team / 1-space / 1-list / 2-task layout."""

    def __init__(self, taskCount=2, oldTask=False):
        self._taskCount = taskCount
        self._oldTask = oldTask  # when True, the second task is 400 days old
        self.calls = []

    async def getAuthorizedTeams(self):
        self.calls.append(("getAuthorizedTeams",))
        return {"teams": [{"id": "team-1", "name": "Acme"}]}

    async def getSpaces(self, team_id: str):
        self.calls.append(("getSpaces", team_id))
        return {"spaces": [{"id": "space-1", "name": "Engineering"}]}

    async def getFolderlessLists(self, space_id: str):
        self.calls.append(("getFolderlessLists", space_id))
        return {"lists": [{"id": "list-1", "name": "Sprint 1"}]}

    async def getFolders(self, space_id: str):
        self.calls.append(("getFolders", space_id))
        return {"folders": [{"id": "folder-1", "name": "Subproject"}]}

    async def getListsInFolder(self, folder_id: str):
        self.calls.append(("getListsInFolder", folder_id))
        return {"lists": [{"id": "list-2", "name": "Sub-tasks"}]}

    async def getTasksInList(self, list_id: str, *, page=0, include_closed=False, subtasks=True):
        self.calls.append(("getTasksInList", list_id, page, include_closed))
        if page > 0:
            return {"tasks": []}
        tasks = []
        for i in range(self._taskCount):
            tid = f"{list_id}-task-{i}"
            offsetDays = -400 if (self._oldTask and i == 1) else 0
            tasks.append({
                "id": tid,
                "name": f"Task {i} of {list_id}",
                "description": f"Plain description for task {i}",
                "text_content": f"Rich content for task {i}",
                "status": {"status": "open" if i == 0 else "closed"},
                "assignees": [{"username": "alice"}],
                "tags": [{"name": "urgent"}],
                "date_updated": _nowMs(offsetDays),
                "date_created": _nowMs(-1),
                "url": f"https://app.clickup.com/t/{tid}",
            })
        return {"tasks": tasks}


class _FakeKnowledgeService:
    def __init__(self, duplicateIds=None):
        self.calls = []
        self._duplicates = duplicateIds or set()

    async def requestIngestion(self, job):
        self.calls.append(job)
        status = "duplicate" if job.sourceId in self._duplicates else "indexed"
        return SimpleNamespace(
            jobId=job.sourceId, status=status, contentHash="h",
            fileId=job.sourceId, index=None, error=None,
        )


def _adapter(svc):
    return SimpleNamespace(_svc=svc)


def test_bootstrap_walks_team_space_lists_and_tasks():
    svc = _FakeClickupService(taskCount=2)
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapClickup(
            connectionId="c1",
            adapter=_adapter(svc),
            connection=connection,
            knowledgeService=knowledge,
            limits=ClickupBootstrapLimits(maxAgeDays=None),
        )

    result = asyncio.run(_run())
    # 2 lists (folderless list-1 + folder's list-2) × 2 tasks each = 4 tasks
    assert result["indexed"] == 4
    assert result["workspaces"] == 1
    assert result["lists"] == 2
    sourceIds = {c.sourceId for c in knowledge.calls}
    assert len(sourceIds) == 4
    for job in knowledge.calls:
        assert job.sourceKind == "clickup_task"
        assert job.mimeType == "application/vnd.clickup.task+json"
        assert job.mandateId == "m1"
        assert job.provenance["connectionId"] == "c1"
        assert job.provenance["authority"] == "clickup"
        assert job.provenance["teamId"] == "team-1"
        assert job.contentVersion  # numeric millisecond string
        # At least the header content-object is present.
        ids = [co["contentObjectId"] for co in job.contentObjects]
        assert "header" in ids


def test_bootstrap_reports_duplicates_on_second_run():
    svc = _FakeClickupService(taskCount=1)
    duplicates = {
        _syntheticTaskId("c1", "list-1-task-0"),
        _syntheticTaskId("c1", "list-2-task-0"),
    }
    knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapClickup(
            connectionId="c1",
            adapter=_adapter(svc),
            connection=connection,
            knowledgeService=knowledge,
            limits=ClickupBootstrapLimits(maxAgeDays=None),
        )

    result = asyncio.run(_run())
    assert result["indexed"] == 0
    assert result["skippedDuplicate"] == 2


def test_bootstrap_skips_tasks_older_than_maxAgeDays():
    svc = _FakeClickupService(taskCount=2, oldTask=True)
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapClickup(
            connectionId="c1",
            adapter=_adapter(svc),
            connection=connection,
            knowledgeService=knowledge,
            limits=ClickupBootstrapLimits(maxAgeDays=180),
        )

    result = asyncio.run(_run())
    # 2 lists × (1 recent + 1 skipped old) = 2 indexed + 2 skippedPolicy
    assert result["indexed"] == 2
    assert result["skippedPolicy"] == 2


def test_bootstrap_maxTasks_caps_ingestion():
    svc = _FakeClickupService(taskCount=2)
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapClickup(
            connectionId="c1",
            adapter=_adapter(svc),
            connection=connection,
            knowledgeService=knowledge,
            limits=ClickupBootstrapLimits(maxAgeDays=None, maxTasks=3),
        )

    result = asyncio.run(_run())
    assert result["indexed"] == 3


if __name__ == "__main__":
    test_bootstrap_walks_team_space_lists_and_tasks()
    test_bootstrap_reports_duplicates_on_second_run()
    test_bootstrap_skips_tasks_older_than_maxAgeDays()
    test_bootstrap_maxTasks_caps_ingestion()
    print("OK — bootstrapClickup tests passed")
225 tests/unit/services/test_bootstrap_gdrive.py Normal file
@@ -0,0 +1,225 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Google Drive tests with a fake adapter + knowledge service.

Verifies:
- Drive walk traverses root → subfolders, respecting `maxDepth`.
- Every file triggers `requestIngestion` with `sourceKind="gdrive_item"`.
- Duplicate runs (same modifiedTime revision) report `skippedDuplicate`.
- Provenance carries `authority="google"` and the Drive file id.
- Recency filter skips files older than `maxAgeDays`.
"""

import asyncio
import os
import sys
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from types import SimpleNamespace
from typing import Any, Dict, List, Optional

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))

from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
    bootstrapGdrive,
    GdriveBootstrapLimits,
    _syntheticFileId,
)


@dataclass
class _ExtEntry:
    name: str
    path: str
    isFolder: bool = False
    size: Optional[int] = None
    mimeType: Optional[str] = None
    metadata: Dict[str, Any] = None


def _today_iso(offsetDays: int = 0) -> str:
    return (datetime.now(timezone.utc) + timedelta(days=offsetDays)).strftime("%Y-%m-%dT%H:%M:%SZ")


class _FakeDriveAdapter:
    """Minimal DriveAdapter stand-in.

    Layout:
        "/" (root) → 2 files + 1 folder (sub)
        "/sub_id"  → 1 file
    """

    def __init__(self, recent_only: bool = True):
        self.downloaded: List[str] = []
        self._recent = _today_iso(0)
        self._old = _today_iso(-400)
        self._recent_only = recent_only

    async def browse(self, path: str, filter=None, limit=None):
        if path in ("/", "", "root"):
            return [
                _ExtEntry(
                    name="f1.txt", path="/f1", size=20,
                    mimeType="text/plain",
                    metadata={"id": "f1", "modifiedTime": self._recent},
                ),
                _ExtEntry(
                    name="f2.txt", path="/f2", size=20,
                    mimeType="text/plain",
                    metadata={"id": "f2", "modifiedTime": self._recent if self._recent_only else self._old},
                ),
                _ExtEntry(
                    name="Subfolder", path="/sub_id", isFolder=True,
                    mimeType="application/vnd.google-apps.folder",
                    metadata={"id": "sub_id", "modifiedTime": self._recent},
                ),
            ]
        if path == "/sub_id":
            return [
                _ExtEntry(
                    name="f3.txt", path="/f3", size=20,
                    mimeType="text/plain",
                    metadata={"id": "f3", "modifiedTime": self._recent},
                ),
            ]
        return []

    async def download(self, path: str) -> bytes:
        self.downloaded.append(path)
        return path.encode("utf-8")


class _FakeKnowledgeService:
    def __init__(self, duplicateIds=None):
        self.calls: List[SimpleNamespace] = []
        self._duplicateIds = duplicateIds or set()

    async def requestIngestion(self, job):
        self.calls.append(job)
        status = "duplicate" if job.sourceId in self._duplicateIds else "indexed"
        return SimpleNamespace(
            jobId=f"{job.sourceKind}:{job.sourceId}",
            status=status, contentHash="h",
            fileId=job.sourceId, index=None, error=None,
        )


def _fakeRunExtraction(data, name, mime, options):
    return SimpleNamespace(
        parts=[
            SimpleNamespace(
                id="p1",
                data=data.decode("utf-8") if isinstance(data, bytes) else str(data),
                typeGroup="text",
                label="page:1",
                metadata={"pageIndex": 0},
            )
        ]
    )


def test_bootstrap_walks_drive_and_subfolders():
    adapter = _FakeDriveAdapter()
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapGdrive(
            connectionId="c1",
            adapter=adapter,
            connection=connection,
            knowledgeService=knowledge,
            runExtractionFn=_fakeRunExtraction,
            limits=GdriveBootstrapLimits(maxAgeDays=None),
        )

    result = asyncio.run(_run())
    assert len(knowledge.calls) == 3
    sourceIds = {c.sourceId for c in knowledge.calls}
    assert sourceIds == {
        _syntheticFileId("c1", "f1"),
        _syntheticFileId("c1", "f2"),
        _syntheticFileId("c1", "f3"),
    }
    assert result["indexed"] == 3
    assert result["skippedDuplicate"] == 0
    assert adapter.downloaded == ["/f1", "/f2", "/f3"]


def test_bootstrap_reports_duplicates_on_second_run():
    adapter = _FakeDriveAdapter()
    duplicateIds = {
        _syntheticFileId("c1", "f1"),
        _syntheticFileId("c1", "f2"),
        _syntheticFileId("c1", "f3"),
    }
    knowledge = _FakeKnowledgeService(duplicateIds=duplicateIds)
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapGdrive(
            connectionId="c1",
            adapter=adapter,
            connection=connection,
            knowledgeService=knowledge,
            runExtractionFn=_fakeRunExtraction,
            limits=GdriveBootstrapLimits(maxAgeDays=None),
        )

    result = asyncio.run(_run())
    assert result["indexed"] == 0
    assert result["skippedDuplicate"] == 3


def test_bootstrap_skips_files_older_than_maxAgeDays():
    adapter = _FakeDriveAdapter(recent_only=False)  # f2 is 400 days old
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapGdrive(
            connectionId="c1",
            adapter=adapter,
            connection=connection,
            knowledgeService=knowledge,
            runExtractionFn=_fakeRunExtraction,
            limits=GdriveBootstrapLimits(maxAgeDays=180),
        )

    result = asyncio.run(_run())
    assert result["indexed"] == 2  # f1, f3
    assert result["skippedPolicy"] == 1  # f2 filtered out


def test_bootstrap_passes_connection_provenance():
    adapter = _FakeDriveAdapter()
    knowledge = _FakeKnowledgeService()
    connection = SimpleNamespace(mandateId="m1", userId="u1")

    async def _run():
        return await bootstrapGdrive(
            connectionId="c1",
            adapter=adapter,
            connection=connection,
            knowledgeService=knowledge,
            runExtractionFn=_fakeRunExtraction,
            limits=GdriveBootstrapLimits(maxAgeDays=None),
        )

    asyncio.run(_run())
    for job in knowledge.calls:
        assert job.sourceKind == "gdrive_item"
        assert job.mandateId == "m1"
        assert job.provenance["connectionId"] == "c1"
        assert job.provenance["authority"] == "google"
        assert job.provenance["service"] == "drive"
        assert job.contentVersion  # modifiedTime ISO string


if __name__ == "__main__":
    test_bootstrap_walks_drive_and_subfolders()
    test_bootstrap_reports_duplicates_on_second_run()
    test_bootstrap_skips_files_older_than_maxAgeDays()
    test_bootstrap_passes_connection_provenance()
    print("OK — bootstrapGdrive tests passed")
240 tests/unit/services/test_bootstrap_gmail.py Normal file
@@ -0,0 +1,240 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Gmail tests with a fake googleGet + knowledge service.

Verifies:
- Default labels (INBOX + SENT) are traversed.
- Each message produces a requestIngestion call with sourceKind=gmail_message
  and structured contentObjects (header / snippet / body).
- Pagination via `nextPageToken` is followed.
- historyId is forwarded as contentVersion → idempotency.
- MIME body extraction walks nested parts (multipart/alternative).
"""

import asyncio
import base64
import os
import sys
from types import SimpleNamespace

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))

from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
    bootstrapGmail,
    GmailBootstrapLimits,
    _syntheticMessageId,
    _buildContentObjects,
    _walkPayloadForBody,
)


def _b64url(text: str) -> str:
    return base64.urlsafe_b64encode(text.encode("utf-8")).decode("ascii").rstrip("=")


def _msg(mid: str, subject: str = "Hi", body: str = "Hello world", historyId: str = "h1"):
    return {
        "id": mid,
        "threadId": f"thread-{mid}",
        "historyId": historyId,
        "internalDate": "1700000000000",
        "snippet": body[:120],
        "payload": {
            "headers": [
                {"name": "Subject", "value": subject},
                {"name": "From", "value": "Alice <a@x.com>"},
                {"name": "To", "value": "Bob <b@x.com>"},
                {"name": "Date", "value": "Tue, 01 Jan 2025 10:00:00 +0000"},
            ],
            "mimeType": "text/plain",
            "body": {"data": _b64url(body), "size": len(body)},
            "parts": [],
        },
    }


class _FakeGoogleGet:
    """Records URLs + returns the wired-up page or message response."""

    def __init__(self, messages_by_label, paginated_label=None, page2=None):
        self._messages = messages_by_label
        self._paginated = paginated_label
        self._page2 = page2 or []
        self._served_first_page = set()
        self.requested = []

    async def __call__(self, url: str):
        self.requested.append(url)
        # List page: contains `/users/me/messages?labelIds=...`
        if "/users/me/messages?" in url:
            for label, msgs in self._messages.items():
                if f"labelIds={label}" in url:
                    if (
                        label == self._paginated
                        and label not in self._served_first_page
                    ):
                        self._served_first_page.add(label)
                        return {
                            "messages": [{"id": m["id"]} for m in msgs],
                            "nextPageToken": "token-2",
                        }
                    if label == self._paginated and "pageToken=token-2" in url:
                        return {
                            "messages": [{"id": m["id"]} for m in self._page2],
                        }
                    return {"messages": [{"id": m["id"]} for m in msgs]}
            return {"messages": []}
        # Detail fetch: /users/me/messages/{id}?format=full
        if "/users/me/messages/" in url and "format=full" in url:
            msgId = url.split("/users/me/messages/")[-1].split("?")[0]
            for msgs in self._messages.values():
                for m in msgs:
                    if m["id"] == msgId:
                        return m
            for m in self._page2:
                if m["id"] == msgId:
                    return m
        return {"error": "not found"}


class _FakeKnowledgeService:
    def __init__(self, duplicateIds=None):
        self.calls = []
        self._duplicates = duplicateIds or set()

    async def requestIngestion(self, job):
        self.calls.append(job)
        status = "duplicate" if job.sourceId in self._duplicates else "indexed"
        return SimpleNamespace(
            jobId=job.sourceId, status=status, contentHash="h",
            fileId=job.sourceId, index=None, error=None,
        )


def test_buildContentObjects_emits_header_snippet_body():
    parts = _buildContentObjects(_msg("m1", body="Hello\nWorld"), maxBodyChars=8000)
    ids = [p["contentObjectId"] for p in parts]
    assert ids == ["header", "snippet", "body"]
    header = parts[0]["data"]
    assert "Subject: Hi" in header
    assert "From: Alice <a@x.com>" in header
    assert "To: Bob <b@x.com>" in header


def test_walkPayloadForBody_prefers_plain_over_html():
    payload = {
        "mimeType": "multipart/alternative",
        "parts": [
            {"mimeType": "text/plain", "body": {"data": _b64url("plain body")}},
            {"mimeType": "text/html", "body": {"data": _b64url("<p>html body</p>")}},
        ],
    }
    bodies = _walkPayloadForBody(payload)
    assert bodies["text"] == "plain body"
    assert bodies["html"] == "<p>html body</p>"


def test_walkPayloadForBody_falls_back_to_html():
    payload = {
        "mimeType": "multipart/alternative",
        "parts": [
            {"mimeType": "text/html", "body": {"data": _b64url("<p>only html</p>")}},
        ],
    }
    bodies = _walkPayloadForBody(payload)
    assert bodies["text"] == ""
    assert "only html" in bodies["html"]


def test_bootstrap_gmail_indexes_messages_from_inbox_and_sent():
    fake_get = _FakeGoogleGet({
        "INBOX": [_msg("m1"), _msg("m2")],
        "SENT": [_msg("m3")],
    })
    knowledge = _FakeKnowledgeService()
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapGmail(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=SimpleNamespace(_token="t"),
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
limits=GmailBootstrapLimits(maxAgeDays=None),
|
||||||
|
googleGetFn=fake_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 3
|
||||||
|
sourceIds = {c.sourceId for c in knowledge.calls}
|
||||||
|
assert sourceIds == {
|
||||||
|
_syntheticMessageId("c1", "m1"),
|
||||||
|
_syntheticMessageId("c1", "m2"),
|
||||||
|
_syntheticMessageId("c1", "m3"),
|
||||||
|
}
|
||||||
|
for job in knowledge.calls:
|
||||||
|
assert job.sourceKind == "gmail_message"
|
||||||
|
assert job.mimeType == "message/rfc822"
|
||||||
|
assert job.provenance["connectionId"] == "c1"
|
||||||
|
assert job.provenance["authority"] == "google"
|
||||||
|
assert job.provenance["service"] == "gmail"
|
||||||
|
assert job.contentVersion == "h1"
|
||||||
|
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_gmail_follows_pagination():
|
||||||
|
fake_get = _FakeGoogleGet(
|
||||||
|
messages_by_label={"INBOX": [_msg("m1")], "SENT": []},
|
||||||
|
paginated_label="INBOX",
|
||||||
|
page2=[_msg("m2"), _msg("m3")],
|
||||||
|
)
|
||||||
|
knowledge = _FakeKnowledgeService()
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapGmail(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=SimpleNamespace(_token="t"),
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
limits=GmailBootstrapLimits(maxAgeDays=None),
|
||||||
|
googleGetFn=fake_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_gmail_reports_duplicates():
|
||||||
|
fake_get = _FakeGoogleGet({"INBOX": [_msg("m1"), _msg("m2")], "SENT": []})
|
||||||
|
duplicates = {
|
||||||
|
_syntheticMessageId("c1", "m1"),
|
||||||
|
_syntheticMessageId("c1", "m2"),
|
||||||
|
}
|
||||||
|
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapGmail(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=SimpleNamespace(_token="t"),
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
limits=GmailBootstrapLimits(maxAgeDays=None),
|
||||||
|
googleGetFn=fake_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 0
|
||||||
|
assert result["skippedDuplicate"] == 2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_buildContentObjects_emits_header_snippet_body()
|
||||||
|
test_walkPayloadForBody_prefers_plain_over_html()
|
||||||
|
test_walkPayloadForBody_falls_back_to_html()
|
||||||
|
test_bootstrap_gmail_indexes_messages_from_inbox_and_sent()
|
||||||
|
test_bootstrap_gmail_follows_pagination()
|
||||||
|
test_bootstrap_gmail_reports_duplicates()
|
||||||
|
print("OK — bootstrapGmail tests passed")
|
||||||
190
tests/unit/services/test_bootstrap_outlook.py
Normal file
190
tests/unit/services/test_bootstrap_outlook.py
Normal file
|
|
@ -0,0 +1,190 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Bootstrap Outlook tests with a fake adapter + knowledge service.
|
||||||
|
|
||||||
|
Verifies:
|
||||||
|
- Well-known folders (inbox, sentitems) are discovered via Graph.
|
||||||
|
- Each message produces a `requestIngestion` call with sourceKind=outlook_message
|
||||||
|
and structured contentObjects (header / snippet / body).
|
||||||
|
- Pagination via `@odata.nextLink` is followed.
|
||||||
|
- changeKey is forwarded as contentVersion → idempotency.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
|
||||||
|
bootstrapOutlook,
|
||||||
|
OutlookBootstrapLimits,
|
||||||
|
_syntheticMessageId,
|
||||||
|
_buildContentObjects,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeOutlookAdapter:
|
||||||
|
def __init__(self, messages_by_folder, paginated_folder=None, page2=None):
|
||||||
|
self._folders = {"inbox": "INBOX-ID", "sentitems": "SENT-ID"}
|
||||||
|
self._messages = messages_by_folder
|
||||||
|
self._paginated_folder = paginated_folder
|
||||||
|
self._page2 = page2 or []
|
||||||
|
self.requested_endpoints = []
|
||||||
|
|
||||||
|
async def _graphGet(self, endpoint: str):
|
||||||
|
self.requested_endpoints.append(endpoint)
|
||||||
|
if endpoint.startswith("me/mailFolders/") and "/messages" not in endpoint:
|
||||||
|
wellKnown = endpoint.split("/")[-1]
|
||||||
|
fid = self._folders.get(wellKnown)
|
||||||
|
if not fid:
|
||||||
|
return {"error": "not found"}
|
||||||
|
return {"id": fid, "displayName": wellKnown}
|
||||||
|
# message page request: e.g. me/mailFolders/INBOX-ID/messages?...
|
||||||
|
for fid, messages in self._messages.items():
|
||||||
|
if f"me/mailFolders/{fid}/messages" in endpoint:
|
||||||
|
page = {"value": messages}
|
||||||
|
if fid == self._paginated_folder and "skiptoken" not in endpoint:
|
||||||
|
page["@odata.nextLink"] = (
|
||||||
|
"https://graph.microsoft.com/v1.0/"
|
||||||
|
f"me/mailFolders/{fid}/messages?$skiptoken=abc"
|
||||||
|
)
|
||||||
|
elif fid == self._paginated_folder and "skiptoken" in endpoint:
|
||||||
|
page = {"value": self._page2}
|
||||||
|
return page
|
||||||
|
return {"value": []}
|
||||||
|
|
||||||
|
async def browse(self, path):
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeKnowledgeService:
|
||||||
|
def __init__(self, duplicateIds=None):
|
||||||
|
self.calls = []
|
||||||
|
self._duplicates = duplicateIds or set()
|
||||||
|
|
||||||
|
async def requestIngestion(self, job):
|
||||||
|
self.calls.append(job)
|
||||||
|
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
|
||||||
|
return SimpleNamespace(
|
||||||
|
jobId=job.sourceId, status=status, contentHash="h",
|
||||||
|
fileId=job.sourceId, index=None, error=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _msg(mid: str, subject: str = "Hi", change: str = "ck1"):
|
||||||
|
return {
|
||||||
|
"id": mid,
|
||||||
|
"subject": subject,
|
||||||
|
"from": {"emailAddress": {"name": "Alice", "address": "a@x.com"}},
|
||||||
|
"toRecipients": [{"emailAddress": {"name": "Bob", "address": "b@x.com"}}],
|
||||||
|
"ccRecipients": [],
|
||||||
|
"receivedDateTime": "2025-01-01T10:00:00Z",
|
||||||
|
"bodyPreview": "Hello world",
|
||||||
|
"body": {"contentType": "text", "content": "Hello world\nThis is the body."},
|
||||||
|
"internetMessageId": f"<{mid}@local>",
|
||||||
|
"hasAttachments": False,
|
||||||
|
"changeKey": change,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_buildContentObjects_emits_header_snippet_body():
|
||||||
|
parts = _buildContentObjects(_msg("m1"), maxBodyChars=8000)
|
||||||
|
ids = [p["contentObjectId"] for p in parts]
|
||||||
|
assert ids == ["header", "snippet", "body"]
|
||||||
|
header = parts[0]["data"]
|
||||||
|
assert "Subject: Hi" in header
|
||||||
|
assert "From: Alice <a@x.com>" in header
|
||||||
|
assert "To: Bob <b@x.com>" in header
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
|
||||||
|
adapter = _FakeOutlookAdapter({
|
||||||
|
"INBOX-ID": [_msg("m1"), _msg("m2")],
|
||||||
|
"SENT-ID": [_msg("m3")],
|
||||||
|
})
|
||||||
|
knowledge = _FakeKnowledgeService()
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapOutlook(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=adapter,
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
limits=OutlookBootstrapLimits(maxAgeDays=None),
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 3
|
||||||
|
sourceIds = {c.sourceId for c in knowledge.calls}
|
||||||
|
assert sourceIds == {
|
||||||
|
_syntheticMessageId("c1", "m1"),
|
||||||
|
_syntheticMessageId("c1", "m2"),
|
||||||
|
_syntheticMessageId("c1", "m3"),
|
||||||
|
}
|
||||||
|
for job in knowledge.calls:
|
||||||
|
assert job.sourceKind == "outlook_message"
|
||||||
|
assert job.mimeType == "message/rfc822"
|
||||||
|
assert job.provenance["connectionId"] == "c1"
|
||||||
|
assert job.provenance["service"] == "outlook"
|
||||||
|
assert job.contentVersion == "ck1"
|
||||||
|
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_outlook_follows_pagination():
|
||||||
|
adapter = _FakeOutlookAdapter(
|
||||||
|
messages_by_folder={"INBOX-ID": [_msg("m1")], "SENT-ID": []},
|
||||||
|
paginated_folder="INBOX-ID",
|
||||||
|
page2=[_msg("m2"), _msg("m3")],
|
||||||
|
)
|
||||||
|
knowledge = _FakeKnowledgeService()
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapOutlook(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=adapter,
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
limits=OutlookBootstrapLimits(maxAgeDays=None),
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_outlook_reports_duplicates():
|
||||||
|
adapter = _FakeOutlookAdapter({
|
||||||
|
"INBOX-ID": [_msg("m1"), _msg("m2")],
|
||||||
|
"SENT-ID": [],
|
||||||
|
})
|
||||||
|
duplicates = {
|
||||||
|
_syntheticMessageId("c1", "m1"),
|
||||||
|
_syntheticMessageId("c1", "m2"),
|
||||||
|
}
|
||||||
|
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapOutlook(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=adapter,
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
limits=OutlookBootstrapLimits(maxAgeDays=None),
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 0
|
||||||
|
assert result["skippedDuplicate"] == 2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_buildContentObjects_emits_header_snippet_body()
|
||||||
|
test_bootstrap_outlook_indexes_messages_from_inbox_and_sent()
|
||||||
|
test_bootstrap_outlook_follows_pagination()
|
||||||
|
test_bootstrap_outlook_reports_duplicates()
|
||||||
|
print("OK — bootstrapOutlook tests passed")
|
||||||
209
tests/unit/services/test_bootstrap_sharepoint.py
Normal file
209
tests/unit/services/test_bootstrap_sharepoint.py
Normal file
|
|
@ -0,0 +1,209 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Bootstrap SharePoint tests with a fake adapter + knowledge service.
|
||||||
|
|
||||||
|
Verifies:
|
||||||
|
- Every discovered file triggers `requestIngestion`.
|
||||||
|
- Duplicate runs (same eTag revisions) report `skippedDuplicate`.
|
||||||
|
- Synthetic fileIds are stable across runs so idempotency works end-to-end.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
|
||||||
|
bootstrapSharepoint,
|
||||||
|
_syntheticFileId,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _ExtEntry:
|
||||||
|
name: str
|
||||||
|
path: str
|
||||||
|
isFolder: bool = False
|
||||||
|
size: Optional[int] = None
|
||||||
|
mimeType: Optional[str] = None
|
||||||
|
metadata: Dict[str, Any] = None
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeSpAdapter:
|
||||||
|
"""Minimal SharepointAdapter stand-in.
|
||||||
|
|
||||||
|
Layout:
|
||||||
|
"/" → 1 site
|
||||||
|
"/sites/site-1" → 2 files (f1, f2) + 1 folder (sub)
|
||||||
|
"/sites/site-1/sub" → 1 file (f3)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.downloaded: List[str] = []
|
||||||
|
|
||||||
|
async def browse(self, path: str, filter=None, limit=None):
|
||||||
|
if path == "/":
|
||||||
|
return [
|
||||||
|
_ExtEntry(
|
||||||
|
name="Site 1",
|
||||||
|
path="/sites/site-1",
|
||||||
|
isFolder=True,
|
||||||
|
metadata={"id": "site-1"},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
if path == "/sites/site-1":
|
||||||
|
return [
|
||||||
|
_ExtEntry(
|
||||||
|
name="f1.txt", path="/sites/site-1/f1.txt",
|
||||||
|
mimeType="text/plain", size=20,
|
||||||
|
metadata={"id": "f1", "revision": "etag-f1"},
|
||||||
|
),
|
||||||
|
_ExtEntry(
|
||||||
|
name="f2.txt", path="/sites/site-1/f2.txt",
|
||||||
|
mimeType="text/plain", size=20,
|
||||||
|
metadata={"id": "f2", "revision": "etag-f2"},
|
||||||
|
),
|
||||||
|
_ExtEntry(
|
||||||
|
name="sub", path="/sites/site-1/sub",
|
||||||
|
isFolder=True, metadata={"id": "sub"},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
if path == "/sites/site-1/sub":
|
||||||
|
return [
|
||||||
|
_ExtEntry(
|
||||||
|
name="f3.txt", path="/sites/site-1/sub/f3.txt",
|
||||||
|
mimeType="text/plain", size=20,
|
||||||
|
metadata={"id": "f3", "revision": "etag-f3"},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def download(self, path: str) -> bytes:
|
||||||
|
self.downloaded.append(path)
|
||||||
|
return path.encode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeKnowledgeService:
|
||||||
|
"""Records requestIngestion calls and returns the scripted handles."""
|
||||||
|
|
||||||
|
def __init__(self, duplicateIds=None):
|
||||||
|
self.calls: List[SimpleNamespace] = []
|
||||||
|
self._duplicateIds = duplicateIds or set()
|
||||||
|
|
||||||
|
async def requestIngestion(self, job):
|
||||||
|
self.calls.append(job)
|
||||||
|
status = "duplicate" if job.sourceId in self._duplicateIds else "indexed"
|
||||||
|
return SimpleNamespace(
|
||||||
|
jobId=f"{job.sourceKind}:{job.sourceId}",
|
||||||
|
status=status,
|
||||||
|
contentHash="h",
|
||||||
|
fileId=job.sourceId,
|
||||||
|
index=None,
|
||||||
|
error=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fakeRunExtraction(data, name, mime, options):
|
||||||
|
"""Produce a single synthetic text part so `_toContentObjects` returns one."""
|
||||||
|
return SimpleNamespace(
|
||||||
|
parts=[
|
||||||
|
SimpleNamespace(
|
||||||
|
id="p1",
|
||||||
|
data=data.decode("utf-8") if isinstance(data, bytes) else str(data),
|
||||||
|
typeGroup="text",
|
||||||
|
label="page:1",
|
||||||
|
metadata={"pageIndex": 0},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_walks_sites_and_subfolders():
|
||||||
|
adapter = _FakeSpAdapter()
|
||||||
|
knowledge = _FakeKnowledgeService()
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapSharepoint(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=adapter,
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
runExtractionFn=_fakeRunExtraction,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert len(knowledge.calls) == 3
|
||||||
|
sourceIds = {c.sourceId for c in knowledge.calls}
|
||||||
|
assert sourceIds == {
|
||||||
|
_syntheticFileId("c1", "f1"),
|
||||||
|
_syntheticFileId("c1", "f2"),
|
||||||
|
_syntheticFileId("c1", "f3"),
|
||||||
|
}
|
||||||
|
assert result["indexed"] == 3
|
||||||
|
assert result["skippedDuplicate"] == 0
|
||||||
|
assert adapter.downloaded == [
|
||||||
|
"/sites/site-1/f1.txt",
|
||||||
|
"/sites/site-1/f2.txt",
|
||||||
|
"/sites/site-1/sub/f3.txt",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_reports_duplicates_on_second_run():
|
||||||
|
adapter = _FakeSpAdapter()
|
||||||
|
duplicateIds = {
|
||||||
|
_syntheticFileId("c1", "f1"),
|
||||||
|
_syntheticFileId("c1", "f2"),
|
||||||
|
_syntheticFileId("c1", "f3"),
|
||||||
|
}
|
||||||
|
knowledge = _FakeKnowledgeService(duplicateIds=duplicateIds)
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapSharepoint(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=adapter,
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
runExtractionFn=_fakeRunExtraction,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["indexed"] == 0
|
||||||
|
assert result["skippedDuplicate"] == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_passes_connection_provenance():
|
||||||
|
adapter = _FakeSpAdapter()
|
||||||
|
knowledge = _FakeKnowledgeService()
|
||||||
|
connection = SimpleNamespace(mandateId="m1", userId="u1")
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await bootstrapSharepoint(
|
||||||
|
connectionId="c1",
|
||||||
|
adapter=adapter,
|
||||||
|
connection=connection,
|
||||||
|
knowledgeService=knowledge,
|
||||||
|
runExtractionFn=_fakeRunExtraction,
|
||||||
|
)
|
||||||
|
|
||||||
|
asyncio.run(_run())
|
||||||
|
for job in knowledge.calls:
|
||||||
|
assert job.sourceKind == "sharepoint_item"
|
||||||
|
assert job.mandateId == "m1"
|
||||||
|
assert job.provenance["connectionId"] == "c1"
|
||||||
|
assert job.provenance["authority"] == "msft"
|
||||||
|
assert job.provenance["service"] == "sharepoint"
|
||||||
|
assert job.contentVersion and job.contentVersion.startswith("etag-")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_bootstrap_walks_sites_and_subfolders()
|
||||||
|
test_bootstrap_reports_duplicates_on_second_run()
|
||||||
|
test_bootstrap_passes_connection_provenance()
|
||||||
|
print("OK — bootstrapSharepoint tests passed")
|
||||||
110
tests/unit/services/test_clean_email_body.py
Normal file
110
tests/unit/services/test_clean_email_body.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Unit tests for cleanEmailBody.
|
||||||
|
|
||||||
|
Covers: HTML→text normalisation, quoted-reply removal, signature removal,
|
||||||
|
whitespace collapse and truncation. The utility is used during Outlook
|
||||||
|
bootstrap; buggy cleaning would leak quoted threads / signatures into every
|
||||||
|
embedding.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subTextClean import (
|
||||||
|
cleanEmailBody,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_html_tags_and_scripts():
|
||||||
|
html = (
|
||||||
|
"<html><head><style>body{}</style></head>"
|
||||||
|
"<body><p>Hello <b>world</b></p>"
|
||||||
|
"<script>alert('x')</script></body></html>"
|
||||||
|
)
|
||||||
|
cleaned = cleanEmailBody(html)
|
||||||
|
assert "Hello" in cleaned
|
||||||
|
assert "world" in cleaned
|
||||||
|
assert "<" not in cleaned
|
||||||
|
assert "alert" not in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_quoted_reply_english():
|
||||||
|
body = (
|
||||||
|
"Actual answer from me.\n\n"
|
||||||
|
"On Mon, 1 Jan 2024 at 10:00, Someone <s@x.com> wrote:\n"
|
||||||
|
"> Original question?\n"
|
||||||
|
"> Second line.\n"
|
||||||
|
)
|
||||||
|
cleaned = cleanEmailBody(body)
|
||||||
|
assert "Actual answer" in cleaned
|
||||||
|
assert "Original question" not in cleaned
|
||||||
|
assert "wrote:" not in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_quoted_reply_german():
|
||||||
|
body = (
|
||||||
|
"Meine Antwort.\n\n"
|
||||||
|
"Am 1. Januar 2024 um 10:00 schrieb Max Muster <m@x.com>:\n"
|
||||||
|
"> Ursprüngliche Frage?\n"
|
||||||
|
)
|
||||||
|
cleaned = cleanEmailBody(body)
|
||||||
|
assert "Meine Antwort" in cleaned
|
||||||
|
assert "Ursprüngliche Frage" not in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_signature_after_dashes():
|
||||||
|
body = (
|
||||||
|
"Kurze Nachricht.\n"
|
||||||
|
"\n"
|
||||||
|
"--\n"
|
||||||
|
"Max Muster\n"
|
||||||
|
"Vorstand, Beispiel GmbH\n"
|
||||||
|
)
|
||||||
|
cleaned = cleanEmailBody(body)
|
||||||
|
assert "Kurze Nachricht" in cleaned
|
||||||
|
assert "Beispiel GmbH" not in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_signature_salutation_de():
|
||||||
|
body = (
|
||||||
|
"Die eigentliche Information steht hier.\n\n"
|
||||||
|
"Mit freundlichen Grüßen\n"
|
||||||
|
"Max Muster"
|
||||||
|
)
|
||||||
|
cleaned = cleanEmailBody(body)
|
||||||
|
assert "eigentliche Information" in cleaned
|
||||||
|
assert "Max Muster" not in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_to_max_chars():
|
||||||
|
body = "abc " * 5000
|
||||||
|
cleaned = cleanEmailBody(body, maxChars=200)
|
||||||
|
assert len(cleaned) <= 201 # includes trailing ellipsis
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_input_returns_empty_string():
|
||||||
|
assert cleanEmailBody("") == ""
|
||||||
|
assert cleanEmailBody(None) == "" # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_collapses_whitespace():
|
||||||
|
body = "A lot of spaces\n\n\n\nand blank lines"
|
||||||
|
cleaned = cleanEmailBody(body)
|
||||||
|
assert " " not in cleaned
|
||||||
|
assert "\n\n\n" not in cleaned
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_strips_html_tags_and_scripts()
|
||||||
|
test_strips_quoted_reply_english()
|
||||||
|
test_strips_quoted_reply_german()
|
||||||
|
test_strips_signature_after_dashes()
|
||||||
|
test_strips_signature_salutation_de()
|
||||||
|
test_truncate_to_max_chars()
|
||||||
|
test_empty_input_returns_empty_string()
|
||||||
|
test_collapses_whitespace()
|
||||||
|
print("OK — cleanEmailBody tests passed")
|
||||||
119
tests/unit/services/test_connection_purge.py
Normal file
119
tests/unit/services/test_connection_purge.py
Normal file
|
|
@ -0,0 +1,119 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Purge tests for KnowledgeObjects.deleteFileContentIndexByConnectionId.
|
||||||
|
|
||||||
|
Ensures that a `connection.revoked` event wipes every FileContentIndex + chunk
|
||||||
|
linked to the given connectionId while leaving entries from other connections
|
||||||
|
(or upload-files with connectionId=None) intact.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelKnowledge import FileContentIndex, ContentChunk
|
||||||
|
from modules.interfaces.interfaceDbKnowledge import KnowledgeObjects
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeDb:
|
||||||
|
"""Minimal in-memory stand-in for ``KnowledgeObjects.db``.
|
||||||
|
|
||||||
|
Supports just the subset of APIs that deleteFileContentIndexByConnectionId
|
||||||
|
touches: getRecordset(FileContentIndex|ContentChunk, ...) + recordDelete.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.indexRows: dict = {}
|
||||||
|
self.chunks: dict = {}
|
||||||
|
|
||||||
|
def addIndex(self, row: dict) -> None:
|
||||||
|
self.indexRows[row["id"]] = row
|
||||||
|
|
||||||
|
def addChunk(self, row: dict) -> None:
|
||||||
|
self.chunks[row["id"]] = row
|
||||||
|
|
||||||
|
def getRecordset(self, modelClass, recordFilter=None, **_):
|
||||||
|
filter_ = recordFilter or {}
|
||||||
|
if modelClass is FileContentIndex:
|
||||||
|
rows = list(self.indexRows.values())
|
||||||
|
elif modelClass is ContentChunk:
|
||||||
|
rows = list(self.chunks.values())
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
|
def match(row):
|
||||||
|
for k, v in filter_.items():
|
||||||
|
if row.get(k) != v:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
return [r for r in rows if match(r)]
|
||||||
|
|
||||||
|
def recordDelete(self, modelClass, recordId):
|
||||||
|
if modelClass is FileContentIndex:
|
||||||
|
return self.indexRows.pop(recordId, None) is not None
|
||||||
|
if modelClass is ContentChunk:
|
||||||
|
return self.chunks.pop(recordId, None) is not None
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _buildKnowledge():
|
||||||
|
"""Instantiate KnowledgeObjects without triggering the real DB bootstrap."""
|
||||||
|
ko = KnowledgeObjects.__new__(KnowledgeObjects)
|
||||||
|
ko.currentUser = None
|
||||||
|
ko.userId = None
|
||||||
|
ko._scopeCache = {}
|
||||||
|
ko.db = _FakeDb()
|
||||||
|
return ko
|
||||||
|
|
||||||
|
|
||||||
|
def test_purge_by_connection_removes_only_matching_rows():
|
||||||
|
ko = _buildKnowledge()
|
||||||
|
ko.db.addIndex({"id": "sp1", "connectionId": "cx", "mandateId": "m1", "sourceKind": "sharepoint_item"})
|
||||||
|
ko.db.addIndex({"id": "sp2", "connectionId": "cx", "mandateId": "m1", "sourceKind": "sharepoint_item"})
|
||||||
|
ko.db.addIndex({"id": "upload", "connectionId": None, "mandateId": "m1", "sourceKind": "file"})
|
||||||
|
ko.db.addIndex({"id": "other", "connectionId": "cy", "mandateId": "m1", "sourceKind": "outlook_message"})
|
||||||
|
ko.db.addChunk({"id": "c1", "fileId": "sp1"})
|
||||||
|
ko.db.addChunk({"id": "c2", "fileId": "sp1"})
|
||||||
|
ko.db.addChunk({"id": "c3", "fileId": "sp2"})
|
||||||
|
ko.db.addChunk({"id": "c4", "fileId": "upload"})
|
||||||
|
ko.db.addChunk({"id": "c5", "fileId": "other"})
|
||||||
|
|
||||||
|
result = ko.deleteFileContentIndexByConnectionId("cx")
|
||||||
|
|
||||||
|
assert result == {"indexRows": 2, "chunks": 3}
|
||||||
|
assert "sp1" not in ko.db.indexRows
|
||||||
|
assert "sp2" not in ko.db.indexRows
|
||||||
|
assert "upload" in ko.db.indexRows
|
||||||
|
assert "other" in ko.db.indexRows
|
||||||
|
assert set(ko.db.chunks.keys()) == {"c4", "c5"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_purge_with_empty_connection_id_is_a_noop():
|
||||||
|
ko = _buildKnowledge()
|
||||||
|
ko.db.addIndex({"id": "sp1", "connectionId": "cx"})
|
||||||
|
ko.db.addChunk({"id": "c1", "fileId": "sp1"})
|
||||||
|
|
||||||
|
result = ko.deleteFileContentIndexByConnectionId("")
|
||||||
|
|
||||||
|
assert result == {"indexRows": 0, "chunks": 0}
|
||||||
|
assert "sp1" in ko.db.indexRows
|
||||||
|
|
||||||
|
|
||||||
|
def test_purge_unknown_connection_returns_zero():
|
||||||
|
ko = _buildKnowledge()
|
||||||
|
ko.db.addIndex({"id": "sp1", "connectionId": "cx"})
|
||||||
|
|
||||||
|
result = ko.deleteFileContentIndexByConnectionId("nope")
|
||||||
|
|
||||||
|
assert result == {"indexRows": 0, "chunks": 0}
|
||||||
|
assert "sp1" in ko.db.indexRows
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_purge_by_connection_removes_only_matching_rows()
|
||||||
|
test_purge_with_empty_connection_id_is_a_noop()
|
||||||
|
test_purge_unknown_connection_returns_zero()
|
||||||
|
print("OK — connection-purge tests passed")
|
||||||
124
tests/unit/services/test_extraction_merge_strategy.py
Normal file
124
tests/unit/services/test_extraction_merge_strategy.py
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Test that runExtraction preserves per-part granularity when mergeStrategy=None.
|
||||||
|
|
||||||
|
The default MergeStrategy concatenates all text parts into a single ContentPart, which
|
||||||
|
collapses multi-page documents into one blob. This destroys RAG retrieval because every
|
||||||
|
document ends up as a single ContentChunk with a "blurred average" embedding.
|
||||||
|
|
||||||
|
Ingestion pipelines (requestIngestion callers) MUST pass mergeStrategy=None to preserve
|
||||||
|
per-page / per-section chunks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelExtraction import (
|
||||||
|
ContentPart,
|
||||||
|
ExtractionOptions,
|
||||||
|
MergeStrategy,
|
||||||
|
)
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
|
||||||
|
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
|
||||||
|
ChunkerRegistry,
|
||||||
|
Extractor,
|
||||||
|
ExtractorRegistry,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeMultiPagePdfExtractor(Extractor):
|
||||||
|
"""Emits one text ContentPart per simulated page."""
|
||||||
|
|
||||||
|
def __init__(self, pageCount: int = 10):
|
||||||
|
self.pageCount = pageCount
|
||||||
|
|
||||||
|
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||||
|
return mimeType == "application/pdf"
|
||||||
|
|
||||||
|
def getSupportedExtensions(self):
|
||||||
|
return [".pdf"]
|
||||||
|
|
||||||
|
def getSupportedMimeTypes(self):
|
||||||
|
return ["application/pdf"]
|
||||||
|
|
||||||
|
def extract(self, fileBytes: bytes, context):
|
||||||
|
return [
|
||||||
|
ContentPart(
|
||||||
|
id=f"page-{i}",
|
||||||
|
parentId=None,
|
||||||
|
label=f"page_{i + 1}",
|
||||||
|
typeGroup="text",
|
||||||
|
mimeType="text/plain",
|
||||||
|
data=f"Page {i + 1} content — distinct semantic anchor #{i}",
|
||||||
|
metadata={"pageIndex": i, "size": 64},
|
||||||
|
)
|
||||||
|
for i in range(self.pageCount)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _buildRegistry(pageCount: int) -> ExtractorRegistry:
|
||||||
|
registry = ExtractorRegistry()
|
||||||
|
fake = _FakeMultiPagePdfExtractor(pageCount)
|
||||||
|
registry.register("application/pdf", fake)
|
||||||
|
registry.register("pdf", fake)
|
||||||
|
return registry
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_options_merge_all_text_parts_into_one():
|
||||||
|
"""Regression safeguard: default ExtractionOptions still merges (legacy behaviour).
|
||||||
|
|
||||||
|
Non-ingestion callers (AI processing, summarization) rely on this default.
|
||||||
|
"""
|
||||||
|
registry = _buildRegistry(pageCount=5)
|
||||||
|
extracted = runExtraction(
|
||||||
|
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
|
||||||
|
ExtractionOptions(),
|
||||||
|
)
|
||||||
|
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
|
||||||
|
assert len(textParts) == 1, (
|
||||||
|
f"Default options should merge all text parts into one, got {len(textParts)}"
|
||||||
|
)
|
||||||
|
assert "Page 1" in textParts[0].data and "Page 5" in textParts[0].data, (
|
||||||
|
"Merged text should contain content from all pages"
|
||||||
|
)
|
||||||
|
print("test_default_options_merge_all_text_parts_into_one [PASS]")
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_none_preserves_all_text_parts():
|
||||||
|
"""Core fix: mergeStrategy=None preserves per-page granularity for RAG ingestion."""
|
||||||
|
registry = _buildRegistry(pageCount=500)
|
||||||
|
extracted = runExtraction(
|
||||||
|
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
|
||||||
|
ExtractionOptions(mergeStrategy=None),
|
||||||
|
)
|
||||||
|
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
|
||||||
|
assert len(textParts) == 500, (
|
||||||
|
f"mergeStrategy=None should preserve all 500 text parts, got {len(textParts)}"
|
||||||
|
)
|
||||||
|
assert textParts[0].label == "page_1"
|
||||||
|
assert textParts[-1].label == "page_500"
|
||||||
|
print("test_merge_none_preserves_all_text_parts [PASS]")
|
||||||
|
|
||||||
|
|
||||||
|
def test_explicit_merge_strategy_still_merges():
|
||||||
|
"""Callers can still opt in to merging by passing an explicit MergeStrategy."""
|
||||||
|
registry = _buildRegistry(pageCount=3)
|
||||||
|
extracted = runExtraction(
|
||||||
|
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
|
||||||
|
ExtractionOptions(mergeStrategy=MergeStrategy()),
|
||||||
|
)
|
||||||
|
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
|
||||||
|
assert len(textParts) == 1, (
|
||||||
|
f"Explicit MergeStrategy should merge, got {len(textParts)} parts"
|
||||||
|
)
|
||||||
|
print("test_explicit_merge_strategy_still_merges [PASS]")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_default_options_merge_all_text_parts_into_one()
|
||||||
|
test_merge_none_preserves_all_text_parts()
|
||||||
|
test_explicit_merge_strategy_still_merges()
|
||||||
|
print("\nAll merge-strategy tests passed.")
|
||||||
81
tests/unit/services/test_ingestion_hash_stability.py
Normal file
81
tests/unit/services/test_ingestion_hash_stability.py
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Test that _computeIngestionHash is stable across re-extractions of the same source.
|
||||||
|
|
||||||
|
Extractors generate fresh contentObjectIds (uuid.uuid4()) per run. The ingestion
|
||||||
|
hash MUST therefore be derived from content (contentType + data + order) only —
|
||||||
|
otherwise idempotency (AC4) silently fails: every re-extraction looks "new" and
|
||||||
|
triggers full re-embedding.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import (
|
||||||
|
_computeIngestionHash,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _makeObjects(seed: str = "alpha"):
|
||||||
|
"""Build a synthetic contentObjects list as routeDataFiles._autoIndexFile would."""
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"contentObjectId": str(uuid.uuid4()),
|
||||||
|
"contentType": "text",
|
||||||
|
"data": f"Page 1 of {seed}",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"contentObjectId": str(uuid.uuid4()),
|
||||||
|
"contentType": "text",
|
||||||
|
"data": f"Page 2 of {seed}",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"contentObjectId": str(uuid.uuid4()),
|
||||||
|
"contentType": "binary",
|
||||||
|
"data": "<image-bytes-as-b64>",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_stable_across_uuid_regeneration():
|
||||||
|
"""Same content + different contentObjectIds → same hash."""
|
||||||
|
a = _makeObjects("alpha")
|
||||||
|
b = _makeObjects("alpha") # identical data, fresh UUIDs
|
||||||
|
assert [o["contentObjectId"] for o in a] != [o["contentObjectId"] for o in b]
|
||||||
|
assert _computeIngestionHash(a) == _computeIngestionHash(b)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_changes_when_data_changes():
|
||||||
|
a = _makeObjects("alpha")
|
||||||
|
b = _makeObjects("beta")
|
||||||
|
assert _computeIngestionHash(a) != _computeIngestionHash(b)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_is_order_sensitive():
|
||||||
|
"""Reordered pages produce a different hash (different document)."""
|
||||||
|
a = _makeObjects("alpha")
|
||||||
|
b = list(reversed(a))
|
||||||
|
assert _computeIngestionHash(a) != _computeIngestionHash(b)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_distinguishes_text_vs_binary_with_same_payload():
|
||||||
|
a = [{"contentObjectId": "x", "contentType": "text", "data": "hello"}]
|
||||||
|
b = [{"contentObjectId": "x", "contentType": "binary", "data": "hello"}]
|
||||||
|
assert _computeIngestionHash(a) != _computeIngestionHash(b)
|
||||||
|
|
||||||
|
|
||||||
|
def test_hash_handles_empty_input():
|
||||||
|
assert _computeIngestionHash([]) == _computeIngestionHash([])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_hash_stable_across_uuid_regeneration()
|
||||||
|
test_hash_changes_when_data_changes()
|
||||||
|
test_hash_is_order_sensitive()
|
||||||
|
test_hash_distinguishes_text_vs_binary_with_same_payload()
|
||||||
|
test_hash_handles_empty_input()
|
||||||
|
print("OK — all 5 ingestion-hash stability tests passed")
|
||||||
235
tests/unit/services/test_knowledge_ingest_consumer.py
Normal file
235
tests/unit/services/test_knowledge_ingest_consumer.py
Normal file
|
|
@ -0,0 +1,235 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""Unit tests for KnowledgeIngestionConsumer event dispatch.
|
||||||
|
|
||||||
|
- `connection.established` → enqueue a `connection.bootstrap` job.
|
||||||
|
- `connection.revoked` → synchronous purge via KnowledgeObjects.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as consumer
|
||||||
|
|
||||||
|
|
||||||
|
def _resetRegistration(monkeypatch):
|
||||||
|
"""Force the module-level guard to register fresh in each test."""
|
||||||
|
monkeypatch.setattr(consumer, "_registered", False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_onConnectionEstablished_enqueues_bootstrap(monkeypatch):
|
||||||
|
startedJobs = []
|
||||||
|
|
||||||
|
async def _fakeStartJob(jobType, payload, **kwargs):
|
||||||
|
startedJobs.append({"jobType": jobType, "payload": payload, "kwargs": kwargs})
|
||||||
|
return "job-1"
|
||||||
|
|
||||||
|
monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
|
||||||
|
consumer._onConnectionEstablished(
|
||||||
|
connectionId="c1", authority="msft", userId="u1"
|
||||||
|
)
|
||||||
|
# Drain pending tasks created by the consumer.
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
try:
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
# If the consumer created a Task on a closed loop the fake startJob
|
||||||
|
# was still called synchronously via asyncio.run — in either case we
|
||||||
|
# check the recorded call.
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
assert len(startedJobs) == 1
|
||||||
|
assert startedJobs[0]["jobType"] == consumer.BOOTSTRAP_JOB_TYPE
|
||||||
|
assert startedJobs[0]["payload"]["connectionId"] == "c1"
|
||||||
|
assert startedJobs[0]["payload"]["authority"] == "msft"
|
||||||
|
assert startedJobs[0]["kwargs"]["triggeredBy"] == "u1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_onConnectionEstablished_ignores_missing_id(monkeypatch):
|
||||||
|
called = []
|
||||||
|
|
||||||
|
async def _fakeStartJob(*a, **kw):
|
||||||
|
called.append(1)
|
||||||
|
return "x"
|
||||||
|
|
||||||
|
monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
|
||||||
|
consumer._onConnectionEstablished(connectionId="", authority="msft")
|
||||||
|
assert called == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_onConnectionRevoked_runs_sync_purge(monkeypatch):
|
||||||
|
class _FakeKnowledge:
|
||||||
|
def __init__(self):
|
||||||
|
self.calls = []
|
||||||
|
|
||||||
|
def deleteFileContentIndexByConnectionId(self, cid):
|
||||||
|
self.calls.append(cid)
|
||||||
|
return {"indexRows": 2, "chunks": 5}
|
||||||
|
|
||||||
|
fakeKnow = _FakeKnowledge()
|
||||||
|
|
||||||
|
def _fakeGetInterface(_user=None):
|
||||||
|
return fakeKnow
|
||||||
|
|
||||||
|
monkeypatch.setattr(consumer, "getKnowledgeInterface", _fakeGetInterface)
|
||||||
|
consumer._onConnectionRevoked(
|
||||||
|
connectionId="c1", authority="msft", userId="u1", reason="disconnected"
|
||||||
|
)
|
||||||
|
assert fakeKnow.calls == ["c1"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_onConnectionRevoked_ignores_missing_id(monkeypatch):
|
||||||
|
seen = []
|
||||||
|
|
||||||
|
def _fakeGetInterface(_user=None):
|
||||||
|
class _K:
|
||||||
|
def deleteFileContentIndexByConnectionId(self, cid):
|
||||||
|
seen.append(cid)
|
||||||
|
return {"indexRows": 0, "chunks": 0}
|
||||||
|
|
||||||
|
return _K()
|
||||||
|
|
||||||
|
monkeypatch.setattr(consumer, "getKnowledgeInterface", _fakeGetInterface)
|
||||||
|
consumer._onConnectionRevoked(connectionId="")
|
||||||
|
assert seen == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
|
||||||
|
async def _run():
|
||||||
|
result = await consumer._bootstrapJobHandler(
|
||||||
|
{"payload": {"connectionId": "c1", "authority": "slack"}},
|
||||||
|
lambda *_: None,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert result["skipped"] is True
|
||||||
|
assert result["authority"] == "slack"
|
||||||
|
assert result["reason"] == "unsupported_authority"
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
|
||||||
|
calls = {"sp": 0, "ol": 0}
|
||||||
|
|
||||||
|
async def _fakeSp(connectionId, progressCb=None):
|
||||||
|
calls["sp"] += 1
|
||||||
|
return {"indexed": 1}
|
||||||
|
|
||||||
|
async def _fakeOl(connectionId, progressCb=None):
|
||||||
|
calls["ol"] += 1
|
||||||
|
return {"indexed": 2}
|
||||||
|
|
||||||
|
fakeSharepoint = types.ModuleType("subConnectorSyncSharepoint")
|
||||||
|
fakeSharepoint.bootstrapSharepoint = _fakeSp
|
||||||
|
fakeOutlook = types.ModuleType("subConnectorSyncOutlook")
|
||||||
|
fakeOutlook.bootstrapOutlook = _fakeOl
|
||||||
|
monkeypatch.setitem(
|
||||||
|
sys.modules,
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint",
|
||||||
|
fakeSharepoint,
|
||||||
|
)
|
||||||
|
monkeypatch.setitem(
|
||||||
|
sys.modules,
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook",
|
||||||
|
fakeOutlook,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await consumer._bootstrapJobHandler(
|
||||||
|
{"payload": {"connectionId": "c1", "authority": "msft"}},
|
||||||
|
lambda *_: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert calls == {"sp": 1, "ol": 1}
|
||||||
|
assert result["sharepoint"] == {"indexed": 1}
|
||||||
|
assert result["outlook"] == {"indexed": 2}
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_job_dispatches_google_parts(monkeypatch):
|
||||||
|
calls = {"gd": 0, "gm": 0}
|
||||||
|
|
||||||
|
async def _fakeGd(connectionId, progressCb=None):
|
||||||
|
calls["gd"] += 1
|
||||||
|
return {"indexed": 7}
|
||||||
|
|
||||||
|
async def _fakeGm(connectionId, progressCb=None):
|
||||||
|
calls["gm"] += 1
|
||||||
|
return {"indexed": 11}
|
||||||
|
|
||||||
|
fakeGdrive = types.ModuleType("subConnectorSyncGdrive")
|
||||||
|
fakeGdrive.bootstrapGdrive = _fakeGd
|
||||||
|
fakeGmail = types.ModuleType("subConnectorSyncGmail")
|
||||||
|
fakeGmail.bootstrapGmail = _fakeGm
|
||||||
|
monkeypatch.setitem(
|
||||||
|
sys.modules,
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive",
|
||||||
|
fakeGdrive,
|
||||||
|
)
|
||||||
|
monkeypatch.setitem(
|
||||||
|
sys.modules,
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail",
|
||||||
|
fakeGmail,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await consumer._bootstrapJobHandler(
|
||||||
|
{"payload": {"connectionId": "c1", "authority": "google"}},
|
||||||
|
lambda *_: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert calls == {"gd": 1, "gm": 1}
|
||||||
|
assert result["drive"] == {"indexed": 7}
|
||||||
|
assert result["gmail"] == {"indexed": 11}
|
||||||
|
|
||||||
|
|
||||||
|
def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
|
||||||
|
calls = {"cu": 0}
|
||||||
|
|
||||||
|
async def _fakeCu(connectionId, progressCb=None):
|
||||||
|
calls["cu"] += 1
|
||||||
|
return {"indexed": 4}
|
||||||
|
|
||||||
|
fakeClickup = types.ModuleType("subConnectorSyncClickup")
|
||||||
|
fakeClickup.bootstrapClickup = _fakeCu
|
||||||
|
monkeypatch.setitem(
|
||||||
|
sys.modules,
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup",
|
||||||
|
fakeClickup,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _run():
|
||||||
|
return await consumer._bootstrapJobHandler(
|
||||||
|
{"payload": {"connectionId": "c1", "authority": "clickup"}},
|
||||||
|
lambda *_: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(_run())
|
||||||
|
assert calls == {"cu": 1}
|
||||||
|
assert result["clickup"] == {"indexed": 4}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Usable without pytest fixtures for a quick smoke run.
|
||||||
|
class _MP:
|
||||||
|
def __init__(self):
|
||||||
|
self.undos = []
|
||||||
|
|
||||||
|
def setattr(self, target, name_or_value, value=None):
|
||||||
|
if value is None:
|
||||||
|
# target is an object, name_or_value is value → no, original signature
|
||||||
|
raise SystemExit("use pytest monkeypatch in CLI")
|
||||||
|
self.undos.append((target, name_or_value, getattr(target, name_or_value)))
|
||||||
|
setattr(target, name_or_value, value)
|
||||||
|
|
||||||
|
def setitem(self, mapping, key, value):
|
||||||
|
self.undos.append((mapping, key, mapping.get(key)))
|
||||||
|
mapping[key] = value
|
||||||
|
|
||||||
|
print("Run via pytest: pytest tests/unit/services/test_knowledge_ingest_consumer.py")
|
||||||
298
tests/unit/services/test_p1d_consent_prefs.py
Normal file
298
tests/unit/services/test_p1d_consent_prefs.py
Normal file
|
|
@ -0,0 +1,298 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Unit tests for P1d: consent gating, preference parsing, and walker behaviour.
|
||||||
|
|
||||||
|
Tests
|
||||||
|
-----
|
||||||
|
1. Bootstrap runner skips when ``knowledgeIngestionEnabled=False``.
|
||||||
|
2. ``loadConnectionPrefs`` returns safe defaults when preferences are absent.
|
||||||
|
3. ``loadConnectionPrefs`` maps all §2.6 keys correctly from a full prefs dict.
|
||||||
|
4. Gmail walker passes ``neutralize=True`` and ``mailContentDepth`` to IngestionJob.
|
||||||
|
5. Gmail walker produces only a header content-object when depth="metadata".
|
||||||
|
6. ClickUp walker skips description when scope="titles".
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
import unittest
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# 1. Bootstrap runner consent gate
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestBootstrapConsentGate(unittest.TestCase):
|
||||||
|
"""_bootstrapJobHandler must no-op when knowledgeIngestionEnabled is False."""
|
||||||
|
|
||||||
|
def _makeJob(self, connectionId="c-test", authority="google"):
|
||||||
|
return {"payload": {"connectionId": connectionId, "authority": authority}}
|
||||||
|
|
||||||
|
def _makeConn(self, enabled: bool):
|
||||||
|
conn = MagicMock()
|
||||||
|
conn.knowledgeIngestionEnabled = enabled
|
||||||
|
return conn
|
||||||
|
|
||||||
|
def test_skips_when_consent_disabled(self):
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as sut
|
||||||
|
|
||||||
|
fake_root = MagicMock()
|
||||||
|
fake_root.getUserConnectionById.return_value = self._makeConn(False)
|
||||||
|
|
||||||
|
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=fake_root):
|
||||||
|
result = asyncio.get_event_loop().run_until_complete(
|
||||||
|
sut._bootstrapJobHandler(self._makeJob(), lambda *a: None)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result.get("skipped") is True
|
||||||
|
assert result.get("reason") == "consent_disabled"
|
||||||
|
fake_root.getUserConnectionById.assert_called_once_with("c-test")
|
||||||
|
|
||||||
|
def test_proceeds_when_consent_enabled(self):
|
||||||
|
"""When consent is enabled, the handler should call at least one walker."""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as sut
|
||||||
|
|
||||||
|
fake_root = MagicMock()
|
||||||
|
fake_root.getUserConnectionById.return_value = self._makeConn(True)
|
||||||
|
|
||||||
|
# Patch the inner walker so it doesn't do real I/O.
|
||||||
|
async def _fakeBootstrap(**kwargs):
|
||||||
|
return {"indexed": 0}
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=fake_root),
|
||||||
|
patch(
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive.bootstrapGdrive",
|
||||||
|
new=AsyncMock(return_value={"indexed": 0}),
|
||||||
|
),
|
||||||
|
patch(
|
||||||
|
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail.bootstrapGmail",
|
||||||
|
new=AsyncMock(return_value={"indexed": 0}),
|
||||||
|
),
|
||||||
|
):
|
||||||
|
result = asyncio.get_event_loop().run_until_complete(
|
||||||
|
sut._bootstrapJobHandler(self._makeJob(authority="google"), lambda *a: None)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should not have 'skipped' at the top level.
|
||||||
|
assert result.get("skipped") is not True
|
||||||
|
assert result.get("authority") == "google"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# 2 + 3. loadConnectionPrefs
# ---------------------------------------------------------------------------


class TestLoadConnectionPrefs(unittest.TestCase):
    def _makeConn(self, prefs: Optional[Dict[str, Any]]):
        conn = MagicMock()
        conn.knowledgePreferences = prefs
        return conn

    def _mockRoot(self, prefs):
        root = MagicMock()
        root.getUserConnectionById.return_value = self._makeConn(prefs)
        return root

    def test_returns_safe_defaults_when_prefs_none(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import (
            ConnectionIngestionPrefs,
            loadConnectionPrefs,
        )

        with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(None)):
            prefs = loadConnectionPrefs("x")

        assert prefs.neutralizeBeforeEmbed is False
        assert prefs.mailContentDepth == "full"
        assert prefs.mailIndexAttachments is False
        assert prefs.maxAgeDays == 90
        assert prefs.clickupScope == "title_description"
        assert prefs.gmailEnabled is True
        assert prefs.driveEnabled is True

    def test_maps_all_keys(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs

        raw = {
            "neutralizeBeforeEmbed": True,
            "mailContentDepth": "metadata",
            "mailIndexAttachments": True,
            "filesIndexBinaries": False,
            "clickupScope": "with_comments",
            "maxAgeDays": 30,
            "surfaceToggles": {
                "google": {"gmail": False, "drive": True},
                "msft": {"sharepoint": False, "outlook": True},
            },
        }

        with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(raw)):
            prefs = loadConnectionPrefs("x")

        assert prefs.neutralizeBeforeEmbed is True
        assert prefs.mailContentDepth == "metadata"
        assert prefs.mailIndexAttachments is True
        assert prefs.filesIndexBinaries is False
        assert prefs.clickupScope == "with_comments"
        assert prefs.maxAgeDays == 30
        assert prefs.gmailEnabled is False
        assert prefs.driveEnabled is True
        assert prefs.sharepointEnabled is False
        assert prefs.outlookEnabled is True

    def test_invalid_depth_falls_back_to_default(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs

        raw = {"mailContentDepth": "everything_please"}

        with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(raw)):
            prefs = loadConnectionPrefs("x")

        assert prefs.mailContentDepth == "full"


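Taken together, the three tests above pin both the safe defaults and the key mapping of loadConnectionPrefs. The module under test is not part of this diff; a minimal sketch that would satisfy the assertions could look roughly like the following, where the field names and accepted values come from the assertions and everything else (the dataclass layout, and the helper taking the raw dict that the real function presumably reads from knowledgePreferences via getRootInterface) is an assumption.

# Sketch only: assumed shape, not the module under test.
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class IngestionPrefsSketch:
    neutralizeBeforeEmbed: bool = False
    mailContentDepth: str = "full"              # "metadata" | "snippet" | "full"
    mailIndexAttachments: bool = False
    filesIndexBinaries: bool = True             # default not pinned by the tests; assumed
    clickupScope: str = "title_description"
    maxAgeDays: int = 90
    gmailEnabled: bool = True
    driveEnabled: bool = True
    sharepointEnabled: bool = True
    outlookEnabled: bool = True


def buildIngestionPrefsSketch(raw: Optional[Dict[str, Any]]) -> IngestionPrefsSketch:
    # The real loadConnectionPrefs presumably pulls `raw` from
    # getRootInterface().getUserConnectionById(id).knowledgePreferences, as the mocks suggest.
    raw = raw or {}
    prefs = IngestionPrefsSketch()
    if raw.get("mailContentDepth") in ("metadata", "snippet", "full"):
        prefs.mailContentDepth = raw["mailContentDepth"]        # invalid values keep the "full" default
    for key in ("neutralizeBeforeEmbed", "mailIndexAttachments", "filesIndexBinaries"):
        if isinstance(raw.get(key), bool):
            setattr(prefs, key, raw[key])
    if isinstance(raw.get("maxAgeDays"), int):
        prefs.maxAgeDays = raw["maxAgeDays"]
    if raw.get("clickupScope") in ("titles", "title_description", "with_comments"):
        prefs.clickupScope = raw["clickupScope"]
    toggles = raw.get("surfaceToggles") or {}
    google, msft = toggles.get("google", {}), toggles.get("msft", {})
    prefs.gmailEnabled = google.get("gmail", True)
    prefs.driveEnabled = google.get("drive", True)
    prefs.sharepointEnabled = msft.get("sharepoint", True)
    prefs.outlookEnabled = msft.get("outlook", True)
    return prefs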
# ---------------------------------------------------------------------------
# 4. Gmail walker passes neutralize + mailContentDepth to IngestionJob
# ---------------------------------------------------------------------------


class TestGmailWalkerPrefs(unittest.TestCase):
    def _make_message(self, *, subject="Test", snippet="hello", body_text="full body"):
        import base64

        encoded = base64.urlsafe_b64encode(body_text.encode()).decode()
        return {
            "id": "msg-1",
            "historyId": "h-42",
            "threadId": "t-1",
            "snippet": snippet,
            "payload": {
                "mimeType": "multipart/alternative",
                "headers": [
                    {"name": "Subject", "value": subject},
                    {"name": "From", "value": "alice@example.com"},
                    {"name": "To", "value": "bob@example.com"},
                    {"name": "Date", "value": "Mon, 20 Apr 2026 10:00:00 +0000"},
                ],
                "parts": [
                    {
                        "mimeType": "text/plain",
                        "body": {"data": encoded},
                    }
                ],
            },
        }

    def test_neutralize_flag_forwarded(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
            GmailBootstrapLimits,
            GmailBootstrapResult,
            _ingestMessage,
        )
        from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob

        captured_jobs = []

        async def fake_requestIngestion(job: IngestionJob):
            captured_jobs.append(job)
            return MagicMock(status="indexed", error=None)

        ks = MagicMock()
        ks.requestIngestion = fake_requestIngestion

        limits = GmailBootstrapLimits(neutralize=True, mailContentDepth="full")
        result = GmailBootstrapResult(connectionId="c-1")

        asyncio.get_event_loop().run_until_complete(
            _ingestMessage(
                googleGetFn=AsyncMock(return_value={}),
                knowledgeService=ks,
                connectionId="c-1",
                mandateId="",
                userId="u-1",
                labelId="INBOX",
                message=self._make_message(),
                limits=limits,
                result=result,
                progressCb=None,
            )
        )

        assert len(captured_jobs) == 1
        assert captured_jobs[0].neutralize is True

    def test_metadata_depth_yields_only_header(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
            _buildContentObjects,
        )

        message = self._make_message(snippet="hi", body_text="should be excluded")
        parts = _buildContentObjects(message, maxBodyChars=4000, mailContentDepth="metadata")
        ids = [p["contentObjectId"] for p in parts]
        assert ids == ["header"]

    def test_snippet_depth_yields_header_and_snippet(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
            _buildContentObjects,
        )

        message = self._make_message(snippet="hi", body_text="should be excluded")
        parts = _buildContentObjects(message, maxBodyChars=4000, mailContentDepth="snippet")
        ids = [p["contentObjectId"] for p in parts]
        assert "header" in ids
        assert "snippet" in ids
        assert "body" not in ids


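The two depth tests above fix the contract of the Gmail _buildContentObjects helper: "metadata" yields only the header object, "snippet" adds the snippet, and anything deeper falls through to the decoded body. A rough, self-contained sketch of that gate follows; the contentObjectId values and depth strings come from the assertions, while the header text format and decoding details are assumed.

# Sketch only: assumed shape of the depth gate, not the helper under test.
import base64


def buildGmailContentObjectsSketch(message: dict, *, maxBodyChars: int, mailContentDepth: str) -> list:
    headers = {h["name"]: h["value"] for h in message["payload"].get("headers", [])}
    parts = [{
        "contentObjectId": "header",
        "text": f"{headers.get('Subject', '')} from {headers.get('From', '')} to {headers.get('To', '')}",
    }]
    if mailContentDepth == "metadata":
        return parts                                            # header only
    parts.append({"contentObjectId": "snippet", "text": message.get("snippet", "")})
    if mailContentDepth == "snippet":
        return parts                                            # header + snippet, no body
    body = ""
    for part in message["payload"].get("parts", []):            # first text/plain part, base64url-encoded
        if part.get("mimeType") == "text/plain" and part.get("body", {}).get("data"):
            body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8", "replace")
            break
    parts.append({"contentObjectId": "body", "text": body[:maxBodyChars]})
    return parts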
# ---------------------------------------------------------------------------
# 5. ClickUp walker respects clickupScope="titles"
# ---------------------------------------------------------------------------


class TestClickupWalkerScope(unittest.TestCase):
    def _make_task(self):
        return {
            "id": "task-1",
            "name": "Ship feature X",
            "date_updated": "1713888000000",
            "description": "This should be omitted",
            "text_content": "Also omitted",
            "status": {"status": "open"},
            "assignees": [],
            "tags": [],
            "list": {"name": "Backlog"},
            "folder": {},
            "space": {"name": "Engineering"},
        }

    def test_titles_scope_omits_description(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
            ClickupBootstrapLimits,
            _buildContentObjects,
        )

        limits = ClickupBootstrapLimits(clickupScope="titles")
        parts = _buildContentObjects(self._make_task(), limits)
        ids = [p["contentObjectId"] for p in parts]
        assert ids == ["header"]
        assert "description" not in ids

    def test_with_description_scope_includes_description(self):
        from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
            ClickupBootstrapLimits,
            _buildContentObjects,
        )

        limits = ClickupBootstrapLimits(clickupScope="title_description")
        parts = _buildContentObjects(self._make_task(), limits)
        ids = [p["contentObjectId"] for p in parts]
        assert "header" in ids
        assert "description" in ids


if __name__ == "__main__":
    unittest.main()
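The ClickUp scope gate works the same way: clickupScope="titles" keeps only the header object, while "title_description" (and, by extension, "with_comments") also emits the description. A small sketch under those assumptions, with the real limits object replaced by a plain scope string and the header text format assumed:

# Sketch only: the scope gate the two ClickUp tests pin; ids and scope strings from the assertions.
def buildClickupContentObjectsSketch(task: dict, clickupScope: str) -> list:
    status = (task.get("status") or {}).get("status", "")
    parts = [{"contentObjectId": "header", "text": f"{task.get('name', '')} [{status}]"}]
    if clickupScope == "titles":
        return parts                                            # title/header only
    description = task.get("description") or task.get("text_content") or ""
    parts.append({"contentObjectId": "description", "text": description})
    return parts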
206 tests/unit/workflows/test_parameterValidation.py Normal file
@@ -0,0 +1,206 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: universal action parameter validation + coercion.

This is the single source of truth for the action parameter contract:
every workflow action (called via the agent, the workflow graph, or REST)
runs through ``validateAndCoerceParameters`` before its body executes.

The tests pin three groups of behaviour:

1. **Required-parameter enforcement** — missing required params raise a
   typed ``InvalidActionParameterError`` instead of an opaque downstream
   error.
2. **Ref-payload normalization** — the agent's typed tool schema delivers
   ``FeatureInstanceRef`` as ``{id: ..., featureCode: ...}``, but actions
   expect a bare UUID string. Collapsing happens here, not in N action
   bodies.
3. **Primitive coercion** — ``"true"``/``"12"``/``"3.14"`` from JSON-shaped
   payloads are coerced to bool/int/float, removing ad-hoc branches.

Unknown extra keys (e.g. ``parentOperationId``) flow through unchanged so
the executor can keep injecting cross-cutting context.
"""
from __future__ import annotations

import pytest

from modules.datamodels.datamodelWorkflowActions import (
    WorkflowActionDefinition, WorkflowActionParameter,
)
from modules.shared.frontendTypes import FrontendType
from modules.workflows.processing.shared.parameterValidation import (
    InvalidActionParameterError, validateAndCoerceParameters,
)


def _makeActionDef(actionId: str = "trustee.refreshAccountingData", **paramDefs) -> WorkflowActionDefinition:
    """Build a real WorkflowActionDefinition; we only care about parameters."""
    parameters = {
        name: WorkflowActionParameter(
            name=name,
            type=spec["type"],
            frontendType=FrontendType.TEXT,
            required=spec.get("required", False),
            description=spec.get("description", ""),
        )
        for name, spec in paramDefs.items()
    }
    return WorkflowActionDefinition(
        actionId=actionId,
        description="Test action",
        parameters=parameters,
        execute=lambda *_a, **_kw: None,
    )


class TestRequiredEnforcement:
    def test_missingRequiredRaises(self):
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        with pytest.raises(InvalidActionParameterError) as excinfo:
            validateAndCoerceParameters(actionDef, {})
        assert excinfo.value.paramName == "featureInstanceId"
        assert "required" in excinfo.value.reason.lower()
        assert "trustee.refreshAccountingData.featureInstanceId" in str(excinfo.value)

    def test_optionalMissingIsFine(self):
        actionDef = _makeActionDef(
            forceRefresh={"type": "bool", "required": False},
        )
        result = validateAndCoerceParameters(actionDef, {})
        assert result == {}

    def test_requiredNoneCountsAsMissing(self):
        """Explicit ``None`` for a required param is missing, not "unset"."""
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        with pytest.raises(InvalidActionParameterError):
            validateAndCoerceParameters(actionDef, {"featureInstanceId": None})


class TestRefNormalization:
    """Trustee bug regression: the agent passed ``{id: ..., featureCode: ...}`` and
    Postgres failed with "can't adapt type 'dict'", which the connector
    silently turned into "no record found"."""

    def test_collapsesDictWithIdToString(self):
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        result = validateAndCoerceParameters(actionDef, {
            "featureInstanceId": {
                "id": "b7574103-f4a3-4894-8c23-74bd0d0e83a5",
                "featureCode": "trustee",
                "label": "Demo AG",
            },
        })
        assert result["featureInstanceId"] == "b7574103-f4a3-4894-8c23-74bd0d0e83a5"

    def test_passThroughString(self):
        """Workflow execution path passes a plain UUID; must not break."""
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        uuid = "b7574103-f4a3-4894-8c23-74bd0d0e83a5"
        result = validateAndCoerceParameters(actionDef, {"featureInstanceId": uuid})
        assert result["featureInstanceId"] == uuid

    def test_dictWithoutIdRaises(self):
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        with pytest.raises(InvalidActionParameterError) as excinfo:
            validateAndCoerceParameters(actionDef, {
                "featureInstanceId": {"featureCode": "trustee", "label": "Demo"},
            })
        assert "id" in excinfo.value.reason

    def test_otherDictTypeRaises(self):
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        with pytest.raises(InvalidActionParameterError):
            validateAndCoerceParameters(actionDef, {"featureInstanceId": 12345})

    def test_connectionRefAlsoCollapses(self):
        """The same logic applies to every Ref schema, not just FeatureInstanceRef."""
        actionDef = _makeActionDef(
            actionId="msft.readEmails",
            connection={"type": "ConnectionRef", "required": True},
        )
        result = validateAndCoerceParameters(actionDef, {
            "connection": {"id": "conn-uuid-123", "authority": "msft", "label": "Outlook"},
        })
        assert result["connection"] == "conn-uuid-123"


class TestPrimitiveCoercion:
    def test_boolFromTrueString(self):
        actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
        result = validateAndCoerceParameters(actionDef, {"forceRefresh": "true"})
        assert result["forceRefresh"] is True

    def test_boolFromFalseString(self):
        actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
        result = validateAndCoerceParameters(actionDef, {"forceRefresh": "false"})
        assert result["forceRefresh"] is False

    def test_boolPassthrough(self):
        actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
        assert validateAndCoerceParameters(actionDef, {"forceRefresh": True})["forceRefresh"] is True

    def test_boolBadValueRaises(self):
        actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
        with pytest.raises(InvalidActionParameterError):
            validateAndCoerceParameters(actionDef, {"forceRefresh": "maybe"})

    def test_intFromString(self):
        actionDef = _makeActionDef(periodMonth={"type": "int", "required": False})
        assert validateAndCoerceParameters(actionDef, {"periodMonth": "12"})["periodMonth"] == 12

    def test_intBadValueRaises(self):
        actionDef = _makeActionDef(periodMonth={"type": "int", "required": False})
        with pytest.raises(InvalidActionParameterError):
            validateAndCoerceParameters(actionDef, {"periodMonth": "twelve"})

    def test_floatFromString(self):
        actionDef = _makeActionDef(threshold={"type": "float", "required": False})
        assert validateAndCoerceParameters(actionDef, {"threshold": "0.75"})["threshold"] == 0.75


class TestUnknownAndOtherTypes:
    def test_unknownKeysPassThrough(self):
        """The executor injects parentOperationId, expectedDocumentFormats, etc.
        Validation must not strip them."""
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        result = validateAndCoerceParameters(actionDef, {
            "featureInstanceId": "uuid-123",
            "parentOperationId": "action_xyz",
            "expectedDocumentFormats": ["pdf", "txt"],
        })
        assert result["parentOperationId"] == "action_xyz"
        assert result["expectedDocumentFormats"] == ["pdf", "txt"]

    def test_strParamsAreUntouched(self):
        actionDef = _makeActionDef(dateFrom={"type": "str", "required": False})
        assert validateAndCoerceParameters(actionDef, {"dateFrom": "2025-01-01"})["dateFrom"] == "2025-01-01"

    def test_listParamsAreUntouched(self):
        actionDef = _makeActionDef(documentList={"type": "List[ActionDocument]", "required": False})
        docs = [{"name": "a"}, {"name": "b"}]
        assert validateAndCoerceParameters(actionDef, {"documentList": docs})["documentList"] is docs

    def test_doesNotMutateInput(self):
        """validateAndCoerceParameters must return a new dict."""
        actionDef = _makeActionDef(
            featureInstanceId={"type": "FeatureInstanceRef", "required": True},
        )
        original = {"featureInstanceId": {"id": "uuid", "featureCode": "trustee"}}
        result = validateAndCoerceParameters(actionDef, original)
        assert isinstance(original["featureInstanceId"], dict)
        assert result["featureInstanceId"] == "uuid"
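These four test classes together describe the whole contract of validateAndCoerceParameters: enforce required parameters, collapse Ref payloads to their bare id, coerce string-typed primitives, and pass unknown keys through on a fresh dict. The implementation itself is outside this excerpt; what follows is a compact sketch that would satisfy the assertions, with every structural detail (exception layout, type-string conventions) assumed rather than taken from parameterValidation.py.

# Sketch only: assumed shape, not the module under test.
class InvalidActionParameterErrorSketch(Exception):
    def __init__(self, actionId: str, paramName: str, reason: str):
        self.paramName, self.reason = paramName, reason
        super().__init__(f"{actionId}.{paramName}: {reason}")


def validateAndCoerceParametersSketch(actionDef, params: dict) -> dict:
    out = dict(params)                                  # never mutate the caller's dict
    for name, spec in actionDef.parameters.items():
        value = out.get(name)
        if value is None:
            if spec.required:
                raise InvalidActionParameterErrorSketch(actionDef.actionId, name, "required parameter missing")
            continue
        if spec.type.endswith("Ref"):                   # FeatureInstanceRef, ConnectionRef, ...
            if isinstance(value, dict):
                if "id" not in value:
                    raise InvalidActionParameterErrorSketch(actionDef.actionId, name, "ref payload has no 'id'")
                out[name] = value["id"]                 # collapse {id, featureCode, ...} to the bare id
            elif not isinstance(value, str):
                raise InvalidActionParameterErrorSketch(
                    actionDef.actionId, name, f"cannot coerce {type(value).__name__} to a ref id"
                )
        elif spec.type == "bool" and isinstance(value, str):
            if value.lower() not in ("true", "false"):
                raise InvalidActionParameterErrorSketch(actionDef.actionId, name, f"not a bool: {value!r}")
            out[name] = value.lower() == "true"
        elif spec.type == "int" and isinstance(value, str):
            try:
                out[name] = int(value)
            except ValueError:
                raise InvalidActionParameterErrorSketch(actionDef.actionId, name, f"not an int: {value!r}")
        elif spec.type == "float" and isinstance(value, str):
            try:
                out[name] = float(value)
            except ValueError:
                raise InvalidActionParameterErrorSketch(actionDef.actionId, name, f"not a float: {value!r}")
    # Unknown extra keys (parentOperationId, expectedDocumentFormats, ...) remain untouched in `out`.
    return out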