Compare commits

...

15 commits

Author SHA1 Message Date
Ida
ce671f61b6 feat: extended app-scheduler to index existing connections overnight 2026-04-29 14:39:40 +02:00
Ida
4a840e9e6e added neutralization option to indexing new connections 2026-04-29 14:39:40 +02:00
Ida
93cb6939dc feat: frontend consent integration 2026-04-29 14:39:40 +02:00
Ida
3add5c9a80 commit before rebase 2026-04-29 14:39:40 +02:00
Ida
6a5ff1ff7c feat(rag): P1 user-connection hooks + retrieval threshold fix
- connection.established/revoked callbacks from OAuth routes and
  connection management endpoints
- KnowledgeIngestionConsumer dispatches bootstrap job (established)
  and synchronous purge (revoked)
- FileContentIndex: add connectionId + sourceKind columns
- SharePoint bootstrap with @odata.nextLink pagination and eTag-based
  idempotency
- Outlook bootstrap treats messages as virtual documents with
  cleanEmailBody for HTML/quote/signature stripping
- fix(rag): lower buildAgentContext minScore thresholds from
  0.55/0.65/0.70 to 0.35 — previous values blocked all real matches
  from text-embedding-3-small
- 24 new unit tests covering purge, consumer dispatch, email cleaning
  and both bootstrap paths
2026-04-29 14:39:40 +02:00
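The SharePoint bootstrap named in the commit above can be illustrated with a short sketch. This is not the repository's code: the endpoint, token handling, and the persisted itemId → eTag map are assumptions, used only to show how @odata.nextLink pagination and eTag-based idempotency fit together.

```python
import requests  # assumes the requests package is available

GRAPH_ROOT = "https://graph.microsoft.com/v1.0"

def bootstrap_sharepoint_drive(drive_id: str, token: str, seen_etags: dict) -> list:
    """Walk every item of a drive once, following Graph's continuation links.

    seen_etags maps itemId -> eTag from a previous run; items whose eTag is
    unchanged are skipped, so re-running the bootstrap is idempotent.
    """
    to_ingest = []
    url = f"{GRAPH_ROOT}/drives/{drive_id}/root/children"
    headers = {"Authorization": f"Bearer {token}"}

    while url:
        page = requests.get(url, headers=headers, timeout=30).json()
        for item in page.get("value", []):
            etag = item.get("eTag")
            if seen_etags.get(item["id"]) == etag:
                continue  # already indexed at this version -> skip
            seen_etags[item["id"]] = etag
            to_ingest.append(item)
        # Graph returns a fully qualified URL for the next page, or omits the key
        url = page.get("@odata.nextLink")

    return to_ingest
```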
Ida
dff3d41845 fix(rag): stable ingestion idempotency across re-extractions (AC4)
Re-indexing the same file always triggered a full embedding run —
ingestion.skipped.duplicate never fired. Two independent causes:

1. _computeIngestionHash included contentObjectId in its payload, but
   extractors generate fresh uuid4() per run, making the hash a
   per-run nonce. Now hashed over (contentType, data) in extractor
   order — stable across re-extractions, sensitive to content,
   ordering, and type changes.
2. _autoIndexFile upserted the fresh pre-scan FileContentIndex before
   requestIngestion's duplicate check, wiping structure._ingestion
   and status=indexed from the prior run. The pre-upsert now merges
   the existing _ingestion metadata and preserves the indexed status.

Verified end-to-end: second PATCH /scope on an already-indexed file
logs ingestion.skipped.duplicate and returns in ~2s
with zero embedding API calls.

Adds test_ingestion_hash_stability.py (5 cases).
2026-04-29 14:39:40 +02:00
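To make the idempotency fix above concrete, here is a minimal sketch of the hashing rule the commit describes. The function and field names are illustrative, not the project's _computeIngestionHash signature; the point is that the digest covers only (contentType, data) in extractor order and ignores per-run identifiers.

```python
import hashlib
import json

def compute_ingestion_hash(parts: list[dict]) -> str:
    """Stable digest over extractor output: sensitive to content, order and type."""
    digest = hashlib.sha256()
    for part in parts:
        # contentObjectId (a fresh uuid4 per extraction) is deliberately excluded
        payload = json.dumps([part["contentType"], part["data"]],
                             ensure_ascii=False, separators=(",", ":"))
        digest.update(payload.encode("utf-8"))
    return digest.hexdigest()

# Re-extracting identical content with a new contentObjectId yields the same hash,
# so the duplicate check can fire and skip the embedding run.
run_a = [{"contentType": "text/plain", "data": "Hello", "contentObjectId": "uuid-1"}]
run_b = [{"contentType": "text/plain", "data": "Hello", "contentObjectId": "uuid-2"}]
assert compute_ingestion_hash(run_a) == compute_ingestion_hash(run_b)
```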
Ida
a7f4055130 fix(rag): preserve per-page granularity + remove on-demand extraction fallbacks
The default MergeStrategy concatenates every extracted text part into a
single ContentPart, collapsing a 500-page PDF into one chunk with a
blurred average embedding — RAG retrieval was effectively broken.

- ExtractionOptions.mergeStrategy is now Optional[MergeStrategy]; passing
  None preserves per-part granularity. Default factory kept for
  backward compatibility.
- routeDataFiles._autoIndexFile, _workspaceTools.readFile, and
  _documentTools.describeImage explicitly pass mergeStrategy=None.
- Agent tools no longer carry redundant extraction + requestIngestion
  fallback paths: the unified ingestion lane owns all corpus writes,
  and readFile/describeImage are pure consumers of the knowledge store.
- Unit test asserts runExtraction(mergeStrategy=None) keeps every part.
2026-04-29 14:39:40 +02:00
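A small sketch of the granularity change described above, with stand-in types rather than the repository's ExtractionOptions/MergeStrategy classes: passing mergeStrategy=None keeps one ContentPart per extracted page, while the default strategy still collapses everything for callers that rely on the old behaviour.

```python
from dataclasses import dataclass, field
from typing import Callable, List, Optional

@dataclass
class ContentPart:
    text: str

# Stand-in for the project's MergeStrategy: a callable that combines parts.
MergeStrategy = Callable[[List[ContentPart]], List[ContentPart]]

def concat_all(parts: List[ContentPart]) -> List[ContentPart]:
    """Default-style strategy: one blended chunk (and one blurred embedding)."""
    return [ContentPart("\n".join(p.text for p in parts))]

@dataclass
class ExtractionOptions:
    mergeStrategy: Optional[MergeStrategy] = field(default=concat_all)

def run_extraction(pages: List[str], options: ExtractionOptions) -> List[ContentPart]:
    parts = [ContentPart(p) for p in pages]
    if options.mergeStrategy is None:
        return parts                      # per-page granularity preserved
    return options.mergeStrategy(parts)   # backward-compatible default

pages = [f"page {i}" for i in range(500)]
assert len(run_extraction(pages, ExtractionOptions())) == 1             # old default
assert len(run_extraction(pages, ExtractionOptions(mergeStrategy=None))) == 500
```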
Ida
078b4eaaaf removed unnecessary test files 2026-04-29 14:39:40 +02:00
Ida
9d82d3d353 P0: injection facade 2026-04-29 14:39:40 +02:00
Patrick Motsch
ba21005401
Merge pull request #147 from valueonag/feat/demo-system-readieness
Feat/demo system readieness
2026-04-29 01:57:49 +02:00
ValueOn AG
052647a52b wired Infomaniak to AI adapters and tools 2026-04-29 01:52:47 +02:00
ValueOn AG
49f3660d89 fixes infomaniak download 2026-04-29 01:03:40 +02:00
ValueOn AG
9816f13ae9 fixes Infomaniak behaviour that differs from the docs 2026-04-29 00:57:28 +02:00
ValueOn AG
b405cebdec kdrive fix 2026-04-29 00:35:21 +02:00
ValueOn AG
fb3a1f0a51 fixes ai agents parameter flow 2026-04-28 11:58:53 +02:00
76 changed files with 9400 additions and 942 deletions

app.py

@@ -405,6 +405,16 @@ async def lifespan(app: FastAPI):
except Exception as e:
logger.warning(f"BackgroundJob recovery failed (non-critical): {e}")
# Subscribe knowledge ingestion to connection lifecycle events so OAuth
# connect/disconnect reliably trigger bootstrap/purge.
try:
from modules.serviceCenter.services.serviceKnowledge.subConnectorIngestConsumer import (
registerKnowledgeIngestionConsumer,
)
registerKnowledgeIngestionConsumer()
except Exception as e:
logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
yield
# --- Stop Managers ---
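For context on the hunk above: registerKnowledgeIngestionConsumer is only imported and called here, so its behaviour is not visible in this diff. A rough sketch of what such a consumer could look like, using a toy in-process event bus (the bus, handler names and log messages are assumptions, not the module's actual API):

```python
import logging

logger = logging.getLogger(__name__)

_handlers: dict[str, list] = {}  # toy in-process event bus, illustration only

def subscribe(event: str, handler) -> None:
    _handlers.setdefault(event, []).append(handler)

def publish(event: str, **payload) -> None:
    for handler in _handlers.get(event, []):
        handler(**payload)

def register_knowledge_ingestion_consumer() -> None:
    """Wire connection lifecycle events to the knowledge ingestion lane."""

    def on_established(connection_id: str, **_):
        # bootstrap asynchronously: enqueue a background job that walks the new source
        logger.info("dispatching bootstrap job for connection %s", connection_id)

    def on_revoked(connection_id: str, **_):
        # purge synchronously: indexed content must disappear with the consent
        logger.info("purging indexed content for connection %s", connection_id)

    subscribe("connection.established", on_established)
    subscribe("connection.revoked", on_revoked)

register_knowledge_ingestion_consumer()
publish("connection.established", connection_id="conn-123")
```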


@@ -0,0 +1,107 @@
# Development Environment Configuration
# System Configuration
APP_ENV_TYPE = dev
APP_ENV_LABEL = Development Instance Patrick
APP_API_URL = http://localhost:8000
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt
APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
# PostgreSQL DB Host
DB_HOST=localhost
DB_USER=poweron_dev
DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = http://localhost:8000/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
# AI configuration
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5ZmdDZ3hrSElrMnQzNFAtel9wX191VjVzN2g1LWZoa0V1YklubEdmMEJDdEZiR1RWeVZrM3V3enBHX3p6WUtTS0kwYkFyVEF0Nm8zX05CelVQcFJUc0lwVW5iNFczc1p1WWJ2WFBmd0lpLUxxWndEeUh0b2hGUHVpN19vb19nMTBnV1A1VmNpWERVX05lQ29VS20wTjZ3PT0=
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzR
XbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
# Teamsbot Browser Bot Service
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
# The bot will connect back to localhost:8000 via WebSocket
TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves.
# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av
# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a


@@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09


@@ -0,0 +1,100 @@
# Integration Environment Configuration
# System Configuration
APP_ENV_TYPE = int
APP_ENV_LABEL = Integration Instance
APP_API_URL = https://gateway-int.poweron-center.net
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9
# PostgreSQL DB Host
DB_HOST=gateway-int-server.postgres.database.azure.com
DB_USER=heeshkdlby
DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
STRIPE_WEBHOOK_SECRET = whsec_2agCQEbDPSOn2C40EJcwoPCqlvaPLF7M
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
# AI configuration
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnB5dkd6UkhtU3lhYmZMSlo0bklQZ2s3UTFBSkprZTNwWkg5Q2lVa0wtenhxWXpva21xVDVMRjdKSmhpTmxWS05IUTRoRHdCbktSRVVjcVFnY1RfV0N2S2dyV0dTMlhxQlRFVm41RkFTWVQzQThuVkZwdlNuVC05QlVRVXB6Qjk3akNpYmY1MFR6R1ByMzlIMllRZlRRYVVRN2ZBPT0=
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms
5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss


@@ -49,11 +49,13 @@ Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/go
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
-Service_CLICKUP_OAUTH_REDIRECT_URI = http://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
+Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
-STRIPE_WEBHOOK_SECRET = whsec_2agCQEbDPSOn2C40EJcwoPCqlvaPLF7M
+STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0


@@ -0,0 +1,101 @@
# Production Environment Configuration
# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://gateway-prod.poweron-center.net
# PostgreSQL DB Host
DB_HOST=gateway-prod-server.postgres.database.azure.com
DB_USER=gzxxmcrdhn
DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1
h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss


@@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=


@@ -0,0 +1,101 @@
# Production Environment Configuration
# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance Forgejo
APP_KEY_SYSVAR = /srv/gateway/shared/secrets/master_key.txt
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://api.poweron.swiss
# PostgreSQL DB Host
DB_HOST=10.20.0.21
DB_USER=poweron_dev
DB_PASSWORD_SECRET = mypassword
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=https://porta.poweron.swiss
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = srv/gateway/shared/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI =
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI =
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1
h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
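The ``PROD_ENC:`` prefix and the ``_SECRET`` naming convention (see the Stripe comment above) imply that encrypted values are decrypted when the configuration is loaded. A minimal loader sketch, assuming the blobs are base64-wrapped Fernet tokens and a hypothetical ``APP_ENCRYPTION_KEY`` environment variable holds the key -- neither detail is confirmed by this diff:

# Hypothetical loader sketch -- Fernet usage, the outer base64 layer, and the
# APP_ENCRYPTION_KEY variable name are assumptions, not taken from this repo.
import base64
import os
from cryptography.fernet import Fernet

def decryptConfigValue(raw: str, fernet: Fernet) -> str:
    """Return the plaintext for a config value, decrypting PROD_ENC: blobs."""
    if not raw or not raw.startswith("PROD_ENC:"):
        return raw
    token = base64.b64decode(raw[len("PROD_ENC:"):])   # assumed outer base64 wrapping
    return fernet.decrypt(token).decode("utf-8")

# Usage sketch:
#   fernet = Fernet(os.environ["APP_ENCRYPTION_KEY"])   # hypothetical key variable
#   dbPassword = decryptConfigValue(APP_CONFIG["DB_PASSWORD_SECRET"], fernet)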

View file

@ -11,7 +11,7 @@ APP_API_URL = https://api.poweron.swiss
# PostgreSQL DB Host # PostgreSQL DB Host
DB_HOST=10.20.0.21 DB_HOST=10.20.0.21
DB_USER=poweron_dev DB_USER=poweron_dev
DB_PASSWORD_SECRET = mypassword DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnA4UXZiMnRoUzVlbVRLX3JTRl94cVpMaURtMndZVmFBYXdvdnIxLV81dWwxWmhmcUlCMUFZbDhRT2NsQmNqSl9ZMmRWRVN1Y2JqNlVwOXRJY1VBTm1oSjNiaFE9PQ==
DB_PORT=5432 DB_PORT=5432
# Security Configuration # Security Configuration
@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ== Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script) # Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09 STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08= STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=

View file

@ -13,6 +13,35 @@ from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingMode
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _supportsCustomTemperature(modelName: str) -> bool:
"""Check whether an Anthropic model accepts a custom ``temperature``.
Anthropic's Extended-Thinking models (Claude 4.7 Opus and the
upcoming 4.7 Sonnet/Haiku, plus all 5.x and beyond) reject every
``temperature`` value with HTTP 400
``{"error": "`temperature` is deprecated for this model."}`` --
only the model's internal default is accepted. Older Claude 4.5 /
4.6 models still accept any value in [0, 1].
Returns:
True if ``temperature`` may be sent; False if it must be omitted.
"""
if not modelName:
return True
name = modelName.lower()
if name.startswith("claude-opus-4-7"):
return False
if name.startswith("claude-sonnet-4-7"):
return False
if name.startswith("claude-haiku-4-7"):
return False
# 5.x and beyond: same Extended-Thinking family, no custom temperature.
if name.startswith("claude-opus-5") or name.startswith("claude-sonnet-5") or name.startswith("claude-haiku-5"):
return False
return True
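A few illustrative checks of the gate above, using model names from the docstring (a sketch of expected behaviour, not part of the commit):

# Illustrative expectations for _supportsCustomTemperature (Anthropic variant):
assert _supportsCustomTemperature("claude-sonnet-4-6") is True    # pre-4.7 model: temperature allowed
assert _supportsCustomTemperature("claude-opus-4-7") is False     # Extended-Thinking: omit temperature
assert _supportsCustomTemperature("claude-haiku-5-0") is False    # 5.x family: omit temperature
assert _supportsCustomTemperature("") is True                     # empty name: keep legacy behaviour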
def loadConfigData(): def loadConfigData():
"""Load configuration data for Anthropic connector""" """Load configuration data for Anthropic connector"""
return { return {
@ -276,9 +305,12 @@ class AiAnthropic(BaseConnectorAi):
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"model": model.name, "model": model.name,
"messages": converted_messages, "messages": converted_messages,
"temperature": temperature,
} }
# Extended-Thinking models (claude-opus-4-7 etc.) reject any
# `temperature` value -- only the model default is accepted.
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
# Anthropic requires max_tokens - use provided value or throw error # Anthropic requires max_tokens - use provided value or throw error
if maxTokens is None: if maxTokens is None:
raise ValueError("maxTokens must be provided for Anthropic API calls") raise ValueError("maxTokens must be provided for Anthropic API calls")
@ -381,10 +413,11 @@ class AiAnthropic(BaseConnectorAi):
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"model": model.name, "model": model.name,
"messages": converted, "messages": converted,
"temperature": temperature,
"max_tokens": model.maxTokens, "max_tokens": model.maxTokens,
"stream": True, "stream": True,
} }
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
if system_prompt: if system_prompt:
payload["system"] = system_prompt payload["system"] = system_prompt
if modelCall.tools: if modelCall.tools:
@ -608,10 +641,10 @@ class AiAnthropic(BaseConnectorAi):
if systemPrompt: if systemPrompt:
payload["system"] = systemPrompt payload["system"] = systemPrompt
# Set temperature from model if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature payload["temperature"] = temperature
# Make API call with headers from httpClient (which includes anthropic-version) # Make API call with headers from httpClient (which includes anthropic-version)
response = await self.httpClient.post( response = await self.httpClient.post(
"https://api.anthropic.com/v1/messages", "https://api.anthropic.com/v1/messages",

View file

@ -11,6 +11,30 @@ from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingMode
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _supportsCustomTemperature(modelName: str) -> bool:
"""Check whether an OpenAI model accepts a custom `temperature` value.
GPT-5.x and the o-series (o1/o3/o4) reasoning models reject every
`temperature` value other than the default (1) with HTTP 400
`unsupported_value`. For these models we must omit `temperature`
from the payload entirely. Older chat-completions models
(gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) still accept any value
in [0, 2].
Returns:
True if `temperature` may be sent; False if it must be omitted.
"""
if not modelName:
return True
name = modelName.lower()
if name.startswith("gpt-5"):
return False
if name.startswith("o1") or name.startswith("o3") or name.startswith("o4"):
return False
return True
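The same pattern for the OpenAI helper (illustrative expectations only):

# Illustrative expectations for _supportsCustomTemperature (OpenAI variant):
assert _supportsCustomTemperature("gpt-4o-mini") is True   # legacy chat model: temperature allowed
assert _supportsCustomTemperature("gpt-5.1") is False      # gpt-5.x: only the default temperature
assert _supportsCustomTemperature("o3-mini") is False      # o-series reasoning model: omit temperature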
def loadConfigData(): def loadConfigData():
"""Load configuration data for OpenAI connector""" """Load configuration data for OpenAI connector"""
return { return {
@ -344,14 +368,18 @@ class AiOpenai(BaseConnectorAi):
payload = { payload = {
"model": model.name, "model": model.name,
"messages": messages, "messages": messages,
"temperature": temperature,
# Universal output-length cap. `max_tokens` is deprecated and # Universal output-length cap. `max_tokens` is deprecated and
# rejected outright by gpt-5.x / o-series; `max_completion_tokens` # rejected outright by gpt-5.x / o-series; `max_completion_tokens`
# is accepted by every current chat-completions model (legacy # is accepted by every current chat-completions model (legacy
# gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference. # gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference.
"max_completion_tokens": maxTokens "max_completion_tokens": maxTokens
} }
# gpt-5.x and o-series only accept the default temperature (1) and
# return HTTP 400 `unsupported_value` for anything else - omit the
# field entirely for those models.
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
if modelCall.tools: if modelCall.tools:
payload["tools"] = modelCall.tools payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto" payload["tool_choice"] = modelCall.toolChoice or "auto"
@ -428,13 +456,15 @@ class AiOpenai(BaseConnectorAi):
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"model": model.name, "model": model.name,
"messages": messages, "messages": messages,
"temperature": temperature,
# See callAiBasic for the rationale: `max_completion_tokens` # See callAiBasic for the rationale: `max_completion_tokens`
# is the universal output-length parameter; `max_tokens` is # is the universal output-length parameter; `max_tokens` is
# deprecated and rejected by gpt-5.x / o-series. # deprecated and rejected by gpt-5.x / o-series.
"max_completion_tokens": model.maxTokens, "max_completion_tokens": model.maxTokens,
"stream": True, "stream": True,
} }
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
if modelCall.tools: if modelCall.tools:
payload["tools"] = modelCall.tools payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto" payload["tool_choice"] = modelCall.toolChoice or "auto"
@ -585,15 +615,15 @@ class AiOpenai(BaseConnectorAi):
# Use the messages directly - they should already contain the image data # Use the messages directly - they should already contain the image data
# in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}} # in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
# Use parameters from model
temperature = model.temperature temperature = model.temperature
# Don't set maxTokens - let the model use its full context length # Don't set maxTokens - let the model use its full context length
payload = { payload = {
"model": model.name, "model": model.name,
"messages": messages, "messages": messages,
"temperature": temperature
} }
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
response = await self.httpClient.post( response = await self.httpClient.post(
model.apiUrl, model.apiUrl,

View file

@ -9,13 +9,15 @@ googleAuthScopes = [
"https://www.googleapis.com/auth/userinfo.profile", "https://www.googleapis.com/auth/userinfo.profile",
] ]
# Google — Data app (Gmail + Drive + identity for token responses) # Google — Data app (Gmail + Drive + Calendar + Contacts + identity for token responses)
googleDataScopes = [ googleDataScopes = [
"openid", "openid",
"https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/userinfo.profile", "https://www.googleapis.com/auth/userinfo.profile",
"https://www.googleapis.com/auth/gmail.readonly", "https://www.googleapis.com/auth/gmail.readonly",
"https://www.googleapis.com/auth/drive.readonly", "https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/calendar.readonly",
"https://www.googleapis.com/auth/contacts.readonly",
] ]
# Microsoft — Auth app: Graph profile only (MSAL adds openid, profile, offline_access, …) # Microsoft — Auth app: Graph profile only (MSAL adds openid, profile, offline_access, …)
@ -34,6 +36,8 @@ msftDataScopes = [
"OnlineMeetings.Read", "OnlineMeetings.Read",
"Chat.ReadWrite", "Chat.ReadWrite",
"ChatMessage.Send", "ChatMessage.Send",
"Calendars.Read",
"Contacts.Read",
] ]
@ -42,14 +46,8 @@ def msftDataScopesForRefresh() -> str:
return " ".join(msftDataScopes) return " ".join(msftDataScopes)
# Infomaniak — Data app (kDrive + Mail; user_info needed for /1/profile lookup) # Infomaniak intentionally has no OAuth scope set: the kDrive + Mail data APIs
infomaniakDataScopes = [ # are only reachable with manually issued Personal Access Tokens (see
"user_info", # wiki/d-guides/infomaniak-token-setup.md). The OAuth /authorize endpoint at
"kdrive", # login.infomaniak.com only accepts identity scopes (openid/profile/email/phone)
"mail", # and does not return tokens that work against /1/* data routes.
]
def infomaniakDataScopesForRefresh() -> str:
"""Space-separated scope string identical to authorization request."""
return " ".join(infomaniakDataScopes)

View file

@ -13,7 +13,7 @@ from modules.datamodels.datamodelSecurity import Token, TokenPurpose
from modules.datamodels.datamodelUam import AuthAuthority from modules.datamodels.datamodelUam import AuthAuthority
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp, parseTimestamp from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp, parseTimestamp
from modules.auth.oauthProviderConfig import msftDataScopesForRefresh, infomaniakDataScopesForRefresh from modules.auth.oauthProviderConfig import msftDataScopesForRefresh
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,9 +30,6 @@ class TokenManager:
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_ID") self.google_client_id = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_ID")
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_SECRET") self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_SECRET")
# Infomaniak Data OAuth (kDrive + Mail)
self.infomaniak_client_id = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_ID")
self.infomaniak_client_secret = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_SECRET")
def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]: def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Microsoft OAuth token using refresh token""" """Refresh Microsoft OAuth token using refresh token"""
@ -166,65 +163,6 @@ class TokenManager:
logger.error(f"Error refreshing Google token: {str(e)}") logger.error(f"Error refreshing Google token: {str(e)}")
return None return None
def refreshInfomaniakToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Infomaniak OAuth token using refresh token"""
try:
logger.debug(f"refreshInfomaniakToken: Starting Infomaniak token refresh for user {userId}")
if not self.infomaniak_client_id or not self.infomaniak_client_secret:
logger.error("Infomaniak OAuth configuration not found")
return None
tokenUrl = "https://login.infomaniak.com/token"
data = {
"client_id": self.infomaniak_client_id,
"client_secret": self.infomaniak_client_secret,
"grant_type": "refresh_token",
"refresh_token": refreshToken,
"scope": infomaniakDataScopesForRefresh(),
}
with httpx.Client(timeout=30.0) as client:
response = client.post(tokenUrl, data=data)
logger.debug(f"refreshInfomaniakToken: HTTP response status: {response.status_code}")
if response.status_code == 200:
tokenData = response.json()
if "access_token" not in tokenData:
logger.error("Infomaniak token refresh response missing access_token")
return None
newToken = Token(
userId=userId,
authority=AuthAuthority.INFOMANIAK,
connectionId=oldToken.connectionId,
tokenPurpose=TokenPurpose.DATA_CONNECTION,
tokenAccess=tokenData["access_token"],
tokenRefresh=tokenData.get("refresh_token", refreshToken),
tokenType=tokenData.get("token_type", "bearer"),
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
createdAt=getUtcTimestamp(),
)
return newToken
logger.error(
f"Failed to refresh Infomaniak token: {response.status_code} - {response.text}"
)
if response.status_code == 400:
try:
errorData = response.json()
if errorData.get("error") == "invalid_grant":
logger.warning(
"Infomaniak refresh token is invalid or expired - user needs to re-authenticate"
)
except Exception:
pass
return None
except Exception as e:
logger.error(f"Error refreshing Infomaniak token: {str(e)}")
return None
def refreshToken(self, oldToken: Token) -> Optional[Token]: def refreshToken(self, oldToken: Token) -> Optional[Token]:
"""Refresh an expired token using the appropriate OAuth service""" """Refresh an expired token using the appropriate OAuth service"""
try: try:
@ -268,9 +206,6 @@ class TokenManager:
elif oldToken.authority == AuthAuthority.GOOGLE: elif oldToken.authority == AuthAuthority.GOOGLE:
logger.debug(f"refreshToken: Refreshing Google token") logger.debug(f"refreshToken: Refreshing Google token")
return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken) return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
elif oldToken.authority == AuthAuthority.INFOMANIAK:
logger.debug(f"refreshToken: Refreshing Infomaniak token")
return self.refreshInfomaniakToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
else: else:
logger.warning(f"Unknown authority for token refresh: {oldToken.authority}") logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
return None return None

View file

@ -144,45 +144,6 @@ class TokenRefreshService:
logger.error(f"Error refreshing Microsoft token for connection {connection.id}: {str(e)}") logger.error(f"Error refreshing Microsoft token for connection {connection.id}: {str(e)}")
return False return False
async def _refresh_infomaniak_token(self, interface, connection: UserConnection) -> bool:
"""Refresh Infomaniak OAuth token"""
try:
logger.debug(f"Refreshing Infomaniak token for connection {connection.id}")
current_token = interface.getConnectionToken(connection.id)
if not current_token:
logger.warning(f"No Infomaniak token found for connection {connection.id}")
return False
from modules.auth.tokenManager import TokenManager
token_manager = TokenManager()
refreshedToken = token_manager.refreshToken(current_token)
if refreshedToken:
interface.saveConnectionToken(refreshedToken)
interface.db.recordModify(UserConnection, connection.id, {
"lastChecked": getUtcTimestamp(),
"expiresAt": refreshedToken.expiresAt,
})
logger.info(f"Successfully refreshed Infomaniak token for connection {connection.id}")
try:
audit_logger.logSecurityEvent(
userId=str(connection.userId),
mandateId="system",
action="token_refresh",
details=f"Infomaniak token refreshed for connection {connection.id}",
)
except Exception:
pass
return True
logger.warning(f"Failed to refresh Infomaniak token for connection {connection.id}")
return False
except Exception as e:
logger.error(f"Error refreshing Infomaniak token for connection {connection.id}: {str(e)}")
return False
async def refresh_expired_tokens(self, user_id: str) -> Dict[str, Any]: async def refresh_expired_tokens(self, user_id: str) -> Dict[str, Any]:
""" """
Refresh expired OAuth tokens for a user Refresh expired OAuth tokens for a user
@ -216,7 +177,7 @@ class TokenRefreshService:
for connection in connections: for connection in connections:
# Only refresh expired OAuth connections # Only refresh expired OAuth connections
if (connection.tokenStatus == 'expired' and if (connection.tokenStatus == 'expired' and
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT, AuthAuthority.INFOMANIAK]): connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT]):
# Check rate limiting # Check rate limiting
if self._is_rate_limited(connection.id): if self._is_rate_limited(connection.id):
@ -233,8 +194,6 @@ class TokenRefreshService:
success = await self._refresh_google_token(root_interface, connection) success = await self._refresh_google_token(root_interface, connection)
elif connection.authority == AuthAuthority.MSFT: elif connection.authority == AuthAuthority.MSFT:
success = await self._refresh_microsoft_token(root_interface, connection) success = await self._refresh_microsoft_token(root_interface, connection)
elif connection.authority == AuthAuthority.INFOMANIAK:
success = await self._refresh_infomaniak_token(root_interface, connection)
if success: if success:
refreshed_count += 1 refreshed_count += 1
@ -289,7 +248,7 @@ class TokenRefreshService:
# Only refresh active tokens that expire soon # Only refresh active tokens that expire soon
if (connection.tokenStatus == 'active' and if (connection.tokenStatus == 'active' and
connection.tokenExpiresAt and connection.tokenExpiresAt and
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT, AuthAuthority.INFOMANIAK]): connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT]):
# Check if token expires within 5 minutes # Check if token expires within 5 minutes
time_until_expiry = connection.tokenExpiresAt - current_time time_until_expiry = connection.tokenExpiresAt - current_time
@ -310,8 +269,6 @@ class TokenRefreshService:
success = await self._refresh_google_token(root_interface, connection) success = await self._refresh_google_token(root_interface, connection)
elif connection.authority == AuthAuthority.MSFT: elif connection.authority == AuthAuthority.MSFT:
success = await self._refresh_microsoft_token(root_interface, connection) success = await self._refresh_microsoft_token(root_interface, connection)
elif connection.authority == AuthAuthority.INFOMANIAK:
success = await self._refresh_infomaniak_token(root_interface, connection)
if success: if success:
refreshed_count += 1 refreshed_count += 1

View file

@ -21,6 +21,47 @@ logger = logging.getLogger(__name__)
# No mapping needed - table name = Pydantic model name exactly # No mapping needed - table name = Pydantic model name exactly
class DatabaseQueryError(RuntimeError):
"""Raised by DB read methods when the underlying SQL query failed.
Empty result sets do NOT raise this; they return ``[]`` / ``None`` /
``{"items": [], "totalItems": 0, "totalPages": 0}`` as before. This
exception is reserved for **real** failures: psycopg2 ProgrammingError,
DataError, OperationalError, IntegrityError, plus any unexpected
Python error raised inside a query path.
Read methods used to silently swallow such errors and return empty
collections, which made every caller incapable of distinguishing
"no rows" from "broken query / type adapter / dropped column / lost
connection". That hid concrete bugs (e.g. dict passed where Postgres
expected a UUID string) behind misleading downstream "no record found"
errors.
"""
def __init__(self, table: str, message: str, original: BaseException = None):
super().__init__(f"{table}: {message}")
self.table = table
self.original = original
def _rollbackQuietly(connection) -> None:
"""Restore the connection state after a failed query.
Postgres puts the connection in an error state after any failed
statement; subsequent queries on the same connection raise
``InFailedSqlTransaction`` until we rollback. We swallow rollback
errors because the original query error is what the caller should
see; a secondary rollback failure typically means the connection
is gone and will be reopened on the next ``_ensure_connection``.
"""
if connection is None:
return
try:
connection.rollback()
except Exception:
pass
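With reads raising instead of silently returning empty collections, callers can now tell "no rows" apart from a broken query. A minimal caller sketch (the surrounding function and its arguments are illustrative, not from this commit):

# Illustrative caller pattern -- only DatabaseQueryError, getRecord and
# UserConnection are taken from this diff; the rest is a sketch.
def loadConnectionOrFail(db, connectionId: str):
    try:
        row = db.getRecord(UserConnection, connectionId)
    except DatabaseQueryError as exc:
        # Real failure: broken SQL, type adapter, dropped column, lost connection.
        logger.error(f"Lookup on {exc.table} failed: {exc}")
        raise
    if row is None:
        return None   # genuinely no such record -- distinct from a failed query
    return row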
class SystemTable(PowerOnModel): class SystemTable(PowerOnModel):
"""Data model for system table entries""" """Data model for system table entries"""
@ -762,7 +803,8 @@ class DatabaseConnector:
return record return record
except Exception as e: except Exception as e:
logger.error(f"Error loading record {recordId} from table {table}: {e}") logger.error(f"Error loading record {recordId} from table {table}: {e}")
return None _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def getRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]: def getRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]:
"""Load one row by primary key (routes / services; wraps _loadRecord).""" """Load one row by primary key (routes / services; wraps _loadRecord)."""
@ -848,7 +890,8 @@ class DatabaseConnector:
return records return records
except Exception as e: except Exception as e:
logger.error(f"Error loading table {table}: {e}") logger.error(f"Error loading table {table}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def _registerInitialId(self, table: str, initialId: str) -> bool: def _registerInitialId(self, table: str, initialId: str) -> bool:
"""Registers the initial ID for a table.""" """Registers the initial ID for a table."""
@ -1047,7 +1090,8 @@ class DatabaseConnector:
return records return records
except Exception as e: except Exception as e:
logger.error(f"Error loading records from table {table}: {e}") logger.error(f"Error loading records from table {table}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def _buildPaginationClauses( def _buildPaginationClauses(
self, self,
@ -1270,7 +1314,8 @@ class DatabaseConnector:
return {"items": records, "totalItems": totalItems, "totalPages": totalPages} return {"items": records, "totalItems": totalItems, "totalPages": totalPages}
except Exception as e: except Exception as e:
logger.error(f"Error in getRecordsetPaginated for table {table}: {e}") logger.error(f"Error in getRecordsetPaginated for table {table}: {e}")
return {"items": [], "totalItems": 0, "totalPages": 0} _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def getDistinctColumnValues( def getDistinctColumnValues(
self, self,
@ -1332,7 +1377,8 @@ class DatabaseConnector:
return result return result
except Exception as e: except Exception as e:
logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}") logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def recordCreate( def recordCreate(
self, model_class: type, record: Union[Dict[str, Any], BaseModel] self, model_class: type, record: Union[Dict[str, Any], BaseModel]
@ -1710,7 +1756,8 @@ class DatabaseConnector:
return records return records
except Exception as e: except Exception as e:
logger.error(f"Error in semantic search on {table}: {e}") logger.error(f"Error in semantic search on {table}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def close(self, forceClose: bool = False): def close(self, forceClose: bool = False):
"""Close the database connection. """Close the database connection.

View file

@ -14,6 +14,8 @@ logger = logging.getLogger(__name__)
_DRIVE_BASE = "https://www.googleapis.com/drive/v3" _DRIVE_BASE = "https://www.googleapis.com/drive/v3"
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1" _GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
_CALENDAR_BASE = "https://www.googleapis.com/calendar/v3"
_PEOPLE_BASE = "https://people.googleapis.com/v1"
async def _googleGet(token: str, url: str) -> Dict[str, Any]: async def _googleGet(token: str, url: str) -> Dict[str, Any]:
@ -274,12 +276,480 @@ class GmailAdapter(ServiceAdapter):
] ]
class CalendarAdapter(ServiceAdapter):
"""Google Calendar ServiceAdapter -- browse calendars, list events, .ics download.
Path conventions:
``""`` / ``"/"`` -> list calendars from ``calendarList``
``"/<calendarId>"`` -> list upcoming events in that calendar
``"/<calendarId>/<eventId>"`` -> reserved for future event detail browse
"""
_DEFAULT_EVENT_LIMIT = 100
_MAX_EVENT_LIMIT = 2500
def __init__(self, accessToken: str):
self._token = accessToken
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250"
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google Calendar list failed: {result['error']}")
return []
calendars = result.get("items", [])
if filter:
f = filter.lower()
calendars = [c for c in calendars if f in (c.get("summary") or "").lower()]
return [
ExternalEntry(
name=c.get("summaryOverride") or c.get("summary", ""),
path=f"/{c.get('id', '')}",
isFolder=True,
metadata={
"id": c.get("id"),
"primary": c.get("primary", False),
"accessRole": c.get("accessRole"),
"backgroundColor": c.get("backgroundColor"),
"timeZone": c.get("timeZone"),
},
)
for c in calendars
]
from urllib.parse import quote
calendarId = cleanPath.split("/", 1)[0]
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
url = (
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
)
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google Calendar events failed: {result['error']}")
return []
events = result.get("items", [])
return [
ExternalEntry(
name=ev.get("summary", "(no title)"),
path=f"/{calendarId}/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime") or (ev.get("start") or {}).get("date"),
"end": (ev.get("end") or {}).get("dateTime") or (ev.get("end") or {}).get("date"),
"location": ev.get("location"),
"organizer": (ev.get("organizer") or {}).get("email"),
"htmlLink": ev.get("htmlLink"),
"status": ev.get("status"),
},
)
for ev in events
]
async def download(self, path: str) -> DownloadResult:
from urllib.parse import quote
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
calendarId, eventId = cleanPath.split("/", 1)
url = f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events/{quote(eventId, safe='')}"
ev = await _googleGet(self._token, url)
if "error" in ev:
logger.warning(f"Google Calendar event fetch failed: {ev['error']}")
return DownloadResult()
icsBytes = _googleEventToIcs(ev)
summary = ev.get("summary") or eventId
safeName = _googleSafeFileName(summary) or "event"
return DownloadResult(
data=icsBytes,
fileName=f"{safeName}.ics",
mimeType="text/calendar",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Google Calendar upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
from urllib.parse import quote
calendarId = (path or "").strip("/").split("/", 1)[0] or "primary"
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
url = (
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true"
)
result = await _googleGet(self._token, url)
if "error" in result:
return []
return [
ExternalEntry(
name=ev.get("summary", "(no title)"),
path=f"/{calendarId}/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime") or (ev.get("start") or {}).get("date"),
"end": (ev.get("end") or {}).get("dateTime") or (ev.get("end") or {}).get("date"),
},
)
for ev in result.get("items", [])
]
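A short usage sketch for the adapter above (token and paths are placeholders; error handling omitted):

# Illustrative usage of the Google CalendarAdapter -- token and paths are placeholders.
import asyncio

async def _demoCalendar(accessToken: str) -> None:
    adapter = CalendarAdapter(accessToken)
    calendars = await adapter.browse("")                         # list calendars from calendarList
    if not calendars:
        return
    events = await adapter.browse(calendars[0].path, limit=10)   # upcoming events
    if events:
        result = await adapter.download(events[0].path)          # synthesised .ics
        print(result.fileName, result.mimeType)

# asyncio.run(_demoCalendar("<oauth-access-token>"))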
class ContactsAdapter(ServiceAdapter):
"""Google Contacts ServiceAdapter -- People API (read-only).
Path conventions:
``""`` / ``"/"`` -> list contact groups (incl. virtual ``all`` for the user's connections)
``"/all"`` -> list all ``people/me/connections``
``"/<groupResourceName>"`` -> list members of that contact group (e.g. ``contactGroups/myFriends``)
``"/<group>/<personId>"`` -> reserved for future detail browse;
``personId`` is the suffix after ``people/``
"""
_DEFAULT_CONTACT_LIMIT = 200
_MAX_CONTACT_LIMIT = 1000
_PERSON_FIELDS = (
"names,emailAddresses,phoneNumbers,organizations,addresses,biographies,memberships"
)
def __init__(self, accessToken: str):
self._token = accessToken
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
entries: List[ExternalEntry] = [
ExternalEntry(
name="Alle Kontakte",
path="/all",
isFolder=True,
metadata={"id": "all", "isVirtual": True},
),
]
url = f"{_PEOPLE_BASE}/contactGroups?pageSize=200"
result = await _googleGet(self._token, url)
if "error" not in result:
for grp in result.get("contactGroups", []):
name = grp.get("formattedName") or grp.get("name") or ""
if not name:
continue
entries.append(
ExternalEntry(
name=name,
path=f"/{grp.get('resourceName', '')}",
isFolder=True,
metadata={
"id": grp.get("resourceName"),
"memberCount": grp.get("memberCount", 0),
"groupType": grp.get("groupType"),
},
)
)
else:
logger.warning(f"Google contactGroups list failed: {result['error']}")
return entries
from urllib.parse import quote
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
groupRef = cleanPath.split("/", 1)[0]
if groupRef == "all":
url = (
f"{_PEOPLE_BASE}/people/me/connections"
f"?pageSize={min(effectiveLimit, 1000)}&personFields={self._PERSON_FIELDS}"
)
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google People connections failed: {result['error']}")
return []
people = result.get("connections", [])
else:
groupResource = groupRef
grpUrl = (
f"{_PEOPLE_BASE}/{quote(groupResource, safe='/')}"
f"?maxMembers={min(effectiveLimit, 1000)}"
)
grpResult = await _googleGet(self._token, grpUrl)
if "error" in grpResult:
logger.warning(f"Google contactGroup detail failed: {grpResult['error']}")
return []
memberResourceNames = grpResult.get("memberResourceNames") or []
if not memberResourceNames:
return []
chunkSize = 200
people: List[Dict[str, Any]] = []
for i in range(0, min(len(memberResourceNames), effectiveLimit), chunkSize):
chunk = memberResourceNames[i : i + chunkSize]
params = "&".join(f"resourceNames={quote(rn, safe='/')}" for rn in chunk)
batchUrl = f"{_PEOPLE_BASE}/people:batchGet?{params}&personFields={self._PERSON_FIELDS}"
batchResult = await _googleGet(self._token, batchUrl)
if "error" in batchResult:
logger.warning(f"Google People batchGet failed: {batchResult['error']}")
continue
for resp in batchResult.get("responses", []):
person = resp.get("person")
if person:
people.append(person)
if len(people) >= effectiveLimit:
break
return [
ExternalEntry(
name=_googlePersonLabel(p) or "(no name)",
path=f"/{groupRef}/{(p.get('resourceName', '') or '').split('/')[-1]}",
isFolder=False,
mimeType="text/vcard",
metadata={
"id": p.get("resourceName"),
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
"phones": [pn.get("value") for pn in (p.get("phoneNumbers") or []) if pn.get("value")],
"organization": (p.get("organizations") or [{}])[0].get("name") if p.get("organizations") else None,
},
)
for p in people[:effectiveLimit]
]
async def download(self, path: str) -> DownloadResult:
from urllib.parse import quote
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
personSuffix = cleanPath.split("/")[-1]
if not personSuffix:
return DownloadResult()
url = f"{_PEOPLE_BASE}/people/{quote(personSuffix, safe='')}?personFields={self._PERSON_FIELDS}"
person = await _googleGet(self._token, url)
if "error" in person:
logger.warning(f"Google People fetch failed: {person['error']}")
return DownloadResult()
vcfBytes = _googlePersonToVcard(person)
label = _googlePersonLabel(person) or personSuffix
safeName = _googleSafeFileName(label) or "contact"
return DownloadResult(
data=vcfBytes,
fileName=f"{safeName}.vcf",
mimeType="text/vcard",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Google Contacts upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
from urllib.parse import quote
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
url = (
f"{_PEOPLE_BASE}/people:searchContacts"
f"?query={quote(query, safe='')}&pageSize={min(effectiveLimit, 30)}"
f"&readMask={self._PERSON_FIELDS}"
)
result = await _googleGet(self._token, url)
if "error" in result:
return []
entries: List[ExternalEntry] = []
for r in result.get("results", []):
p = r.get("person") or {}
entries.append(
ExternalEntry(
name=_googlePersonLabel(p) or "(no name)",
path=f"/search/{(p.get('resourceName', '') or '').split('/')[-1]}",
isFolder=False,
mimeType="text/vcard",
metadata={
"id": p.get("resourceName"),
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
},
)
)
return entries
def _googleSafeFileName(name: str) -> str:
import re
return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name or "")[:80].strip(". ")
def _googleIcsEscape(value: str) -> str:
if value is None:
return ""
return (
value.replace("\\", "\\\\")
.replace(";", "\\;")
.replace(",", "\\,")
.replace("\r\n", "\\n")
.replace("\n", "\\n")
)
def _googleIcsDateTime(value: Optional[str]) -> Optional[str]:
"""Convert a Google Calendar dateTime/date string to RFC 5545 format (UTC)."""
if not value:
return None
from datetime import datetime, timezone
try:
if "T" not in value:
dt = datetime.strptime(value, "%Y-%m-%d")
return dt.strftime("%Y%m%d")
normalized = value.replace("Z", "+00:00") if value.endswith("Z") else value
dt = datetime.fromisoformat(normalized)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
except (TypeError, ValueError):
return None
def _googleEventToIcs(event: Dict[str, Any]) -> bytes:
"""Build a minimal RFC 5545 VCALENDAR/VEVENT for a Google Calendar event."""
from datetime import datetime, timezone
uid = event.get("iCalUID") or event.get("id") or "unknown@poweron"
summary = _googleIcsEscape(event.get("summary") or "")
location = _googleIcsEscape(event.get("location") or "")
description = _googleIcsEscape(event.get("description") or "")
rawStart = (event.get("start") or {}).get("dateTime") or (event.get("start") or {}).get("date")
rawEnd = (event.get("end") or {}).get("dateTime") or (event.get("end") or {}).get("date")
isAllDay = bool((event.get("start") or {}).get("date") and not (event.get("start") or {}).get("dateTime"))
dtstart = _googleIcsDateTime(rawStart)
dtend = _googleIcsDateTime(rawEnd)
dtstamp = _googleIcsDateTime(event.get("updated")) or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
lines = [
"BEGIN:VCALENDAR",
"VERSION:2.0",
"PRODID:-//PowerOn//Google-Calendar-Adapter//EN",
"CALSCALE:GREGORIAN",
"BEGIN:VEVENT",
f"UID:{uid}",
f"DTSTAMP:{dtstamp}",
]
if dtstart:
lines.append(f"DTSTART;VALUE=DATE:{dtstart}" if isAllDay else f"DTSTART:{dtstart}")
if dtend:
lines.append(f"DTEND;VALUE=DATE:{dtend}" if isAllDay else f"DTEND:{dtend}")
if summary:
lines.append(f"SUMMARY:{summary}")
if location:
lines.append(f"LOCATION:{location}")
if description:
lines.append(f"DESCRIPTION:{description}")
organizer = (event.get("organizer") or {}).get("email")
if organizer:
lines.append(f"ORGANIZER:mailto:{organizer}")
for att in (event.get("attendees") or []):
addr = att.get("email")
if addr:
lines.append(f"ATTENDEE:mailto:{addr}")
lines.append("END:VEVENT")
lines.append("END:VCALENDAR")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")
def _googlePersonLabel(person: Dict[str, Any]) -> str:
names = person.get("names") or []
if names:
primary = names[0]
display = primary.get("displayName") or ""
if display:
return display
given = primary.get("givenName") or ""
family = primary.get("familyName") or ""
full = f"{given} {family}".strip()
if full:
return full
orgs = person.get("organizations") or []
if orgs and orgs[0].get("name"):
return orgs[0]["name"]
emails = person.get("emailAddresses") or []
if emails and emails[0].get("value"):
return emails[0]["value"]
return ""
def _googlePersonToVcard(person: Dict[str, Any]) -> bytes:
"""Build a vCard 3.0 from a Google People API person payload."""
names = person.get("names") or []
primaryName = names[0] if names else {}
given = primaryName.get("givenName") or ""
family = primaryName.get("familyName") or ""
middle = primaryName.get("middleName") or ""
fn = primaryName.get("displayName") or _googlePersonLabel(person) or ""
lines = [
"BEGIN:VCARD",
"VERSION:3.0",
f"N:{family};{given};{middle};;",
f"FN:{fn}",
]
orgs = person.get("organizations") or []
if orgs:
org = orgs[0]
orgVal = org.get("name") or ""
if org.get("department"):
orgVal = f"{orgVal};{org['department']}"
if orgVal:
lines.append(f"ORG:{orgVal}")
if org.get("title"):
lines.append(f"TITLE:{org['title']}")
for em in (person.get("emailAddresses") or []):
addr = em.get("value")
if not addr:
continue
emailType = (em.get("type") or "INTERNET").upper()
lines.append(f"EMAIL;TYPE={emailType}:{addr}")
for ph in (person.get("phoneNumbers") or []):
val = ph.get("value")
if not val:
continue
phType = (ph.get("type") or "VOICE").upper()
lines.append(f"TEL;TYPE={phType}:{val}")
for addr in (person.get("addresses") or []):
street = addr.get("streetAddress") or ""
city = addr.get("city") or ""
region = addr.get("region") or ""
postal = addr.get("postalCode") or ""
country = addr.get("country") or ""
if any([street, city, region, postal, country]):
adrType = (addr.get("type") or "OTHER").upper()
lines.append(f"ADR;TYPE={adrType}:;;{street};{city};{region};{postal};{country}")
bios = person.get("biographies") or []
if bios and bios[0].get("value"):
lines.append(f"NOTE:{_googleIcsEscape(bios[0]['value'])}")
lines.append(f"UID:{person.get('resourceName', '')}")
lines.append("END:VCARD")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")
class GoogleConnector(ProviderConnector): class GoogleConnector(ProviderConnector):
"""Google ProviderConnector -- 1 connection -> Drive + Gmail.""" """Google ProviderConnector -- 1 connection -> Drive + Gmail + Calendar + Contacts."""
_SERVICE_MAP = { _SERVICE_MAP = {
"drive": DriveAdapter, "drive": DriveAdapter,
"gmail": GmailAdapter, "gmail": GmailAdapter,
"calendar": CalendarAdapter,
"contact": ContactsAdapter,
} }
def getAvailableServices(self) -> List[str]: def getAvailableServices(self) -> List[str]:

File diff suppressed because it is too large

View file

@ -126,6 +126,11 @@ def _stripGraphBase(url: str) -> str:
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry: def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
isFolder = "folder" in item isFolder = "folder" in item
# Graph exposes the driveItem content hash as ``eTag`` (quoted) or
# ``cTag``; we normalise to a "revision" string so callers can use it as a
# stable ``contentVersion`` for idempotent ingestion without re-downloading
# file bytes.
revision = item.get("eTag") or item.get("cTag")
return ExternalEntry( return ExternalEntry(
name=item.get("name", ""), name=item.get("name", ""),
path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""), path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""),
@ -137,6 +142,9 @@ def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> Exter
"id": item.get("id"), "id": item.get("id"),
"webUrl": item.get("webUrl"), "webUrl": item.get("webUrl"),
"childCount": item.get("folder", {}).get("childCount") if isFolder else None, "childCount": item.get("folder", {}).get("childCount") if isFolder else None,
"revision": revision,
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
"parentReference": item.get("parentReference", {}),
}, },
) )
@ -167,21 +175,36 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
return await self._discoverSites() return await self._discoverSites()
if not folderPath or folderPath == "/": if not folderPath or folderPath == "/":
endpoint = f"sites/{siteId}/drive/root/children" endpoint: Optional[str] = f"sites/{siteId}/drive/root/children?$top=200"
else: else:
cleanPath = folderPath.lstrip("/") cleanPath = folderPath.lstrip("/")
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children" endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children?$top=200"
result = await self._graphGet(endpoint) # Follow @odata.nextLink until a hard cap is reached so large libraries
if "error" in result: # are fully enumerated (required for bootstrap). Per-page size uses
logger.warning(f"SharePoint browse failed: {result['error']}") # Graph's max supported value to minimise round-trips.
return [] effectiveLimit = int(limit) if limit is not None else None
items: List[Dict[str, Any]] = []
hardCap = 5000
while endpoint and len(items) < hardCap:
result = await self._graphGet(endpoint)
if "error" in result:
logger.warning(f"SharePoint browse failed: {result['error']}")
break
for raw in result.get("value", []) or []:
items.append(raw)
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])] entries = [_graphItemToExternalEntry(item, path) for item in items]
if filter: if filter:
entries = [e for e in entries if _matchFilter(e, filter)] entries = [e for e in entries if _matchFilter(e, filter)]
if limit is not None: if effectiveLimit is not None:
entries = entries[: max(1, int(limit))] entries = entries[: max(1, effectiveLimit)]
return entries return entries
async def _discoverSites(self) -> List[ExternalEntry]: async def _discoverSites(self) -> List[ExternalEntry]:
@ -841,6 +864,285 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
return entries return entries
# ---------------------------------------------------------------------------
# Calendar Adapter
# ---------------------------------------------------------------------------
class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
"""ServiceAdapter for Outlook Calendar via Microsoft Graph.
Path conventions:
``""`` / ``"/"`` -> list user calendars
``"/<calendarId>"`` -> list events in that calendar
``"/<calendarId>/<eventId>"`` -> reserved for future event detail browse
Downloads return a synthesised ``.ics`` (VCALENDAR/VEVENT) since Microsoft
Graph does not expose a ``/$value`` endpoint for events.
"""
_DEFAULT_EVENT_LIMIT = 100
_MAX_EVENT_LIMIT = 1000
_PAGE_SIZE = 100
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
result = await self._graphGet("me/calendars?$top=100")
if "error" in result:
logger.warning(f"MSFT Calendar list failed: {result['error']}")
return []
calendars = result.get("value", [])
if filter:
calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()]
return [
ExternalEntry(
name=c.get("name", ""),
path=f"/{c.get('id', '')}",
isFolder=True,
metadata={
"id": c.get("id"),
"color": c.get("color"),
"owner": (c.get("owner") or {}).get("address"),
"isDefaultCalendar": c.get("isDefaultCalendar", False),
"canEdit": c.get("canEdit", False),
},
)
for c in calendars
]
calendarId = cleanPath.split("/", 1)[0]
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit)
endpoint: Optional[str] = (
f"me/calendars/{calendarId}/events"
f"?$top={pageSize}&$orderby=start/dateTime desc"
)
events: List[Dict[str, Any]] = []
while endpoint and len(events) < effectiveLimit:
result = await self._graphGet(endpoint)
if "error" in result:
logger.warning(f"MSFT Calendar events failed: {result['error']}")
break
for ev in result.get("value", []):
events.append(ev)
if len(events) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
return [
ExternalEntry(
name=ev.get("subject", "(no subject)"),
path=f"/{calendarId}/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime"),
"end": (ev.get("end") or {}).get("dateTime"),
"location": (ev.get("location") or {}).get("displayName"),
"organizer": (ev.get("organizer") or {}).get("emailAddress", {}).get("address"),
"isAllDay": ev.get("isAllDay", False),
"webLink": ev.get("webLink"),
},
)
for ev in events
]
async def download(self, path: str) -> DownloadResult:
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
eventId = cleanPath.split("/")[-1]
ev = await self._graphGet(f"me/events/{eventId}")
if "error" in ev:
logger.warning(f"MSFT Calendar event fetch failed: {ev['error']}")
return DownloadResult()
icsBytes = _eventToIcs(ev)
subject = ev.get("subject") or eventId
safeName = _safeFileName(subject) or "event"
return DownloadResult(
data=icsBytes,
fileName=f"{safeName}.ics",
mimeType="text/calendar",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Calendar upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''")
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
endpoint = f"me/events?$search=\"{safeQuery}\"&$top={effectiveLimit}"
result = await self._graphGet(endpoint)
if "error" in result:
return []
return [
ExternalEntry(
name=ev.get("subject", "(no subject)"),
path=f"/search/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime"),
"end": (ev.get("end") or {}).get("dateTime"),
},
)
for ev in result.get("value", [])
]
# ---------------------------------------------------------------------------
# Contacts Adapter
# ---------------------------------------------------------------------------
class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
"""ServiceAdapter for Outlook Contacts via Microsoft Graph.
Path conventions:
``""`` -> list contact folders (default + custom)
``"/<folderId>"`` -> list contacts in that folder; the
virtual id ``default`` maps to
``/me/contacts`` (the user's primary
contact list)
``"/<folderId>/<contactId>"`` -> reserved for future detail browse
Downloads return a synthesised vCard 3.0 (.vcf) since Microsoft Graph
does not expose a ``/$value`` endpoint for contacts.
"""
_DEFAULT_CONTACT_LIMIT = 200
_MAX_CONTACT_LIMIT = 1000
_PAGE_SIZE = 100
_DEFAULT_FOLDER_ID = "default"
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
folders: List[ExternalEntry] = [
ExternalEntry(
name="Kontakte",
path=f"/{self._DEFAULT_FOLDER_ID}",
isFolder=True,
metadata={"id": self._DEFAULT_FOLDER_ID, "isDefault": True},
),
]
result = await self._graphGet("me/contactFolders?$top=100")
if "error" not in result:
for f in result.get("value", []):
folders.append(
ExternalEntry(
name=f.get("displayName", ""),
path=f"/{f.get('id', '')}",
isFolder=True,
metadata={"id": f.get("id"), "parentFolderId": f.get("parentFolderId")},
)
)
else:
logger.warning(f"MSFT contactFolders list failed: {result['error']}")
return folders
folderId = cleanPath.split("/", 1)[0]
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit)
if folderId == self._DEFAULT_FOLDER_ID:
endpoint: Optional[str] = f"me/contacts?$top={pageSize}&$orderby=displayName"
else:
endpoint = f"me/contactFolders/{folderId}/contacts?$top={pageSize}&$orderby=displayName"
contacts: List[Dict[str, Any]] = []
while endpoint and len(contacts) < effectiveLimit:
result = await self._graphGet(endpoint)
if "error" in result:
logger.warning(f"MSFT contacts list failed: {result['error']}")
break
for c in result.get("value", []):
contacts.append(c)
if len(contacts) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
return [
ExternalEntry(
name=c.get("displayName") or _personLabel(c) or "(no name)",
path=f"/{folderId}/{c.get('id', '')}",
isFolder=False,
mimeType="text/vcard",
metadata={
"id": c.get("id"),
"givenName": c.get("givenName"),
"surname": c.get("surname"),
"companyName": c.get("companyName"),
"emailAddresses": [e.get("address") for e in (c.get("emailAddresses") or []) if e.get("address")],
"businessPhones": c.get("businessPhones") or [],
"mobilePhone": c.get("mobilePhone"),
},
)
for c in contacts
]
async def download(self, path: str) -> DownloadResult:
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
contactId = cleanPath.split("/")[-1]
c = await self._graphGet(f"me/contacts/{contactId}")
if "error" in c:
logger.warning(f"MSFT contact fetch failed: {c['error']}")
return DownloadResult()
vcfBytes = _contactToVcard(c)
label = c.get("displayName") or _personLabel(c) or contactId
safeName = _safeFileName(label) or "contact"
return DownloadResult(
data=vcfBytes,
fileName=f"{safeName}.vcf",
mimeType="text/vcard",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Contacts upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''")
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}"
result = await self._graphGet(endpoint)
if "error" in result:
return []
return [
ExternalEntry(
name=c.get("displayName") or _personLabel(c) or "(no name)",
path=f"/search/{c.get('id', '')}",
isFolder=False,
mimeType="text/vcard",
metadata={"id": c.get("id")},
)
for c in result.get("value", [])
]
# ---------------------------------------------------------------------------
# MsftConnector (1:n)
# ---------------------------------------------------------------------------
@ -853,6 +1155,8 @@ class MsftConnector(ProviderConnector):
"outlook": OutlookAdapter, "outlook": OutlookAdapter,
"teams": TeamsAdapter, "teams": TeamsAdapter,
"onedrive": OneDriveAdapter, "onedrive": OneDriveAdapter,
"calendar": CalendarAdapter,
"contact": ContactsAdapter,
}
def getAvailableServices(self) -> List[str]:
@ -891,3 +1195,143 @@ def _matchFilter(entry: ExternalEntry, pattern: str) -> bool:
"""Simple glob-like filter (supports * wildcard).""" """Simple glob-like filter (supports * wildcard)."""
import fnmatch import fnmatch
return fnmatch.fnmatch(entry.name.lower(), pattern.lower()) return fnmatch.fnmatch(entry.name.lower(), pattern.lower())
def _safeFileName(name: str) -> str:
"""Strip path-unsafe characters and trim length so the result is a usable file name."""
import re
return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name or "")[:80].strip(". ")
def _personLabel(contact: Dict[str, Any]) -> str:
given = (contact.get("givenName") or "").strip()
surname = (contact.get("surname") or "").strip()
if given or surname:
return f"{given} {surname}".strip()
company = (contact.get("companyName") or "").strip()
return company
def _icsEscape(value: str) -> str:
"""Escape RFC 5545 reserved characters in TEXT properties."""
if value is None:
return ""
return (
value.replace("\\", "\\\\")
.replace(";", "\\;")
.replace(",", "\\,")
.replace("\r\n", "\\n")
.replace("\n", "\\n")
)
def _icsDateTime(value: Optional[str]) -> Optional[str]:
"""Convert an ISO datetime string to an RFC 5545 DATE-TIME value (UTC)."""
if not value:
return None
from datetime import datetime, timezone
try:
normalized = value.replace("Z", "+00:00") if value.endswith("Z") else value
dt = datetime.fromisoformat(normalized)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
except (TypeError, ValueError):
return None
def _eventToIcs(event: Dict[str, Any]) -> bytes:
"""Build a minimal RFC 5545 VCALENDAR/VEVENT for a Graph event payload."""
from datetime import datetime, timezone
uid = event.get("iCalUId") or event.get("id") or "unknown@poweron"
summary = _icsEscape(event.get("subject") or "")
location = _icsEscape((event.get("location") or {}).get("displayName") or "")
body = (event.get("body") or {}).get("content") or ""
description = _icsEscape(body)
dtstart = _icsDateTime((event.get("start") or {}).get("dateTime"))
dtend = _icsDateTime((event.get("end") or {}).get("dateTime"))
dtstamp = _icsDateTime(event.get("lastModifiedDateTime")) or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
lines = [
"BEGIN:VCALENDAR",
"VERSION:2.0",
"PRODID:-//PowerOn//MSFT-Calendar-Adapter//EN",
"CALSCALE:GREGORIAN",
"BEGIN:VEVENT",
f"UID:{uid}",
f"DTSTAMP:{dtstamp}",
]
if dtstart:
lines.append(f"DTSTART:{dtstart}")
if dtend:
lines.append(f"DTEND:{dtend}")
if summary:
lines.append(f"SUMMARY:{summary}")
if location:
lines.append(f"LOCATION:{location}")
if description:
lines.append(f"DESCRIPTION:{description}")
organizer = (event.get("organizer") or {}).get("emailAddress", {}).get("address")
if organizer:
lines.append(f"ORGANIZER:mailto:{organizer}")
for att in (event.get("attendees") or []):
addr = (att.get("emailAddress") or {}).get("address")
if addr:
lines.append(f"ATTENDEE:mailto:{addr}")
lines.append("END:VEVENT")
lines.append("END:VCALENDAR")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")
def _contactToVcard(contact: Dict[str, Any]) -> bytes:
"""Build a vCard 3.0 from a Graph /me/contacts payload."""
given = contact.get("givenName") or ""
surname = contact.get("surname") or ""
middle = contact.get("middleName") or ""
fn = contact.get("displayName") or _personLabel(contact) or contact.get("companyName") or ""
lines = [
"BEGIN:VCARD",
"VERSION:3.0",
f"N:{surname};{given};{middle};;",
f"FN:{fn}",
]
if contact.get("companyName"):
org = contact["companyName"]
if contact.get("department"):
org = f"{org};{contact['department']}"
lines.append(f"ORG:{org}")
if contact.get("jobTitle"):
lines.append(f"TITLE:{contact['jobTitle']}")
for em in (contact.get("emailAddresses") or []):
addr = em.get("address")
if addr:
lines.append(f"EMAIL;TYPE=INTERNET:{addr}")
for phone in (contact.get("businessPhones") or []):
if phone:
lines.append(f"TEL;TYPE=WORK,VOICE:{phone}")
if contact.get("mobilePhone"):
lines.append(f"TEL;TYPE=CELL,VOICE:{contact['mobilePhone']}")
for phone in (contact.get("homePhones") or []):
if phone:
lines.append(f"TEL;TYPE=HOME,VOICE:{phone}")
def _appendAddress(addr: Dict[str, Any], typ: str) -> None:
if not addr:
return
street = addr.get("street") or ""
city = addr.get("city") or ""
state = addr.get("state") or ""
postal = addr.get("postalCode") or ""
country = addr.get("countryOrRegion") or ""
if any([street, city, state, postal, country]):
lines.append(f"ADR;TYPE={typ}:;;{street};{city};{state};{postal};{country}")
_appendAddress(contact.get("businessAddress") or {}, "WORK")
_appendAddress(contact.get("homeAddress") or {}, "HOME")
_appendAddress(contact.get("otherAddress") or {}, "OTHER")
if contact.get("personalNotes"):
lines.append(f"NOTE:{_icsEscape(contact['personalNotes'])}")
lines.append(f"UID:{contact.get('id', '')}")
lines.append("END:VCARD")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")

View file

@ -26,7 +26,12 @@ class DataSource(PowerOnModel):
json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection", "labelField": "externalUsername"}}, json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection", "labelField": "externalUsername"}},
) )
sourceType: str = Field( sourceType: str = Field(
description="sharepointFolder, googleDriveFolder, outlookFolder, ftpFolder, clickupList (path under /team/...)", description=(
"sharepointFolder, onedriveFolder, googleDriveFolder, "
"outlookFolder, gmailFolder, ftpFolder, clickupList "
"(path under /team/...), kdriveFolder, calendarFolder, "
"contactFolder"
),
json_schema_extra={"label": "Quellentyp"}, json_schema_extra={"label": "Quellentyp"},
) )
path: str = Field( path: str = Field(

View file

@ -4,10 +4,13 @@
Document reference models for typed document references in workflows.
"""
import logging
from typing import Any, List, Optional
from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel
logger = logging.getLogger(__name__)
class DocumentReference(BaseModel):
"""Base class for document references"""
@ -115,3 +118,86 @@ class DocumentReferenceList(BaseModel):
references.append(DocumentListReference(label=refStr))
return cls(references=references)
def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
"""Tolerant coercion of any agent/UI-supplied document list to
:class:`DocumentReferenceList`.
Accepts the canonical formats plus the dict-wrapper shapes that
LLM tool-callers tend to generate when they see a
``type=DocumentList`` parameter:
* ``None`` / ``""`` -> empty list
* :class:`DocumentReferenceList` -> as-is
* ``str`` -> single-element string list
* ``list[str]`` -> :meth:`from_string_list`
* ``list[dict]`` with ``id`` or ``documentId`` -> item references
* ``{"documents": [...]}`` / ``{"references": [...]}`` ->
recurse into the inner list (this is the shape LLMs love)
* ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single
item reference
* any unrecognised input -> empty list with a WARN log; never
raises (the caller decides whether an empty list is fatal).
"""
if value is None or value == "":
return DocumentReferenceList(references=[])
if isinstance(value, DocumentReferenceList):
return value
if isinstance(value, str):
return DocumentReferenceList.from_string_list([value])
if isinstance(value, dict):
for innerKey in ("documents", "references", "items", "files"):
if innerKey in value and isinstance(value[innerKey], list):
return coerceDocumentReferenceList(value[innerKey])
docId = value.get("documentId") or value.get("id")
if docId:
return DocumentReferenceList(references=[
DocumentItemReference(
documentId=str(docId),
fileName=value.get("fileName") or value.get("name"),
)
])
logger.warning(
f"coerceDocumentReferenceList: unsupported dict shape "
f"(keys={list(value.keys())}); returning empty list."
)
return DocumentReferenceList(references=[])
if isinstance(value, list):
if not value:
return DocumentReferenceList(references=[])
first = value[0]
if isinstance(first, str):
return DocumentReferenceList.from_string_list(value)
if isinstance(first, dict):
references: List[DocumentReference] = []
for item in value:
if not isinstance(item, dict):
continue
docId = item.get("documentId") or item.get("id")
if docId:
references.append(DocumentItemReference(
documentId=str(docId),
fileName=item.get("fileName") or item.get("name"),
))
elif item.get("label"):
references.append(DocumentListReference(
label=str(item["label"]),
messageId=item.get("messageId"),
))
return DocumentReferenceList(references=references)
# Mixed/object list (e.g. inline ActionDocument-like): caller
# must pre-handle that case before calling this coercer.
logger.warning(
f"coerceDocumentReferenceList: list element type "
f"{type(first).__name__} not recognised; returning empty list."
)
return DocumentReferenceList(references=[])
logger.warning(
f"coerceDocumentReferenceList: unsupported value type "
f"{type(value).__name__}; returning empty list."
)
return DocumentReferenceList(references=[])
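A short, self-contained illustration of the coercion rules above; the ids and names are made up:
# Dict-wrapper shape produced by tool callers -> one DocumentItemReference.
_docs = coerceDocumentReferenceList({"documents": [{"id": "doc-123", "name": "offer.pdf"}]})
assert len(_docs.references) == 1
assert _docs.references[0].documentId == "doc-123"
assert _docs.references[0].fileName == "offer.pdf"
# Degenerate inputs degrade to an empty or single-label list instead of raising.
assert coerceDocumentReferenceList(None).references == []
assert len(coerceDocumentReferenceList("quarterly report").references) == 1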

View file

@ -95,7 +95,14 @@ class ExtractionOptions(BaseModel):
imageQuality: int = Field(default=85, ge=1, le=100, description="Image quality (1-100)")
# Merging strategy
mergeStrategy: Optional[MergeStrategy] = Field(
default_factory=MergeStrategy,
description=(
"Strategy for merging extraction results. Pass None to skip merging entirely "
"(required for per-chunk ingestion pipelines like RAG, where per-page/per-section "
"granularity must be preserved for embedding)."
),
)
# Optional chunking parameters (for backward compatibility)
chunkAllowed: Optional[bool] = Field(default=None, description="Whether chunking is allowed")
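A minimal sketch of the two call patterns this optional field now supports; the runExtraction call is elided to its shape, and the registries and file arguments are whatever the caller already has in scope:
# RAG / ingestion lane: keep one ContentPart per page or section.
ragOptions = ExtractionOptions(mergeStrategy=None)
# Legacy behaviour: the default factory still merges everything into a single part.
legacyOptions = ExtractionOptions()
# extracted = runExtraction(extractorRegistry, chunkerRegistry, ...)  # pass ragOptions as the ExtractionOptions argument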

View file

@ -90,6 +90,16 @@ class FileContentIndex(PowerOnModel):
description="Data visibility scope: personal, featureInstance, mandate, global", description="Data visibility scope: personal, featureInstance, mandate, global",
json_schema_extra={"label": "Sichtbarkeit"}, json_schema_extra={"label": "Sichtbarkeit"},
) )
sourceKind: str = Field(
default="file",
description="Origin of the indexed content: file, sharepoint_item, outlook_message, outlook_attachment, ...",
json_schema_extra={"label": "Quellenart"},
)
connectionId: Optional[str] = Field(
default=None,
description="UserConnection ID if this index entry originates from an external connector",
json_schema_extra={"label": "Connection-ID"},
)
neutralizationStatus: Optional[str] = Field(
default=None,
description="Neutralization status: completed, failed, skipped, None = not required",

View file

@ -475,7 +475,23 @@ class UserConnection(PowerOnModel):
description="OAuth scopes granted for this connection", description="OAuth scopes granted for this connection",
json_schema_extra={"frontend_type": "list", "frontend_readonly": True, "frontend_required": False, "label": "Gewährte Berechtigungen"}, json_schema_extra={"frontend_type": "list", "frontend_readonly": True, "frontend_required": False, "label": "Gewährte Berechtigungen"},
) )
knowledgeIngestionEnabled: bool = Field(
default=False,
description="Whether the user has consented to knowledge ingestion for this connection",
json_schema_extra={"frontend_type": "boolean", "frontend_readonly": False, "frontend_required": False, "label": "Wissensdatenbank aktiv"},
)
knowledgePreferences: Optional[Dict[str, Any]] = Field(
default=None,
description=(
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
"clickupScope (titles|title_description|with_comments), "
"surfaceToggles (dict per authority), maxAgeDays (int)."
),
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
)
@computed_field
@property
def connectionReference(self) -> str:
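A hypothetical schemaVersion=1 preference blob matching the keys listed in the field description; the concrete values and the authority keys in surfaceToggles are illustrative, not defaults:
examplePreferences = {
    "schemaVersion": 1,
    "neutralizeBeforeEmbed": True,
    "mailContentDepth": "snippet",            # metadata | snippet | full
    "mailIndexAttachments": False,
    "filesIndexBinaries": False,
    "mimeAllowlist": ["application/pdf", "text/plain"],
    "clickupScope": "title_description",      # titles | title_description | with_comments
    "surfaceToggles": {"msft": True, "google": False},   # per-authority on/off; keys are assumed
    "maxAgeDays": 365,
}
# connection is an already-loaded UserConnection instance:
# connection.knowledgePreferences = examplePreferences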

View file

@ -174,14 +174,26 @@ async def indexSessionData(
for c in chunks
]
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="coaching_session",
sourceId=syntheticFileId,
fileName=f"coaching-session-{sessionId[:8]}",
mimeType="application/x-coaching-session",
userId=userId,
featureInstanceId=featureInstanceId,
mandateId=mandateId,
contentObjects=contentObjects,
provenance={
"lane": "feature",
"feature": "commcoach",
"sessionId": sessionId,
"contextId": contextId,
"messageCount": len(messages or []),
},
)
)
logger.info(f"Successfully indexed coaching session {sessionId} ({len(chunks)} chunks)")
except Exception as e:

View file

@ -1160,6 +1160,9 @@ async def list_connection_services(
"drive": "Google Drive", "drive": "Google Drive",
"gmail": "Gmail", "gmail": "Gmail",
"files": "Files (FTP)", "files": "Files (FTP)",
"kdrive": "kDrive",
"calendar": "Calendar",
"contact": "Contacts",
}
_serviceIcons = {
"sharepoint": "sharepoint",
@ -1170,6 +1173,9 @@ async def list_connection_services(
"drive": "cloud", "drive": "cloud",
"gmail": "mail", "gmail": "mail",
"files": "folder", "files": "folder",
"kdrive": "cloud",
"calendar": "calendar",
"contact": "contact",
}
items = [
{"service": s, "label": _serviceLabels.get(s, s), "icon": _serviceIcons.get(s, "folder")}

View file

@ -188,6 +188,9 @@ _SOURCE_TYPE_TO_SERVICE = {
"gmailFolder": "gmail", "gmailFolder": "gmail",
"ftpFolder": "files", "ftpFolder": "files",
"clickupList": "clickup", "clickupList": "clickup",
"kdriveFolder": "kdrive",
"calendarFolder": "calendar",
"contactFolder": "contact",
}
@ -1818,6 +1821,9 @@ async def listConnectionServices(
"drive": "Google Drive", "drive": "Google Drive",
"gmail": "Gmail", "gmail": "Gmail",
"files": "Files (FTP)", "files": "Files (FTP)",
"kdrive": "kDrive",
"calendar": "Calendar",
"contact": "Contacts",
}
_serviceIcons = {
"sharepoint": "sharepoint",
@ -1827,6 +1833,9 @@ async def listConnectionServices(
"drive": "cloud", "drive": "cloud",
"gmail": "mail", "gmail": "mail",
"files": "folder", "files": "folder",
"kdrive": "cloud",
"calendar": "calendar",
"contact": "contact",
}
items = [
{

View file

@ -1268,19 +1268,7 @@ class AppObjects:
result = []
for conn_dict in connections:
try:
connection = UserConnection.model_validate(conn_dict)
result.append(connection)
except Exception as e:
logger.error(
@ -1293,6 +1281,28 @@ class AppObjects:
logger.error(f"Error getting user connections: {str(e)}") logger.error(f"Error getting user connections: {str(e)}")
return [] return []
def getActiveKnowledgeConnections(self) -> List[UserConnection]:
"""Return all UserConnections with knowledgeIngestionEnabled=True and status=active.
Used by the daily re-sync scheduler to determine which connections to re-index.
"""
try:
rows = self.db.getRecordset(
UserConnection,
recordFilter={"knowledgeIngestionEnabled": True, "status": ConnectionStatus.ACTIVE.value},
)
result = []
for row in rows or []:
try:
conn = UserConnection.model_validate(row) if isinstance(row, dict) else row
result.append(conn)
except Exception as _e:
logger.warning(f"getActiveKnowledgeConnections: could not parse row: {_e}")
return result
except Exception as e:
logger.error(f"getActiveKnowledgeConnections failed: {e}")
return []
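A hedged sketch of how the daily re-sync described in the docstring might iterate this helper; the dispatch callable and its parameters are placeholders for whatever job the scheduler actually enqueues:
def dailyKnowledgeResync(appObjects, dispatchBootstrap) -> int:
    """Dispatch one re-index/bootstrap job per consenting, active connection."""
    dispatched = 0
    for conn in appObjects.getActiveKnowledgeConnections():
        try:
            dispatchBootstrap(connectionId=conn.id, authority=str(conn.authority), userId=conn.userId)
            dispatched += 1
        except Exception as e:
            logger.warning(f"daily knowledge re-sync dispatch failed for {conn.id}: {e}")
    return dispatched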
def getUserConnectionById(self, connectionId: str) -> Optional[UserConnection]:
"""Get a single UserConnection by ID or by reference string (connection:authority:username)."""
try:
@ -1317,18 +1327,21 @@ class AppObjects:
if connections:
conn_dict = connections[0]
try:
return UserConnection.model_validate(conn_dict)
except Exception:
return UserConnection(
id=conn_dict["id"],
userId=conn_dict["userId"],
authority=conn_dict.get("authority"),
externalId=conn_dict.get("externalId", ""),
externalUsername=conn_dict.get("externalUsername", ""),
externalEmail=conn_dict.get("externalEmail"),
status=conn_dict.get("status", "pending"),
connectedAt=conn_dict.get("connectedAt"),
lastChecked=conn_dict.get("lastChecked"),
expiresAt=conn_dict.get("expiresAt"),
)
return None
except Exception as e:
logger.error(f"Error getting user connection by ID: {str(e)}")
@ -3331,7 +3344,10 @@ class AppObjects:
)
if not tokens:
# Pending connections legitimately have no token yet (PAT not
# submitted, OAuth callback not completed). Keep at DEBUG to
# avoid noisy warnings on every connection-list refresh.
logger.debug(
f"No connection token found for connectionId: {connectionId}"
)
return None

View file

@ -93,6 +93,46 @@ class KnowledgeObjects:
self.db.recordModify(FileContentIndex, fileId, {"status": status})
return True
def deleteFileContentIndexByConnectionId(self, connectionId: str) -> Dict[str, int]:
"""Delete all FileContentIndex rows (and their ContentChunks) for a connection.
Used when a UserConnection is revoked / disconnected so the knowledge corpus
no longer references data the user no longer grants access to. Returns a dict
with counts to support observability logs.
"""
if not connectionId:
return {"indexRows": 0, "chunks": 0}
rows = self.db.getRecordset(
FileContentIndex, recordFilter={"connectionId": connectionId}
)
mandateIds: set = set()
chunkCount = 0
indexCount = 0
for row in rows:
fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
if not fid:
continue
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
for chunk in chunks:
if self.db.recordDelete(ContentChunk, chunk["id"]):
chunkCount += 1
if self.db.recordDelete(FileContentIndex, fid):
indexCount += 1
if mid:
mandateIds.add(str(mid))
for mid in mandateIds:
try:
from modules.interfaces.interfaceDbBilling import _getRootInterface
_getRootInterface().reconcileMandateStorageBilling(mid)
except Exception as ex:
logger.warning("reconcileMandateStorageBilling after connection purge failed: %s", ex)
return {"indexRows": indexCount, "chunks": chunkCount}
def deleteFileContentIndex(self, fileId: str) -> bool:
"""Delete a FileContentIndex and all associated ContentChunks."""
existing = self.getFileContentIndex(fileId)

View file

@ -836,13 +836,25 @@ class ComponentObjects:
def checkForDuplicateFile(self, fileHash: str, fileName: str) -> Optional[FileItem]:
"""Checks if a file with the same hash AND fileName already exists for the current user
**within the same scope** (mandateId + featureInstanceId).
Duplicate = same user + same fileHash + same fileName + same scope + RBAC-visible.
Same hash with different name is allowed (intentional copy by user).
RBAC parity contract: this method must NEVER return a FileItem that
``getFile()`` would not return for the current user. Otherwise callers
(``saveUploadedFile`` / ``createFile``) hand back an id that the very
next ``updateFile`` / ``getFile`` then rejects with
``File with ID ... not found`` -- the well-known "ghost duplicate"
symptom seen when ``interfaceDbComponent`` is initialised without a
``featureInstanceId`` (e.g. via ``serviceHub``) but a same-hash+name
file exists in another featureInstance under the same mandate.
We therefore cross-check the candidate through the RBAC-aware ``getFile``
before returning it; if RBAC blocks it, we treat it as "no duplicate
for this scope" and the caller will create a fresh per-scope copy.
"""
if not self.userId:
return None
recordFilter: dict = {
"sysCreatedBy": self.userId,
"fileHash": fileHash,
@ -857,10 +869,10 @@ class ComponentObjects:
FileItem,
recordFilter=recordFilter,
)
if not matchingFiles:
return None
file = matchingFiles[0]
fileId = file["id"]
@ -869,16 +881,17 @@ class ComponentObjects:
logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file") logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file")
return None return None
return FileItem( rbacVisible = self.getFile(fileId)
id=fileId, if rbacVisible is None:
mandateId=file.get("mandateId", ""), logger.info(
featureInstanceId=file.get("featureInstanceId", ""), f"Duplicate FileItem {fileId} ('{fileName}', hash {fileHash[:12]}...) found via "
fileName=file["fileName"], f"sysCreatedBy+hash+name match but is not RBAC-visible in current scope "
mimeType=file["mimeType"], f"(mandateId={self.mandateId or '-'}, featureInstanceId={self.featureInstanceId or '-'}). "
fileHash=file["fileHash"], f"Treating as no-duplicate so a fresh per-scope copy gets created."
fileSize=file["fileSize"], )
sysCreatedAt=file.get("sysCreatedAt"), return None
)
return rbacVisible
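The contract in the docstring can be stated as a short invariant for callers; componentObjects, fileHash and fileName stand in for the caller's context:
duplicate = componentObjects.checkForDuplicateFile(fileHash, fileName)
if duplicate is not None:
    # RBAC parity: a returned duplicate is always readable in this scope,
    # so the follow-up getFile can no longer produce the "ghost duplicate" 404.
    assert componentObjects.getFile(duplicate.id) is not None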
# Class-level cache — built once from the ExtractorRegistry
_extensionToMime: Optional[Dict[str, str]] = None

View file

@ -351,11 +351,18 @@ def create_connection(
externalUsername="", # Will be set after OAuth externalUsername="", # Will be set after OAuth
status=ConnectionStatus.PENDING # Start with PENDING status status=ConnectionStatus.PENDING # Start with PENDING status
) )
# Apply knowledge consent + preferences from request body before persisting
knowledge_enabled = connection_data.get("knowledgeIngestionEnabled")
if isinstance(knowledge_enabled, bool):
connection.knowledgeIngestionEnabled = knowledge_enabled
knowledge_prefs = connection_data.get("knowledgePreferences")
if isinstance(knowledge_prefs, dict):
connection.knowledgePreferences = knowledge_prefs
# Save connection record - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connection.id, connection.model_dump())
return connection
except HTTPException:
@ -484,16 +491,23 @@ def update_connection(
def connect_service(
request: Request,
connectionId: str = Path(..., description="The ID of the connection to connect"),
body: Optional[Dict[str, Any]] = Body(default=None),
currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
"""Connect a service for the current user.
Optional body: ``{"reauth": true}`` -- forces the OAuth provider to re-show
the consent screen, which is required when new scopes have been added (e.g.
Calendar + Contacts after the connection was first created). Without this
flag the provider silently re-uses the previous consent and never grants
the new scopes, leaving the connection in a degraded state.
SECURITY: This endpoint is secure - users can only connect their own connections.
"""
try:
interface = getInterface(currentUser)
# Find the connection
connection = None
# SECURITY FIX: All users (including admins) can only connect their own connections
@ -503,29 +517,40 @@ def connect_service(
if conn.id == connectionId:
connection = conn
break
if not connection:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=routeApiMsg("Connection not found")
)
reauth = bool((body or {}).get("reauth")) if isinstance(body, dict) else False
reauthSuffix = "&reauth=1" if reauth else ""
# Data-app OAuth (JWT state issued server-side in /auth/connect)
auth_url = None
if connection.authority == AuthAuthority.MSFT:
auth_url = f"/api/msft/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
elif connection.authority == AuthAuthority.GOOGLE:
auth_url = f"/api/google/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
elif connection.authority == AuthAuthority.CLICKUP:
auth_url = f"/api/clickup/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
elif connection.authority == AuthAuthority.INFOMANIAK:
# Infomaniak does not use OAuth for data access; the frontend posts a
# Personal Access Token directly to /api/infomaniak/connections/{id}/token.
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg(
"Infomaniak uses a Personal Access Token instead of OAuth. "
"Submit the token via POST /api/infomaniak/connections/{connectionId}/token."
),
)
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Unsupported authority: {connection.authority}"
)
return {"authUrl": auth_url}
except HTTPException:
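A client-side sketch of the optional reauth body described in the connect_service docstring; the base URL, route prefix and cookie handling are placeholders, only the body shape and the authUrl response key come from the route above:
import httpx

connectionId = "00000000-0000-0000-0000-000000000000"  # placeholder
with httpx.Client(base_url="https://poweron.example") as client:
    resp = client.post(
        f"/api/connections/{connectionId}/connect",  # path prefix is an assumption
        json={"reauth": True},                       # force the provider consent screen
    )
    authUrl = resp.json()["authUrl"]                 # ends in "&reauth=1" for OAuth authorities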
@ -568,8 +593,25 @@ def disconnect_service(
detail=routeApiMsg("Connection not found") detail=routeApiMsg("Connection not found")
) )
# Update connection status # Fire revoked event BEFORE DB status change so knowledge purge and
connection.status = ConnectionStatus.INACTIVE # status mutation form one logical step; subscribers see the
# connection as it was. INACTIVE does not exist on the enum — REVOKED
# is the correct terminal-but-retained state (deleted rows are
# handled in DELETE /{id}).
try:
from modules.shared.callbackRegistry import callbackRegistry
callbackRegistry.trigger(
"connection.revoked",
connectionId=connectionId,
authority=str(getattr(connection.authority, "value", connection.authority) or ""),
userId=str(currentUser.id),
reason="disconnected",
)
except Exception as _cbErr:
logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)
connection.status = ConnectionStatus.REVOKED
connection.lastChecked = getUtcTimestamp()
# Update connection record - models now handle timestamp serialization automatically
@ -618,6 +660,23 @@ def delete_connection(
detail=routeApiMsg("Connection not found") detail=routeApiMsg("Connection not found")
) )
# Fire revoked event BEFORE the row disappears so consumers still
# have authority/connection context for observability; purge itself
# targets FileContentIndex rows by connectionId which are unaffected
# by the UserConnection delete.
try:
from modules.shared.callbackRegistry import callbackRegistry
callbackRegistry.trigger(
"connection.revoked",
connectionId=connectionId,
authority=str(getattr(connection.authority, "value", connection.authority) or ""),
userId=str(currentUser.id),
reason="deleted",
)
except Exception as _cbErr:
logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)
# Remove the connection - only need connectionId since permissions are verified
interface.removeUserConnection(connectionId)

View file

@ -77,7 +77,7 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
"""Background task: pre-scan + extraction + knowledge indexing. """Background task: pre-scan + extraction + knowledge indexing.
Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted) Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
Step 2: Content extraction via runExtraction -> ContentParts Step 2: Content extraction via runExtraction -> ContentParts
Step 3: KnowledgeService.indexFile -> chunking + embedding -> Knowledge Store""" Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store"""
userId = user.id if hasattr(user, "id") else str(user) userId = user.id if hasattr(user, "id") else str(user)
try: try:
mgmtInterface = interfaceDbManagement.getInterface(user) mgmtInterface = interfaceDbManagement.getInterface(user)
@ -122,9 +122,30 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
f"{contentIndex.totalObjects} objects" f"{contentIndex.totalObjects} objects"
) )
# Persist FileContentIndex immediately # Persist FileContentIndex immediately.
# IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
# prior successful run — otherwise this upsert wipes the idempotency cache
# and requestIngestion cannot detect duplicates (AC4 breaks).
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
knowledgeDb = getKnowledgeInterface()
try:
_existing = knowledgeDb.getFileContentIndex(fileId)
except Exception:
_existing = None
if _existing:
_existingStruct = (
_existing.get("structure") if isinstance(_existing, dict)
else getattr(_existing, "structure", {})
) or {}
_existingStatus = (
_existing.get("status") if isinstance(_existing, dict)
else getattr(_existing, "status", "")
) or ""
if "_ingestion" in _existingStruct:
contentIndex.structure = dict(contentIndex.structure or {})
contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
if _existingStatus == "indexed":
contentIndex.status = "indexed"
knowledgeDb.upsertFileContentIndex(contentIndex)
# Step 2: Content extraction (AI-free, produces ContentParts)
@ -134,7 +155,10 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
# mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
# The default MergeStrategy concatenates all text parts into a single blob, which
# collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
options = ExtractionOptions(mergeStrategy=None)
extracted = runExtraction(
extractorRegistry, chunkerRegistry,
@ -181,15 +205,21 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
)
knowledgeService = getService("knowledge", ctx)
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="file",
sourceId=fileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
mandateId=str(mandate_id) if mandate_id else "",
contentObjects=contentObjects,
structure=contentIndex.structure,
provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
)
)
# Re-acquire interface after await to avoid stale user context from the singleton

View file

@ -241,6 +241,29 @@ async def auth_connect_callback(
)
interface.saveConnectionToken(token)
try:
from modules.shared.callbackRegistry import callbackRegistry
if connection.knowledgeIngestionEnabled:
callbackRegistry.trigger(
"connection.established",
connectionId=connection.id,
authority=str(getattr(connection.authority, "value", connection.authority) or "clickup"),
userId=str(user.id),
)
else:
logger.info(
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connection.id,
"authority": "clickup",
"reason": "consent_disabled",
},
)
except Exception as _cbErr:
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
return HTMLResponse(
content=f"""
<html>

View file

@ -281,9 +281,17 @@ async def auth_login_callback(
def auth_connect(
request: Request,
connectionId: str = Query(..., description="UserConnection id"),
reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"),
currentUser: User = Depends(getCurrentUser),
) -> RedirectResponse:
"""Start Google Data OAuth for an existing connection (requires gateway session).
Google already defaults to ``prompt=consent`` here, but ``include_granted_scopes=true``
can cause newly added scopes (e.g. calendar.readonly, contacts.readonly) to be
silently dropped on subsequent re-authorisations. With ``reauth=1`` we drop
``include_granted_scopes`` so Google re-issues a token strictly for the
current scope list.
"""
try:
_require_google_data_config()
interface = getInterface(currentUser)
@ -310,9 +318,10 @@ def auth_connect(
)
extra_params: Dict[str, Any] = {
"access_type": "offline",
"state": state_jwt,
}
if not reauth:
extra_params["include_granted_scopes"] = "true"
login_hint = connection.externalEmail or connection.externalUsername
if login_hint:
extra_params["login_hint"] = login_hint
@ -470,6 +479,29 @@ async def auth_connect_callback(
)
interface.saveConnectionToken(token)
try:
from modules.shared.callbackRegistry import callbackRegistry
if connection.knowledgeIngestionEnabled:
callbackRegistry.trigger(
"connection.established",
connectionId=connection.id,
authority=str(getattr(connection.authority, "value", connection.authority) or "google"),
userId=str(user.id),
)
else:
logger.info(
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connection.id,
"authority": "google",
"reason": "consent_disabled",
},
)
except Exception as _cbErr:
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
return HTMLResponse(
content=f"""
<html>

View file

@ -1,69 +1,66 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Infomaniak OAuth for data connections (UserConnection + Token). """Infomaniak Personal-Access-Token onboarding for data connections.
Pure DATA_CONNECTION flow -- Infomaniak is NOT a login authority for PowerOn. Infomaniak does NOT support OAuth scopes for kDrive/kSuite data access.
The user must create a Personal Access Token (PAT) at
https://manager.infomaniak.com/v3/ng/accounts/token/list with the API
scopes:
- ``drive`` -> kDrive (active adapter)
- ``workspace:calendar`` -> Calendar (active adapter)
- ``workspace:contact`` -> Contacts (active adapter)
- ``workspace:mail`` -> Mail (adapter pending; scope reserved)
Validation strategy
-------------------
The submit endpoint validates the PAT in two deterministic steps,
each addressing one scope:
1. ``listAccessibleDrives(pat)`` -> ``GET /2/drive/init?with=drives``
proves the ``drive`` scope is on the PAT and -- as a side effect --
confirms the user has at least one accessible kDrive. This is the
*only* listing endpoint that returns drives where the user has
``role: 'user'`` (the documented ``/2/drive?account_id=...`` listing
is filtered to admin-only drives and would silently return ``[]``
for a standard kSuite member).
2. ``resolveOwnerIdentity(pat)`` -> PIM Calendar (preferred) or PIM
Contacts (fallback) yields the user's display name + their kSuite
account_id, used purely for connection labelling. This also proves
that at least one of ``workspace:calendar`` / ``workspace:contact``
is on the PAT (the connection would otherwise be blank in the UI).
Mail has no separate probe: its scope is recorded in ``grantedScopes``
so a future adapter can pick it up without re-issuing the token.
""" """
from fastapi import APIRouter, HTTPException, Request, status, Depends, Query from fastapi import APIRouter, HTTPException, Request, status, Depends, Path, Body
from fastapi.responses import HTMLResponse, RedirectResponse
import logging import logging
import json
import time
from typing import Dict, Any from typing import Dict, Any
from urllib.parse import urlencode import hashlib
import httpx
from jose import jwt as jose_jwt
from jose import JWTError
from modules.shared.configuration import APP_CONFIG from modules.interfaces.interfaceDbApp import getInterface
from modules.interfaces.interfaceDbApp import getInterface, getRootInterface
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
from modules.datamodels.datamodelSecurity import Token, TokenPurpose from modules.datamodels.datamodelSecurity import Token, TokenPurpose
from modules.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM from modules.auth import getCurrentUser, limiter
from modules.auth.oauthProviderConfig import infomaniakDataScopes from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp
from modules.shared.timeUtils import createExpirationTimestamp, getUtcTimestamp, parseTimestamp
from modules.shared.i18nRegistry import apiRouteContext from modules.shared.i18nRegistry import apiRouteContext
from modules.connectors.providerInfomaniak.connectorInfomaniak import (
resolveOwnerIdentity,
listAccessibleDrives,
InfomaniakIdentityError,
)
routeApiMsg = apiRouteContext("routeSecurityInfomaniak") routeApiMsg = apiRouteContext("routeSecurityInfomaniak")
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_FLOW_CONNECT = "infomaniak_connect" # Infomaniak PATs do not expire unless the user sets an explicit lifetime in
# the Manager (up to 30 years). We persist a 10-year horizon so the central
INFOMANIAK_AUTHORIZE_URL = "https://login.infomaniak.com/authorize" # tokenStatus helper does not flag the connection as "no token". Mirrors
INFOMANIAK_TOKEN_URL = "https://login.infomaniak.com/token" # ClickUp.
INFOMANIAK_API_BASE = "https://api.infomaniak.com" _INFOMANIAK_TOKEN_EXPIRES_IN_SEC = 10 * 365 * 24 * 3600
router = APIRouter(
@ -78,251 +75,143 @@ router = APIRouter(
)
@router.get("/auth/connect") @router.post("/connections/{connectionId}/token")
@limiter.limit("5/minute") @limiter.limit("10/minute")
def auth_connect( async def submit_infomaniak_token(
request: Request, request: Request,
connectionId: str = Query(..., description="UserConnection id"), connectionId: str = Path(..., description="UserConnection id"),
body: Dict[str, Any] = Body(..., description="{ 'token': '<PAT>' }"),
currentUser: User = Depends(getCurrentUser), currentUser: User = Depends(getCurrentUser),
) -> RedirectResponse: ) -> Dict[str, Any]:
"""Start Infomaniak OAuth for an existing connection (requires gateway session).""" """Validate and persist an Infomaniak Personal Access Token (PAT).
Body:
{ "token": "<personal-access-token from Infomaniak Manager>" }
"flow": _FLOW_CONNECT,
"connectionId": connectionId, Validation order (both must succeed before persisting):
"userId": str(currentUser.id), 1. ``listAccessibleDrives(pat)`` -> proves the ``drive`` scope
} is on the PAT and confirms the user can see at least one
) kDrive (uses ``/2/drive/init?with=drives`` so users with
query = urlencode( ``role: 'user'`` are also covered).
{ 2. ``resolveOwnerIdentity(pat)`` -> display name + kSuite
"client_id": CLIENT_ID, account_id for the connection UI label (proves at least one
"response_type": "code", of ``workspace:calendar`` / ``workspace:contact`` is present).
"access_type": "offline",
"redirect_uri": REDIRECT_URI, No PAT-derived data is stored as adapter state -- both the drive
"scope": " ".join(infomaniakDataScopes), list and the owner identity are re-resolved lazily by the adapters
"state": state_jwt, at request time.
} """
) pat = (body or {}).get("token")
auth_url = f"{INFOMANIAK_AUTHORIZE_URL}?{query}" if not isinstance(pat, str) or not pat.strip():
return RedirectResponse(auth_url)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg("Missing 'token' in request body"),
)
pat = pat.strip()
interface = getInterface(currentUser)
@router.get("/auth/connect/callback")
async def auth_connect_callback(
code: str = Query(...),
state: str = Query(...),
) -> HTMLResponse:
"""OAuth callback for Infomaniak data connection."""
state_data = _parse_oauth_state(state)
if state_data.get("flow") != _FLOW_CONNECT:
raise HTTPException(
status_code=400, detail=routeApiMsg("Invalid OAuth flow for this callback")
)
connection_id = state_data.get("connectionId")
user_id = state_data.get("userId")
if not connection_id or not user_id:
raise HTTPException(
status_code=400, detail=routeApiMsg("Missing connection or user in OAuth state")
)
_require_infomaniak_config()
async with httpx.AsyncClient() as client:
token_resp = await client.post(
INFOMANIAK_TOKEN_URL,
data={
"grant_type": "authorization_code",
"client_id": CLIENT_ID,
"client_secret": CLIENT_SECRET,
"code": code,
"redirect_uri": REDIRECT_URI,
},
headers={"Content-Type": "application/x-www-form-urlencoded"},
timeout=30.0,
)
if token_resp.status_code != 200:
logger.error(
f"Infomaniak token exchange failed: {token_resp.status_code} {token_resp.text}"
)
return HTMLResponse(
content=f"<html><body><h1>Connection Failed</h1><p>{token_resp.text}</p></body></html>",
status_code=400,
)
token_json = token_resp.json()
access_token = token_json.get("access_token")
refresh_token = token_json.get("refresh_token", "")
expires_in = int(token_json.get("expires_in", 0))
granted_scopes = token_json.get("scope", "")
if not access_token:
return HTMLResponse(
content="<html><body><h1>Connection Failed</h1><p>No access token.</p></body></html>",
status_code=400,
)
rootInterface = getRootInterface()
if not refresh_token:
try:
existing_tokens = rootInterface.getTokensByConnectionIdAndAuthority(
connection_id, AuthAuthority.INFOMANIAK
)
if existing_tokens:
existing_tokens.sort(
key=lambda x: parseTimestamp(x.createdAt, default=0), reverse=True
)
refresh_token = existing_tokens[0].tokenRefresh or ""
except Exception:
pass
async with httpx.AsyncClient() as client:
profile_resp = await client.get(
f"{INFOMANIAK_API_BASE}/1/profile",
headers={
"Authorization": f"Bearer {access_token}",
"Accept": "application/json",
},
timeout=30.0,
)
if profile_resp.status_code != 200:
logger.error(
f"Infomaniak profile lookup failed: {profile_resp.status_code} {profile_resp.text}"
)
return HTMLResponse(
content="<html><body><h1>Connection Failed</h1><p>Could not load Infomaniak profile.</p></body></html>",
status_code=400,
)
profile_payload = profile_resp.json()
profile = profile_payload.get("data") if isinstance(profile_payload, dict) else None
profile = profile or {}
user = rootInterface.getUser(user_id)
if not user:
return HTMLResponse(
content="""
<html><body><script>
if (window.opener) {
window.opener.postMessage({ type: 'infomaniak_connection_error', error: 'User not found' }, '*');
setTimeout(() => window.close(), 1000);
} else window.close();
</script></body></html>
""",
status_code=404,
)
interface = getInterface(user)
connections = interface.getUserConnections(user_id)
connection = None connection = None
for conn in connections: for conn in interface.getUserConnections(currentUser.id):
if conn.id == connection_id: if conn.id == connectionId and conn.authority == AuthAuthority.INFOMANIAK:
connection = conn connection = conn
break break
if not connection: if not connection:
return HTMLResponse( raise HTTPException(
content=""" status_code=status.HTTP_404_NOT_FOUND,
<html><body><script> detail=routeApiMsg("Infomaniak connection not found"),
if (window.opener) {
window.opener.postMessage({ type: 'infomaniak_connection_error', error: 'Connection not found' }, '*');
setTimeout(() => window.close(), 1000);
} else window.close();
</script></body></html>
""",
status_code=404,
) )
try:
drives = await listAccessibleDrives(pat)
except InfomaniakIdentityError as e:
logger.warning(
f"Infomaniak token submit for connection {connectionId} could not "
f"list drives: {e}"
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg(
"Token rejected by Infomaniak (missing scope 'drive'). "
"Required scopes: 'drive' (kDrive) and "
"'workspace:calendar' (or 'workspace:contact'). Mail "
"scope 'workspace:mail' is reserved."
),
)
try:
identity = await resolveOwnerIdentity(pat)
except InfomaniakIdentityError as e:
logger.warning(
f"Infomaniak token submit for connection {connectionId} could not "
f"resolve owner identity: {e}"
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg(
"Could not derive your Infomaniak account from the token. "
"Please ensure the PAT carries 'workspace:calendar' or "
"'workspace:contact' so we can identify your account."
),
)
tokenFingerprint = "pat-" + hashlib.sha256(pat.encode("utf-8")).hexdigest()[:8]
username = identity["displayName"] or f"infomaniak-{tokenFingerprint}"
expiresAt = createExpirationTimestamp(_INFOMANIAK_TOKEN_EXPIRES_IN_SEC)
try:
connection.status = ConnectionStatus.ACTIVE
connection.lastChecked = getUtcTimestamp()
connection.expiresAt = expiresAt
connection.externalId = str(identity["accountId"])
connection.externalUsername = username
connection.grantedScopes = [
"drive",
"workspace:mail",
"workspace:calendar",
"workspace:contact",
]
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
token = Token( token = Token(
userId=user.id, userId=currentUser.id,
authority=AuthAuthority.INFOMANIAK, authority=AuthAuthority.INFOMANIAK,
connectionId=connection_id, connectionId=connectionId,
tokenPurpose=TokenPurpose.DATA_CONNECTION, tokenPurpose=TokenPurpose.DATA_CONNECTION,
tokenAccess=access_token, tokenAccess=pat,
tokenRefresh=refresh_token, tokenRefresh=None,
tokenType=token_json.get("token_type", "bearer"), tokenType="bearer",
expiresAt=expires_at, expiresAt=expiresAt,
createdAt=getUtcTimestamp(), createdAt=getUtcTimestamp(),
) )
interface.saveConnectionToken(token) interface.saveConnectionToken(token)
return HTMLResponse( driveSummary = [
content=f""" {"id": d.get("id"), "name": d.get("name"), "role": d.get("role")}
<html> for d in drives
<head><title>Connection Successful</title></head> ]
<body> logger.info(
<script> f"Infomaniak PAT stored for connection {connectionId} "
if (window.opener) {{ f"(user {currentUser.id}, externalUsername={username}, "
window.opener.postMessage({{ f"kSuiteAccountId={identity['accountId']}, "
type: 'infomaniak_connection_success', f"accessibleDrives={driveSummary})"
connection: {{
id: '{connection.id}',
status: 'connected',
type: 'infomaniak',
lastChecked: {getUtcTimestamp()},
expiresAt: {expires_at}
}}
}}, '*');
setTimeout(() => window.close(), 1000);
}} else {{
window.close();
}}
</script>
</body>
</html>
"""
) )
return {
"id": connection.id,
"status": "connected",
"type": "infomaniak",
"externalUsername": username,
"externalEmail": None,
"lastChecked": connection.lastChecked,
}
except HTTPException:
raise
except Exception as e: except Exception as e:
logger.error(f"Error updating Infomaniak connection: {str(e)}", exc_info=True) logger.error(
return HTMLResponse( f"Error persisting Infomaniak token for connection {connectionId}: {e}",
content=f""" exc_info=True,
<html><body><script> )
if (window.opener) {{ raise HTTPException(
window.opener.postMessage({{ type: 'infomaniak_connection_error', error: {json.dumps(str(e))} }}, '*'); status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
setTimeout(() => window.close(), 1000); detail=routeApiMsg("Failed to store Infomaniak token"),
}} else window.close();
</script></body></html>
""",
status_code=500,
) )

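A small sketch of the fingerprint fallback used for `externalUsername` above when the resolved identity carries no display name; the PAT value here is made up.

import hashlib

pat = "example-personal-access-token"
tokenFingerprint = "pat-" + hashlib.sha256(pat.encode("utf-8")).hexdigest()[:8]
displayName = ""  # e.g. no display name on the resolved Infomaniak identity
username = displayName or f"infomaniak-{tokenFingerprint}"
assert username.startswith("infomaniak-pat-")
assert len(tokenFingerprint) == 12  # "pat-" plus 8 hex chars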
View file

@ -244,9 +244,15 @@ async def auth_login_callback(
def auth_connect( def auth_connect(
request: Request, request: Request,
connectionId: str = Query(..., description="UserConnection id"), connectionId: str = Query(..., description="UserConnection id"),
reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"),
currentUser: User = Depends(getCurrentUser), currentUser: User = Depends(getCurrentUser),
) -> RedirectResponse: ) -> RedirectResponse:
"""Start Microsoft Data OAuth for an existing connection.""" """Start Microsoft Data OAuth for an existing connection.
With ``reauth=1`` the consent screen is forced (``prompt=consent``) so the
user re-grants permissions and any newly added scopes (e.g. Calendars.Read,
Contacts.Read) actually land on the access token.
"""
try: try:
_require_msft_data_config() _require_msft_data_config()
interface = getInterface(currentUser) interface = getInterface(currentUser)
@ -280,6 +286,8 @@ def auth_connect(
if "@" in login_hint: if "@" in login_hint:
login_kwargs["domain_hint"] = login_hint.split("@", 1)[1] login_kwargs["domain_hint"] = login_hint.split("@", 1)[1]
login_kwargs["prompt"] = "login" login_kwargs["prompt"] = "login"
if reauth:
login_kwargs["prompt"] = "consent"
auth_url = msal_app.get_authorization_request_url( auth_url = msal_app.get_authorization_request_url(
scopes=msftDataScopes, scopes=msftDataScopes,
@ -412,6 +420,29 @@ async def auth_connect_callback(
) )
interface.saveConnectionToken(token) interface.saveConnectionToken(token)
try:
from modules.shared.callbackRegistry import callbackRegistry
if connection.knowledgeIngestionEnabled:
callbackRegistry.trigger(
"connection.established",
connectionId=connection.id,
authority=str(getattr(connection.authority, "value", connection.authority) or "msft"),
userId=str(user.id),
)
else:
logger.info(
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connection.id,
"authority": "msft",
"reason": "consent_disabled",
},
)
except Exception as _cbErr:
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
return HTMLResponse( return HTMLResponse(
content=f""" content=f"""
<html> <html>
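Restated as a minimal standalone sketch (the helper name is mine, not the repo's): the only behavioural change of `reauth=1` is which MSAL prompt value is requested.

def effectivePrompt(reauth: int) -> str:
    # reauth=1 forces the consent screen so newly added scopes
    # (e.g. Calendars.Read, Contacts.Read) land on the access token;
    # otherwise the existing forced interactive login is kept.
    return "consent" if reauth else "login"

assert effectivePrompt(1) == "consent"
assert effectivePrompt(0) == "login"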

View file

@ -187,7 +187,15 @@ def _catalogTypeToJsonSchema(typeStr: str, _depth: int = 0) -> Dict[str, Any]:
def _createDispatchHandler(actionExecutor, methodName: str, actionName: str): def _createDispatchHandler(actionExecutor, methodName: str, actionName: str):
"""Create an async handler that dispatches to the ActionExecutor.""" """Create an async handler that dispatches to the ActionExecutor.
Parameter validation and Ref-payload normalization (collapsing
``{id: ..., featureCode: ...}`` from the agent's typed tool schema to the
bare UUID expected by action implementations) happen centrally inside
``ActionExecutor.executeAction`` via ``parameterValidation``. This keeps
a single source of truth for the action parameter contract regardless
of caller (agent, workflow graph, REST route).
"""
async def _handler(args: Dict[str, Any], context: Dict[str, Any]) -> ToolResult: async def _handler(args: Dict[str, Any], context: Dict[str, Any]) -> ToolResult:
try: try:
if context: if context:
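An illustrative sketch of the Ref-payload collapse the docstring above describes; the helper name and exact key handling are assumptions, only the idea of reducing `{id, featureCode}` to the bare UUID comes from the text.

def collapseRefPayload(value):
    # Typed tool schema sends {"id": ..., "featureCode": ...};
    # action implementations expect just the id string.
    if isinstance(value, dict) and "id" in value:
        return value["id"]
    return value

assert collapseRefPayload({"id": "9b2c-uuid", "featureCode": "CRM"}) == "9b2c-uuid"
assert collapseRefPayload("9b2c-uuid") == "9b2c-uuid"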

View file

@ -392,6 +392,18 @@ def buildSystemPrompt(
"- Prefer modular file structures over monolithic files.\n" "- Prefer modular file structures over monolithic files.\n"
"- When generating applications, create separate files for logical components.\n" "- When generating applications, create separate files for logical components.\n"
"- Always plan the structure before writing code.\n\n" "- Always plan the structure before writing code.\n\n"
"### Document references for AI tools (CRITICAL)\n"
"Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
"`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
"- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
" `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
" `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
" in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
" resolver only matches `docItem:` references.\n"
"- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
" and image embeds (`![alt](file:<fileId>)`).\n"
"Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
"`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
) )
if toolsFormatted: if toolsFormatted:
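To make the two-id rule concrete, a sketch of the follow-up tool arguments an agent would emit after a download; the ids are invented.

# downloadFromDataSource reported: "documentList ref: docItem:abc123" and "file id: f42"
summarize_args = {
    "documentList": ["docItem:abc123"],  # the literal docItem string, never the bare file id
    "summaryLength": "medium",
}
read_args = {"fileId": "f42"}  # direct reads, appends, and image embeds use the file id instead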

View file

@ -9,7 +9,9 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_buildResolverDbFromServices, _buildResolverDbFromServices,
_formatToolFileResult,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope, _resolveFileScope,
@ -37,6 +39,11 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
return getattr(chatService, "interfaceDbComponent", None) return getattr(chatService, "interfaceDbComponent", None)
# ---- DataSource convenience tools ---- # ---- DataSource convenience tools ----
# Maps the FE-side `sourceType` literal (see SourcesTab.tsx
# `_SERVICE_TO_SOURCE_TYPE`) to the Connector's `service` key in
# `_SERVICE_MAP`. Keep this table in sync with both the FE and the
# Connector `_SERVICE_MAP` entries -- a missing row produces
# "Service '<sourceType>' not available" in the agent tools.
_SOURCE_TYPE_TO_SERVICE = { _SOURCE_TYPE_TO_SERVICE = {
"sharepointFolder": "sharepoint", "sharepointFolder": "sharepoint",
"onedriveFolder": "onedrive", "onedriveFolder": "onedrive",
@ -45,6 +52,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
"gmailFolder": "gmail", "gmailFolder": "gmail",
"ftpFolder": "files", "ftpFolder": "files",
"clickupList": "clickup", "clickupList": "clickup",
"kdriveFolder": "kdrive",
"calendarFolder": "calendar",
"contactFolder": "contact",
} }
async def _resolveDataSource(dsId: str): async def _resolveDataSource(dsId: str):
@ -223,11 +233,27 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"datasource:{dsId or directService or 'download'}",
userMessage=f"Downloaded {fileName} from external data source",
)
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else "" ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
hint = "Use readFile to read the text content." if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg") else "Use readFile to access the content." hint = (
"Use readFile to read the text content."
if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg")
else "Use readFile to access the content."
)
return ToolResult( return ToolResult(
toolCallId="", toolName="downloadFromDataSource", success=True, toolCallId="", toolName="downloadFromDataSource", success=True,
data=f"Downloaded '{fileName}' ({len(fileBytes)} bytes) → local file id: {fileItem.id}. {hint}" data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Downloaded",
extraInfo=hint,
),
) )
except Exception as e: except Exception as e:
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e)) return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
@ -300,8 +326,15 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register( registry.register(
"downloadFromDataSource", _downloadFromDataSource, "downloadFromDataSource", _downloadFromDataSource,
description=( description=(
"Download a file or email from a data source into local storage. Returns a local file ID " "Download a file or email from a data source into local storage. "
"to read with readFile. Accepts either dataSourceId OR connectionId+service. " "The result line contains TWO ids you must use for different purposes:\n"
" - `documentList ref: docItem:<chatDocId>` -- pass this string verbatim "
" inside the `documentList` parameter of `ai_process`, "
" `ai_summarizeDocument`, `context_extractContent`, `context_neutralizeData`, etc. "
" Always use the `docItem:<chatDocId>` form, NOT the file id, NOT a `{\"documents\":[{\"id\":...}]}` "
" wrapper -- the documentList resolver only matches `docItem:` references against the workflow.\n"
" - `file id: <fileId>` -- pass this to `readFile`, `searchInFileContent`, image embeds (`file:<fileId>`).\n"
"Accepts either dataSourceId OR connectionId+service. "
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content." "For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
), ),
parameters={ parameters={
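A sketch of how the mapping comment above plays out when the FE sends an unknown `sourceType`; the lookup helper and the truncated table are illustrative, not the repo's actual function.

_SOURCE_TYPE_TO_SERVICE = {
    "sharepointFolder": "sharepoint",
    "kdriveFolder": "kdrive",
    "calendarFolder": "calendar",
    "contactFolder": "contact",
}

def resolveService(sourceType: str) -> str:
    service = _SOURCE_TYPE_TO_SERVICE.get(sourceType)
    if not service:
        # Missing row -> the agent-facing error mentioned in the comment above.
        raise ValueError(f"Service '{sourceType}' not available")
    return service

assert resolveService("kdriveFolder") == "kdrive"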

View file

@ -11,8 +11,6 @@ from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistr
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary,
_resolveFileScope,
_MAX_TOOL_RESULT_CHARS, _MAX_TOOL_RESULT_CHARS,
) )
@ -392,65 +390,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
if chunkMime: if chunkMime:
mimeType = chunkMime mimeType = chunkMime
# 2) File not yet indexed -> trigger extraction via ExtractionService, then retry # 2) Direct image file (not a container) - use raw file data
if not imageData and knowledgeService and not knowledgeService.isFileIndexed(fileId):
try:
chatService = services.chat
fileInfo = chatService.getFileInfo(fileId)
fileContent = chatService.getFileContent(fileId)
if fileContent and fileInfo:
rawData = fileContent.get("data", "")
if isinstance(rawData, str) and len(rawData) > 100:
rawBytes = _b64.b64decode(rawData)
elif isinstance(rawData, bytes):
rawBytes = rawData
else:
rawBytes = None
if rawBytes:
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ExtractionOptions
fileMime = fileInfo.get("mimeType", "application/octet-stream")
fileName = fileInfo.get("fileName", fileId)
extracted = runExtraction(
ExtractorRegistry(), None,
rawBytes, fileName, fileMime, ExtractionOptions(),
)
contentObjects = []
for part in extracted.parts:
tg = (part.typeGroup or "").lower()
ct = "image" if tg == "image" else "text"
if not part.data or not part.data.strip():
continue
contentObjects.append({
"contentObjectId": part.id,
"contentType": ct,
"data": part.data,
"contextRef": {"containerPath": fileName, "location": part.label, **(part.metadata or {})},
})
if contentObjects:
_diFiId, _diMId = _resolveFileScope(fileId, context)
await knowledgeService.indexFile(
fileId=fileId, fileName=fileName, mimeType=fileMime,
userId=context.get("userId", ""), contentObjects=contentObjects,
featureInstanceId=_diFiId,
mandateId=_diMId,
)
chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
if pageIndex is not None:
imageChunks = [c for c in imageChunks if c.get("contextRef", {}).get("pageIndex") == pageIndex]
if imageChunks:
imageData = imageChunks[0].get("data", "")
except Exception as extractErr:
logger.warning(f"describeImage: on-demand extraction failed: {extractErr}")
# 3) Direct image file (not a container) - use raw file data
if not imageData: if not imageData:
chatService = services.chat chatService = services.chat
fileContent = chatService.getFileContent(fileId) fileContent = chatService.getFileContent(fileId)
@ -460,7 +400,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
imageData = fileContent.get("data", "") imageData = fileContent.get("data", "")
mimeType = fileMimeType mimeType = fileMimeType
# 4) PDF page rendering: render the requested page as an image via PyMuPDF # 3) PDF page rendering: render the requested page as an image via PyMuPDF
if not imageData: if not imageData:
chatService = services.chat chatService = services.chat
fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None

View file

@ -3,7 +3,8 @@
"""Shared helpers for core agent tools (file scope, binary detection, temp folder).""" """Shared helpers for core agent tools (file scope, binary detection, temp folder)."""
import logging import logging
from typing import Any, Optional import uuid
from typing import Any, Dict, Optional, Tuple
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -78,6 +79,138 @@ def _getOrCreateTempFolder(chatService) -> Optional[str]:
return None return None
def _attachFileAsChatDocument(
services: Any,
fileItem: Any,
*,
label: str = "agent_tool_output",
userMessage: str = "",
role: str = "assistant",
) -> Optional[str]:
"""Bind a persisted FileItem to the active workflow as a ChatDocument.
This is the **single canonical bridge** between agent-tool-produced
artefacts and the workflow's document model. Mirrors the pattern
used by workflow actions (``workflowProcessor.persistTaskResult`` /
``methodTrustee.extractFromFiles``): every artefact a workflow step
-- including agent tools -- materialises ends up addressable via
``docItem:<chatDocId>`` so downstream tools that consume
``documentList`` can resolve it against
``workflow.messages[*].documents[*].id``.
Without this bind the agent's ``downloadFromDataSource`` /
``writeFile(create)`` / ``renderDocument`` / ``generateImage`` /
``createChart`` outputs are FileItem-only and unreachable from
``getChatDocumentsFromDocumentList`` -- the symptom is
``ai_summarizeDocument`` etc. running with 0 ContentParts.
Args:
services: agent-tool services container (must expose ``.chat``).
fileItem: persisted FileItem (Pydantic obj or dict) returned
from ``saveUploadedFile`` / ``createFile`` /
``saveGeneratedFile``.
label: ``documentsLabel`` for the carrier ChatMessage --
picked up by ``docList:<label>`` references.
userMessage: optional human-readable message text.
role: ``"assistant"`` (default) or ``"tool"``; affects only
display semantics, not resolution.
Returns:
The new ``ChatDocument.id`` on success, or ``None`` when no
active workflow is bound to the chat service (e.g. standalone
agent calls outside a chat workflow). Never raises.
"""
try:
chatService = services.chat
workflow = getattr(chatService, "_workflow", None)
if not workflow or not getattr(workflow, "id", None):
return None
if isinstance(fileItem, dict):
fileId = fileItem.get("id")
fileName = fileItem.get("fileName")
fileSize = fileItem.get("fileSize") or 0
mimeType = fileItem.get("mimeType") or "application/octet-stream"
else:
fileId = getattr(fileItem, "id", None)
fileName = getattr(fileItem, "fileName", None)
fileSize = getattr(fileItem, "fileSize", None) or 0
mimeType = getattr(fileItem, "mimeType", None) or "application/octet-stream"
if not fileId:
logger.warning("_attachFileAsChatDocument: fileItem has no id, skipping bind.")
return None
chatDoc: Dict[str, Any] = {
"id": str(uuid.uuid4()),
"fileId": fileId,
"fileName": fileName or fileId,
"fileSize": fileSize,
"mimeType": mimeType,
"roundNumber": getattr(workflow, "currentRound", None),
"taskNumber": getattr(workflow, "currentTask", None),
"actionNumber": getattr(workflow, "currentAction", None),
}
messageData: Dict[str, Any] = {
"id": f"msg_tool_{uuid.uuid4().hex[:12]}",
"role": role,
"status": "step",
"message": userMessage or f"Tool result: {fileName or fileId}",
"documentsLabel": label,
}
createdMessage = chatService.storeMessageWithDocuments(
workflow, messageData, [chatDoc],
)
if not createdMessage or not getattr(createdMessage, "documents", None):
return None
return createdMessage.documents[0].id
except Exception as e:
logger.warning(f"_attachFileAsChatDocument failed (fileItem id={getattr(fileItem, 'id', None) or (fileItem.get('id') if isinstance(fileItem, dict) else '?')}): {e}")
return None
def _formatToolFileResult(
*,
fileItem: Any,
chatDocId: Optional[str],
actionLabel: str = "Created",
extraInfo: str = "",
) -> str:
"""Render the canonical agent-tool file result message.
Always presents BOTH ids the agent needs:
* ``docItem:<chatDocId>`` -- use as ``documentList`` entry for
tools like ``ai_process`` / ``ai_summarizeDocument`` /
``context_extractContent`` (resolved through ChatDocument).
* ``file id: <fileItem.id>`` -- use as ``fileId`` for direct
reads via ``readFile`` / ``downloadFile`` / image embedding
(``file:<fileItem.id>``).
When no active workflow is bound, ``chatDocId`` is ``None`` and
only the file-id line is shown -- the file is still usable for
direct reads, just not for ``documentList`` references (those
require a workflow context anyway).
"""
if isinstance(fileItem, dict):
fileId = fileItem.get("id", "?")
fileName = fileItem.get("fileName", "")
fileSize = fileItem.get("fileSize", 0)
else:
fileId = getattr(fileItem, "id", "?")
fileName = getattr(fileItem, "fileName", "")
fileSize = getattr(fileItem, "fileSize", 0)
head = f"{actionLabel} '{fileName}' ({fileSize} bytes)" if fileName else f"{actionLabel} file ({fileSize} bytes)"
parts = [head]
if chatDocId:
parts.append(f" documentList ref: docItem:{chatDocId}")
parts.append(f" file id: {fileId}")
if extraInfo:
parts.append(extraInfo)
return "\n".join(parts)
def _buildResolverDbFromServices(services: Any): def _buildResolverDbFromServices(services: Any):
"""DB adapter for ConnectorResolver: load UserConnections by id. """DB adapter for ConnectorResolver: load UserConnections by id.

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope, _resolveFileScope,
@ -316,7 +318,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})") chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"renderDocument:{docName}",
userMessage=f"Rendered document {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({ sideEvents.append({
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -340,7 +348,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). " "Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with " "For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. " "`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
"Images: ![alt text](file:fileId) in the markdown." "Images: ![alt text](file:fileId) in the markdown. "
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -588,7 +599,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})") chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"generateImage:{docName}",
userMessage=f"Generated image {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({ sideEvents.append({
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -612,7 +629,9 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Generate an image from a text description using AI (DALL-E). " "Generate an image from a text description using AI (DALL-E). "
"The generated image is saved as a file in the workspace. " "The generated image is saved as a file in the workspace. "
"Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. " "Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. "
"Provide a detailed, descriptive prompt for best results." "Provide a detailed, descriptive prompt for best results. "
"Each image's result line carries `file id: <fileId>` (for embeds / readFile) and "
"`doc ref: docItem:<chatDocId>` (use inside `documentList` for downstream AI tools)."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -743,14 +762,24 @@ def _registerMediaTools(registry: ToolRegistry, services):
if tempFolderId and fid != "?": if tempFolderId and fid != "?":
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"createChart:{fileName}",
userMessage=f"Created chart {fileName}",
)
sideEvents = [{"type": "fileCreated", "data": { sideEvents = [{"type": "fileCreated", "data": {
"fileId": fid, "fileName": fileName, "fileId": fid, "fileName": fileName,
"mimeType": "image/png", "fileSize": len(pngData), "mimeType": "image/png", "fileSize": len(pngData),
}}] }}]
return ToolResult( return ToolResult(
toolCallId="", toolName="createChart", success=True, toolCallId="", toolName="createChart", success=True,
data=f"Chart saved as '{fileName}' (id: {fid}, {len(pngData)} bytes). " data=_formatToolFileResult(
f"Embed in documents with: ![{title}](file:{fid})", fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Chart saved as",
extraInfo=f"Embed in documents with: ![{title}](file:{fid})",
),
sideEvents=sideEvents, sideEvents=sideEvents,
) )
@ -764,7 +793,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Create a data chart/graph as a PNG image using matplotlib. " "Create a data chart/graph as a PNG image using matplotlib. "
"Supported types: bar, horizontalBar, line, area, scatter, pie, donut. " "Supported types: bar, horizontalBar, line, area, scatter, pie, donut. "
"The chart is saved as a file in the workspace. " "The chart is saved as a file in the workspace. "
"Use the returned fileId to embed in documents via renderDocument: ![title](file:fileId). " "Use the returned `file id: <fileId>` to embed in documents via "
"renderDocument: ![title](file:fileId). The result line also carries "
"`doc ref: docItem:<chatDocId>` -- use it inside `documentList` for "
"downstream AI tools that need the chart as a data source. "
"Provide structured data with labels and datasets." "Provide structured data with labels and datasets."
), ),
parameters={ parameters={

View file

@ -9,10 +9,11 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateInstanceFolder, _getOrCreateInstanceFolder,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope,
_MAX_TOOL_RESULT_CHARS, _MAX_TOOL_RESULT_CHARS,
) )
@ -48,6 +49,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
return ToolResult(toolCallId="", toolName="readFile", success=False, error="fileId is required") return ToolResult(toolCallId="", toolName="readFile", success=False, error="fileId is required")
try: try:
knowledgeService = services.getService("knowledge") if hasattr(services, "getService") else None knowledgeService = services.getService("knowledge") if hasattr(services, "getService") else None
fileStatus = None
# 1) Knowledge Store: return already-extracted text chunks # 1) Knowledge Store: return already-extracted text chunks
if knowledgeService: if knowledgeService:
@ -75,7 +77,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
data=f"[File {fileId} is currently being processed (status: {fileStatus}). Try again shortly.]", data=f"[File {fileId} is currently being processed (status: {fileStatus}). Try again shortly.]",
) )
# 2) Not indexed yet: try on-demand extraction # 2) Not indexed yet: inspect file type to decide how to serve the agent
# (binary -> instruct agent to wait / re-upload; text -> decode raw bytes inline)
chatService = services.chat chatService = services.chat
fileInfo = chatService.getFileInfo(fileId) fileInfo = chatService.getFileInfo(fileId)
if not fileInfo: if not fileInfo:
@ -98,83 +101,14 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
isBinary = _looksLikeBinary(rawBytes) isBinary = _looksLikeBinary(rawBytes)
if isBinary: if isBinary:
try:
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ExtractionOptions
extracted = runExtraction(
ExtractorRegistry(), ChunkerRegistry(),
rawBytes, fileName, mimeType, ExtractionOptions(),
)
contentObjects = []
for part in extracted.parts:
tg = (part.typeGroup or "").lower()
ct = "image" if tg == "image" else "text"
if not part.data or not part.data.strip():
continue
contentObjects.append({
"contentObjectId": part.id,
"contentType": ct,
"data": part.data,
"contextRef": {
"containerPath": fileName,
"location": part.label or "file",
**(part.metadata or {}),
},
})
if contentObjects:
if knowledgeService:
try:
userId = context.get("userId", "")
_fiId, _mId = _resolveFileScope(fileId, context)
await knowledgeService.indexFile(
fileId=fileId, fileName=fileName, mimeType=mimeType,
userId=userId, contentObjects=contentObjects,
featureInstanceId=_fiId,
mandateId=_mId,
)
except Exception as e:
logger.warning(f"readFile: knowledge indexing failed for {fileId}: {e}")
joined = ""
if knowledgeService:
_chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
_textChunks = [
c for c in (_chunks or [])
if c.get("contentType") != "image" and c.get("data")
]
if _textChunks:
joined = "\n\n".join(c["data"] for c in _textChunks)
if not joined:
textParts = [o["data"] for o in contentObjects if o["contentType"] != "image"]
joined = "\n\n".join(textParts) if textParts else ""
if joined:
chunked = _applyOffsetLimit(joined, offset, limit)
if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
if len(joined) > _MAX_TOOL_RESULT_CHARS:
joined = joined[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(joined)}. Use offset/limit to read specific sections.]"
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=joined,
)
imgCount = sum(1 for o in contentObjects if o["contentType"] == "image")
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=f"[Extracted {len(contentObjects)} content objects from '{fileName}' "
f"({imgCount} images, no readable text). "
f"Use describeImage(fileId='{fileId}') to analyze visual content.]",
)
except Exception as extractErr:
logger.warning(f"readFile extraction failed for {fileId} ({fileName}): {extractErr}")
return ToolResult( return ToolResult(
toolCallId="", toolName="readFile", success=True, toolCallId="", toolName="readFile", success=True,
data=f"[Binary file: '{fileName}', type={mimeType}, size={len(rawBytes)} bytes. " data=(
f"Text extraction not available. Use describeImage for images.]", f"[File '{fileName}' ({mimeType}) is not yet indexed "
f"(status: {fileStatus or 'unknown'}). Indexing runs automatically "
f"on upload. Please wait a few seconds and retry, or re-upload the file. "
f"For visual content use describeImage(fileId='{fileId}').]"
),
) )
# 3) Text file: decode raw bytes # 3) Text file: decode raw bytes
@ -428,9 +362,19 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId}) dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
if args.get("tags"): if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]}) dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"writeFile:{name}",
userMessage=f"Created {name} via writeFile",
)
return ToolResult( return ToolResult(
toolCallId="", toolName="writeFile", success=True, toolCallId="", toolName="writeFile", success=True,
data=f"File '{name}' created (id: {fileItem.id})", data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Created",
),
sideEvents=[{ sideEvents=[{
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -573,7 +517,11 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"- create (default): create a new file (name required).\n" "- create (default): create a new file (name required).\n"
"- append: append content to an existing file (fileId required). " "- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n" "Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required)." "- overwrite: replace entire file content (fileId required).\n"
"On `mode=create` the result line contains BOTH a `documentList ref: docItem:<chatDocId>` "
"(use this for documentList parameters of `ai_process` / `ai_summarizeDocument` / "
"`context_extractContent` etc., always as the literal string `docItem:<id>`) AND a "
"`file id: <fileId>` (use this for `readFile`, `writeFile mode=append`, image embeds)."
), ),
parameters={ parameters={
"type": "object", "type": "object",

View file

@ -178,6 +178,33 @@ class AgentService:
if workflowId is None: if workflowId is None:
workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown" workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown"
# Propagate the active workflow into every service's request
# context so agent-tool side effects (e.g. _attachFileAsChatDocument
# for downloadFromDataSource / writeFile / renderDocument) can
# bind their FileItem outputs to the workflow as ChatDocuments.
# Without this, chatService._workflow (= chatService._context.workflow)
# stays None and the documentList resolver finds zero docs --
# which is exactly the "Building structure prompt with 0 valid
# ContentParts" symptom we see when the workspace route calls
# runAgent for an attached single-file data source.
# Mirrors workflowManager._propagateWorkflowToContext.
if workflowId and workflowId != "unknown":
try:
workflow = getattr(self.services, "workflow", None)
if workflow is None or getattr(workflow, "id", None) != workflowId:
workflow = self.services.chat.getWorkflow(workflowId)
if workflow is not None:
self.services.workflow = workflow
ctx = getattr(self.services, "_service_context", None)
if ctx is not None:
ctx.workflow = workflow
for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
svc = getattr(self.services, attr, None)
if svc is not None and hasattr(svc, "_context") and svc._context is not None:
svc._context.workflow = workflow
except Exception as e:
logger.warning(f"runAgent: could not propagate workflow {workflowId} into service contexts: {e}")
resolvedLanguage = userLanguage or "" resolvedLanguage = userLanguage or ""
enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds) enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds)
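A distilled sketch of the propagation step above; the helper name is mine, the attribute names follow the block shown.

def propagateWorkflow(services, workflow) -> None:
    # Every sub-service request context gets the active workflow so
    # agent-tool side effects can bind their FileItems as ChatDocuments.
    services.workflow = workflow
    for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
        svc = getattr(services, attr, None)
        if svc is not None and getattr(svc, "_context", None) is not None:
            svc._context.workflow = workflow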

View file

@ -164,12 +164,29 @@ class AiService:
# SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection # SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection
if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS: if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS:
return await self._handleSpeechTeams(request) return await self._handleSpeechTeams(request)
# FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check) _opType = request.options.operationType if request.options else None
self._preflightBillingCheck() _isNeutralizationCall = _opType in (
OperationTypeEnum.NEUTRALIZATION_TEXT,
# Balance & provider permission checks OperationTypeEnum.NEUTRALIZATION_IMAGE,
await self._checkBillingBeforeAiCall() )
if not _isNeutralizationCall:
# FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check)
self._preflightBillingCheck()
# Balance & provider permission checks
await self._checkBillingBeforeAiCall()
else:
# Neutralization calls are system-level operations (connector anonymization).
# They run without a mandate context (e.g. personal-scope connections) and
# are billed the same way as embedding calls: best-effort, skipped when no
# billing settings exist for an empty mandate.
logger.debug(
"callAi: skipping billing preflight for neutralization call "
"(operationType=%s, user=%s)",
_opType,
getattr(getattr(self.services, 'user', None), 'id', 'unknown'),
)
# Calculate effective allowedProviders: RBAC ∩ Workflow # Calculate effective allowedProviders: RBAC ∩ Workflow
effectiveProviders = self._calculateEffectiveProviders() effectiveProviders = self._calculateEffectiveProviders()
@ -218,8 +235,15 @@ class AiService:
Rehydration happens on the final AiCallResponse (not on individual str deltas). Rehydration happens on the final AiCallResponse (not on individual str deltas).
""" """
await self.ensureAiObjectsInitialized() await self.ensureAiObjectsInitialized()
self._preflightBillingCheck()
await self._checkBillingBeforeAiCall() _streamOpType = request.options.operationType if request.options else None
_isNeutralizationStream = _streamOpType in (
OperationTypeEnum.NEUTRALIZATION_TEXT,
OperationTypeEnum.NEUTRALIZATION_IMAGE,
)
if not _isNeutralizationStream:
self._preflightBillingCheck()
await self._checkBillingBeforeAiCall()
effectiveProviders = self._calculateEffectiveProviders() effectiveProviders = self._calculateEffectiveProviders()
if effectiveProviders and request.options: if effectiveProviders and request.options:
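The gating rule restated as a tiny standalone sketch; enum members are shown as plain strings for illustration only.

_NEUTRALIZATION_OPS = {"NEUTRALIZATION_TEXT", "NEUTRALIZATION_IMAGE"}

def shouldSkipBillingPreflight(opTypeName: str) -> bool:
    # Neutralization calls are system-level (connector anonymization) and may
    # run without a mandate, so preflight billing and balance checks are skipped.
    return opTypeName in _NEUTRALIZATION_OPS

assert shouldSkipBillingPreflight("NEUTRALIZATION_TEXT")
assert not shouldSkipBillingPreflight("SPEECH_TEAMS")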

View file

@ -463,36 +463,38 @@ class ChatService:
Returns: Returns:
List of file info dicts. List of file info dicts.
""" """
# `getAllFiles` returns `List[dict]` (each entry is a
# `FileItem.model_dump()` enriched with label columns) -- not
# Pydantic objects -- so we use dict-access throughout.
allFiles = self.interfaceDbComponent.getAllFiles() allFiles = self.interfaceDbComponent.getAllFiles()
results = [] results = []
for fileItem in allFiles: for fileItem in allFiles:
if folderId is not None: if folderId is not None:
itemFolderId = getattr(fileItem, "folderId", None) if fileItem.get("folderId") != folderId:
if itemFolderId != folderId:
continue continue
if tags: if tags:
itemTags = getattr(fileItem, "tags", None) or [] itemTags = fileItem.get("tags") or []
if not any(t in itemTags for t in tags): if not any(t in itemTags for t in tags):
continue continue
if search: if search:
searchLower = search.lower() searchLower = search.lower()
nameMatch = searchLower in (fileItem.fileName or "").lower() nameMatch = searchLower in (fileItem.get("fileName") or "").lower()
descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower() descMatch = searchLower in (fileItem.get("description") or "").lower()
if not nameMatch and not descMatch: if not nameMatch and not descMatch:
continue continue
results.append({ results.append({
"id": fileItem.id, "id": fileItem.get("id"),
"fileName": fileItem.fileName, "fileName": fileItem.get("fileName"),
"mimeType": fileItem.mimeType, "mimeType": fileItem.get("mimeType"),
"fileSize": fileItem.fileSize, "fileSize": fileItem.get("fileSize"),
"creationDate": fileItem.sysCreatedAt, "creationDate": fileItem.get("sysCreatedAt"),
"tags": getattr(fileItem, "tags", None), "tags": fileItem.get("tags"),
"folderId": getattr(fileItem, "folderId", None), "folderId": fileItem.get("folderId"),
"description": getattr(fileItem, "description", None), "description": fileItem.get("description"),
"status": getattr(fileItem, "status", None), "status": fileItem.get("status"),
}) })
return results return results

View file

@ -2,9 +2,13 @@
# All rights reserved. # All rights reserved.
"""Knowledge service: 3-tier RAG with indexing, semantic search, and context building.""" """Knowledge service: 3-tier RAG with indexing, semantic search, and context building."""
import hashlib
import json
import logging import logging
import re import re
from typing import Any, Callable, Dict, List, Optional import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Union
from modules.datamodels.datamodelKnowledge import ( from modules.datamodels.datamodelKnowledge import (
FileContentIndex, ContentChunk, WorkflowMemory, FileContentIndex, ContentChunk, WorkflowMemory,
@ -20,6 +24,68 @@ DEFAULT_CHUNK_TOKENS = 400
DEFAULT_CONTEXT_BUDGET = 12000 DEFAULT_CONTEXT_BUDGET = 12000
# =============================================================================
# Ingestion façade (P0 of unified-knowledge-indexing concept)
# =============================================================================
@dataclass
class IngestionJob:
"""One request to add or refresh content in the unified knowledge store.
Callers from any lane (routes, feature hooks, agent tools, connector sync)
describe the work they want done via this object; idempotency, scope
resolution, and embedding are handled by KnowledgeService.requestIngestion.
"""
sourceKind: str
sourceId: str
fileName: str
mimeType: str
userId: str
contentObjects: List[Dict[str, Any]] = field(default_factory=list)
featureInstanceId: str = ""
mandateId: str = ""
structure: Optional[Dict[str, Any]] = None
containerPath: Optional[str] = None
contentVersion: Optional[str] = None
provenance: Optional[Dict[str, Any]] = None
# Connector-driven neutralization: True when the user opted in via §2.6 preferences.
# For sourceKind == "file", _indexFileInternal resolves this from FileItem.neutralize instead.
neutralize: bool = False
@dataclass
class IngestionHandle:
"""Result of requestIngestion. Stable across in-process and future queue impls."""
jobId: str
status: str
contentHash: str
fileId: str
index: Optional[FileContentIndex] = None
error: Optional[str] = None
def _computeIngestionHash(contentObjects: List[Dict[str, Any]]) -> str:
"""Deterministic SHA256 over (contentType, data) tuples in extractor order.
`contentObjectId` is intentionally excluded because extractors generate
fresh UUIDs per run (`uuid.uuid4()`), which would make the hash unstable
across re-extractions of the same source, defeating idempotency.
Order is preserved (no sort) because two different documents can share the
same multiset of parts but differ in arrangement (e.g. swapped pages).
Text whitespace is preserved intentionally because chunk boundaries
depend on it.
"""
normalized = [
(
str(o.get("contentType", "text") or "text"),
o.get("data", "") or "",
)
for o in (contentObjects or [])
]
payload = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
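A standalone restatement of the hash (name shortened here) to illustrate the idempotency property: per-run content-object ids do not affect it, while content and ordering do.

import hashlib, json

def computeIngestionHash(contentObjects):
    normalized = [
        (str(o.get("contentType", "text") or "text"), o.get("data", "") or "")
        for o in (contentObjects or [])
    ]
    payload = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()

a = [{"contentObjectId": "uuid-run-1", "contentType": "text", "data": "page 1"}]
b = [{"contentObjectId": "uuid-run-2", "contentType": "text", "data": "page 1"}]
assert computeIngestionHash(a) == computeIngestionHash(b)  # fresh per-run ids are ignored
c = [{"contentType": "text", "data": "page 1"}, {"contentType": "text", "data": "page 2"}]
assert computeIngestionHash(c) != computeIngestionHash(list(reversed(c)))  # order-sensitive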
class KnowledgeService: class KnowledgeService:
"""Service for Knowledge Store operations: indexing, retrieval, and context building.""" """Service for Knowledge Store operations: indexing, retrieval, and context building."""
@ -46,6 +112,224 @@ class KnowledgeService:
results = await self._embed([text]) results = await self._embed([text])
return results[0] if results else [] return results[0] if results else []
# =========================================================================
# Ingestion façade (single entry point for all lanes)
# =========================================================================
async def requestIngestion(self, job: IngestionJob) -> IngestionHandle:
"""Unified entry point for filling the knowledge corpus.
Applies idempotency based on a content hash (or caller-supplied
`contentVersion`) persisted in `FileContentIndex.structure._ingestion`.
Re-runs indexing only when the hash differs or the previous run did
not reach `indexed` state. Runs embedding synchronously for now
(callers already schedule background tasks where needed).
"""
jobId = f"{job.sourceKind}:{job.sourceId}"
startMs = time.time()
contentHash = job.contentVersion or _computeIngestionHash(job.contentObjects)
# 1. Check for duplicate via existing FileContentIndex row.
existing = None
try:
existing = self._knowledgeDb.getFileContentIndex(job.sourceId)
except Exception:
existing = None
if existing:
existingStructure = (
existing.get("structure") if isinstance(existing, dict)
else getattr(existing, "structure", {})
) or {}
existingMeta = existingStructure.get("_ingestion", {}) or {}
existingStatus = (
existing.get("status") if isinstance(existing, dict)
else getattr(existing, "status", "")
) or ""
if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
logger.info(
"ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
job.sourceKind, job.sourceId, contentHash[:12],
extra={
"event": "ingestion.skipped.duplicate",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"durationMs": int((time.time() - startMs) * 1000),
},
)
return IngestionHandle(
jobId=jobId,
status="duplicate",
contentHash=contentHash,
fileId=job.sourceId,
index=None,
)
# 2. Prepare ingestion metadata; stays in structure._ingestion so
# later connector revoke/purge can filter chunks by sourceKind /
# provenance.connectionId without a schema migration.
ingestionMeta = {
"hash": contentHash,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"contentVersion": job.contentVersion,
"indexedAt": getUtcTimestamp(),
"provenance": dict(job.provenance or {}),
}
structure = dict(job.structure or {})
structure["_ingestion"] = ingestionMeta
logger.info(
"ingestion.queued sourceKind=%s sourceId=%s objects=%d hash=%s",
job.sourceKind, job.sourceId, len(job.contentObjects or []), contentHash[:12],
extra={
"event": "ingestion.queued",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"objectCount": len(job.contentObjects or []),
},
)
# 3. Run real indexing.
try:
index = await self._indexFileInternal(
fileId=job.sourceId,
fileName=job.fileName,
mimeType=job.mimeType,
userId=job.userId,
featureInstanceId=job.featureInstanceId,
mandateId=job.mandateId,
contentObjects=job.contentObjects or [],
structure=structure,
containerPath=job.containerPath,
sourceKind=job.sourceKind,
connectionId=(job.provenance or {}).get("connectionId"),
neutralize=job.neutralize,
)
except Exception as exc:
logger.error(
"ingestion.failed sourceKind=%s sourceId=%s error=%s",
job.sourceKind, job.sourceId, exc,
exc_info=True,
extra={
"event": "ingestion.failed",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"error": str(exc),
"durationMs": int((time.time() - startMs) * 1000),
},
)
try:
self._knowledgeDb.updateFileStatus(job.sourceId, "failed")
except Exception:
pass
return IngestionHandle(
jobId=jobId,
status="failed",
contentHash=contentHash,
fileId=job.sourceId,
index=None,
error=str(exc),
)
logger.info(
"ingestion.indexed sourceKind=%s sourceId=%s objects=%d durationMs=%d",
job.sourceKind, job.sourceId, len(job.contentObjects or []),
int((time.time() - startMs) * 1000),
extra={
"event": "ingestion.indexed",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"objectCount": len(job.contentObjects or []),
"durationMs": int((time.time() - startMs) * 1000),
},
)
return IngestionHandle(
jobId=jobId,
status="indexed",
contentHash=contentHash,
fileId=job.sourceId,
index=index,
)
def purgeConnection(self, connectionId: str) -> Dict[str, int]:
"""Delete every FileContentIndex + ContentChunk linked to a UserConnection.
Called on `connection.revoked` events so the knowledge corpus never
holds chunks the user has withdrawn access to. Returns deletion counts
for observability.
"""
if not connectionId:
return {"indexRows": 0, "chunks": 0}
startMs = time.time()
result = self._knowledgeDb.deleteFileContentIndexByConnectionId(connectionId)
logger.info(
"ingestion.connection.purged connectionId=%s rows=%d chunks=%d durationMs=%d",
connectionId, result["indexRows"], result["chunks"],
int((time.time() - startMs) * 1000),
extra={
"event": "ingestion.connection.purged",
"connectionId": connectionId,
"indexRows": result["indexRows"],
"chunks": result["chunks"],
"durationMs": int((time.time() - startMs) * 1000),
},
)
return result
def getIngestionStatus(
self, handleOrJobId: Union[IngestionHandle, str]
) -> Dict[str, Any]:
"""Map a handle or `sourceKind:sourceId` jobId to a status snapshot."""
if isinstance(handleOrJobId, IngestionHandle):
sourceId = handleOrJobId.fileId
jobId = handleOrJobId.jobId
elif isinstance(handleOrJobId, str) and ":" in handleOrJobId:
jobId = handleOrJobId
sourceId = handleOrJobId.split(":", 1)[1]
else:
jobId = str(handleOrJobId)
sourceId = str(handleOrJobId)
row = None
try:
row = self._knowledgeDb.getFileContentIndex(sourceId)
except Exception:
row = None
if not row:
return {
"jobId": jobId,
"sourceId": sourceId,
"status": "unknown",
"contentHash": None,
}
structure = (
row.get("structure") if isinstance(row, dict)
else getattr(row, "structure", {})
) or {}
meta = structure.get("_ingestion", {}) or {}
status = (
row.get("status") if isinstance(row, dict)
else getattr(row, "status", "")
) or "unknown"
return {
"jobId": jobId,
"sourceId": sourceId,
"status": status,
"contentHash": meta.get("hash"),
"sourceKind": meta.get("sourceKind"),
"indexedAt": meta.get("indexedAt"),
}
# ========================================================================= # =========================================================================
# File Indexing (called after extraction, before embedding) # File Indexing (called after extraction, before embedding)
# ========================================================================= # =========================================================================
@ -61,6 +345,57 @@ class KnowledgeService:
contentObjects: List[Dict[str, Any]] = None, contentObjects: List[Dict[str, Any]] = None,
structure: Dict[str, Any] = None, structure: Dict[str, Any] = None,
containerPath: str = None, containerPath: str = None,
) -> Optional[FileContentIndex]:
"""Backward-compatible wrapper delegating to requestIngestion.
Existing callers that still invoke `indexFile` directly automatically
participate in the idempotency/metrics layer. New callers should
prefer `requestIngestion` so they can pass `sourceKind` and
`provenance` for connector revoke/purge later.
"""
job = IngestionJob(
sourceKind="file",
sourceId=fileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
featureInstanceId=featureInstanceId,
mandateId=mandateId,
contentObjects=list(contentObjects or []),
structure=structure,
containerPath=containerPath,
)
handle = await self.requestIngestion(job)
if handle.index is not None:
return handle.index
if handle.status == "duplicate":
row = None
try:
row = self._knowledgeDb.getFileContentIndex(fileId)
except Exception:
row = None
if isinstance(row, dict):
try:
return FileContentIndex(**row)
except Exception:
return None
return row
return None
async def _indexFileInternal(
self,
fileId: str,
fileName: str,
mimeType: str,
userId: str,
featureInstanceId: str = "",
mandateId: str = "",
contentObjects: List[Dict[str, Any]] = None,
structure: Dict[str, Any] = None,
containerPath: str = None,
sourceKind: str = "file",
connectionId: Optional[str] = None,
neutralize: bool = False,
) -> FileContentIndex: ) -> FileContentIndex:
"""Index a file's content objects and create embeddings for text chunks. """Index a file's content objects and create embeddings for text chunks.
@ -83,39 +418,41 @@ class KnowledgeService:
""" """
contentObjects = contentObjects or [] contentObjects = contentObjects or []
# 1. Resolve scope fields from FileItem (Single Source of Truth) # 1. Resolve scope fields from FileItem (Single Source of Truth) for
# FileItem lives in poweron_management; its scope/mandateId/featureInstanceId # uploaded files. Connector-sourced ingestion (sharepoint_item,
# are authoritative and must be mirrored onto the FileContentIndex. # outlook_message, ...) has no FileItem row — trust the caller's
# scope + ids directly.
resolvedScope = "personal" resolvedScope = "personal"
resolvedMandateId = mandateId resolvedMandateId = mandateId
resolvedFeatureInstanceId = featureInstanceId resolvedFeatureInstanceId = featureInstanceId
resolvedUserId = userId resolvedUserId = userId
_shouldNeutralize = False _shouldNeutralize = neutralize # caller-supplied flag (connector prefs / IngestionJob)
try: if sourceKind == "file":
from modules.datamodels.datamodelFiles import FileItem as _FileItem try:
_dbComponent = getattr(self._context, "interfaceDbComponent", None) from modules.datamodels.datamodelFiles import FileItem as _FileItem
_fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else [] _dbComponent = getattr(self._context, "interfaceDbComponent", None)
if not _fileRecords: _fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
from modules.interfaces.interfaceDbManagement import ComponentObjects if not _fileRecords:
_row = ComponentObjects().db._loadRecord(_FileItem, fileId) from modules.interfaces.interfaceDbManagement import ComponentObjects
if _row: _row = ComponentObjects().db._loadRecord(_FileItem, fileId)
_fileRecords = [_row] if _row:
if _fileRecords: _fileRecords = [_row]
_fileRecord = _fileRecords[0] if _fileRecords:
_get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d)) _fileRecord = _fileRecords[0]
_shouldNeutralize = bool(_get("neutralize", False)) _get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
_fileScope = _get("scope") _shouldNeutralize = bool(_get("neutralize", False)) # FileItem is authoritative for uploads
if _fileScope: _fileScope = _get("scope")
resolvedScope = _fileScope if _fileScope:
if not resolvedMandateId: resolvedScope = _fileScope
resolvedMandateId = str(_get("mandateId", "") or "") if not resolvedMandateId:
if not resolvedFeatureInstanceId: resolvedMandateId = str(_get("mandateId", "") or "")
resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "") if not resolvedFeatureInstanceId:
_fileCreatedBy = _get("sysCreatedBy") resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
if _fileCreatedBy: _fileCreatedBy = _get("sysCreatedBy")
resolvedUserId = str(_fileCreatedBy) if _fileCreatedBy:
except Exception: resolvedUserId = str(_fileCreatedBy)
pass except Exception:
pass
# 2. Create FileContentIndex with correct scope from the start # 2. Create FileContentIndex with correct scope from the start
index = FileContentIndex( index = FileContentIndex(
@ -124,6 +461,8 @@ class KnowledgeService:
featureInstanceId=resolvedFeatureInstanceId, featureInstanceId=resolvedFeatureInstanceId,
mandateId=resolvedMandateId, mandateId=resolvedMandateId,
scope=resolvedScope, scope=resolvedScope,
sourceKind=sourceKind,
connectionId=connectionId,
fileName=fileName, fileName=fileName,
mimeType=mimeType, mimeType=mimeType,
containerPath=containerPath, containerPath=containerPath,
@ -300,7 +639,12 @@ class KnowledgeService:
Formatted context string for injection into the agent's system prompt.
"""
queryVector = await self._embedSingle(currentPrompt)
logger.debug(
"buildAgentContext.start userId=%s featureInstanceId=%s mandateId=%s isSysAdmin=%s prompt=%r",
userId, featureInstanceId, mandateId, isSysAdmin, (currentPrompt or "")[:120],
)
if not queryVector:
logger.debug("buildAgentContext.abort reason=no_query_vector")
return ""
builder = _ContextBuilder(budget=contextBudget)
@ -327,9 +671,14 @@ class KnowledgeService:
featureInstanceId=featureInstanceId,
mandateId=mandateId,
limit=15,
minScore=0.35,
isSysAdmin=isSysAdmin,
)
logger.debug(
"buildAgentContext.layer1 instanceChunks=%d top_scores=%s",
len(instanceChunks),
[round(float(c.get("_score", 0) or 0), 3) for c in (instanceChunks or [])[:3]],
)
if instanceChunks:
builder.add(priority=1, label="Relevant Documents", items=instanceChunks, maxChars=4000)
@ -338,7 +687,7 @@ class KnowledgeService:
queryVector=queryVector,
workflowId=workflowId,
limit=10,
minScore=0.35,
)
if roundMemories:
memItems = []
@ -376,7 +725,7 @@ class KnowledgeService:
scope="mandate", scope="mandate",
mandateId=mandateId, mandateId=mandateId,
limit=10, limit=10,
minScore=0.7, minScore=0.35,
isSysAdmin=isSysAdmin, isSysAdmin=isSysAdmin,
) )
if mandateChunks: if mandateChunks:
@ -392,7 +741,12 @@ class KnowledgeService:
maxChars=500,
)
_result = builder.build()
logger.debug(
"buildAgentContext.done totalChars=%d userId=%s",
len(_result), userId,
)
return _result
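# Illustrative numbers only (assumed, not measured here): with minScore=0.35 a
# chunk whose cosine score against the query embedding is e.g. 0.42 now reaches
# the context builder, whereas the previous 0.55/0.65/0.70 cutoffs discarded it.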
# =========================================================================
# Workflow Memory

View file

@ -0,0 +1,334 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Connection-lifecycle consumer bridging OAuth events to ingestion jobs.
Subscribes to the `connection.established` and `connection.revoked` callbacks
emitted by the OAuth callback handlers / connection management routes and dispatches:
- `connection.established` -> enqueue a `connection.bootstrap` BackgroundJob
that walks the connector and ingests all reachable items via
KnowledgeService.requestIngestion (file-like or virtual documents).
- `connection.revoked` -> run `KnowledgeService.purgeConnection` synchronously
so the knowledge corpus releases the data before the UI confirms the revoke.
The consumer is registered once at process boot (see `app.py` lifespan).
It intentionally does NOT hold a per-user service context; each callback
creates whatever context it needs from the UserConnection row itself.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any, Dict, Optional
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.shared.callbackRegistry import callbackRegistry
from modules.serviceCenter.services.serviceBackgroundJobs import (
registerJobHandler,
startJob,
)
logger = logging.getLogger(__name__)
BOOTSTRAP_JOB_TYPE = "connection.bootstrap"
_registered = False
def _onConnectionEstablished(
*,
connectionId: str,
authority: str,
userId: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Fire-and-forget bootstrap enqueue for a freshly connected UserConnection."""
if not connectionId:
logger.warning("connection.established without connectionId; ignoring")
return
payload: Dict[str, Any] = {
"connectionId": connectionId,
"authority": (authority or "").lower(),
"userId": userId,
}
logger.info(
"ingestion.connection.bootstrap.queued connectionId=%s authority=%s",
connectionId, authority,
extra={
"event": "ingestion.connection.bootstrap.queued",
"connectionId": connectionId,
"authority": authority,
},
)
async def _enqueue() -> None:
try:
await startJob(
BOOTSTRAP_JOB_TYPE,
payload,
triggeredBy=userId,
)
except Exception as exc:
logger.error(
"ingestion.connection.bootstrap.enqueue_failed connectionId=%s error=%s",
connectionId, exc, exc_info=True,
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_enqueue())
else:
loop.run_until_complete(_enqueue())
except RuntimeError:
asyncio.run(_enqueue())
def _onConnectionRevoked(
*,
connectionId: str,
authority: Optional[str] = None,
userId: Optional[str] = None,
reason: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Run the knowledge purge synchronously so UI feedback is authoritative."""
if not connectionId:
logger.warning("connection.revoked without connectionId; ignoring")
return
try:
# Purge lives on the DB interface to avoid ServiceCenter/user-context
# plumbing here; the service method is a thin wrapper on top of this.
result = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
except Exception as exc:
logger.error(
"ingestion.connection.purged.failed connectionId=%s error=%s",
connectionId, exc, exc_info=True,
)
return
logger.info(
"ingestion.connection.purged connectionId=%s authority=%s reason=%s rows=%d chunks=%d",
connectionId, authority, reason,
result.get("indexRows", 0), result.get("chunks", 0),
extra={
"event": "ingestion.connection.purged",
"connectionId": connectionId,
"authority": authority,
"reason": reason,
"indexRows": result.get("indexRows", 0),
"chunks": result.get("chunks", 0),
},
)
async def _bootstrapJobHandler(
job: Dict[str, Any],
progressCb,
) -> Dict[str, Any]:
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
payload = job.get("payload") or {}
connectionId = payload.get("connectionId")
authority = (payload.get("authority") or "").lower()
if not connectionId:
raise ValueError("connection.bootstrap requires payload.connectionId")
progressCb(5, f"resolving {authority} connection")
# Defensive consent check: if the connection has since disabled knowledge ingestion
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
try:
from modules.interfaces.interfaceDbApp import getRootInterface
_root = getRootInterface()
_conn = _root.getUserConnectionById(connectionId)
if _conn and not getattr(_conn, "knowledgeIngestionEnabled", True):
logger.info(
"ingestion.connection.bootstrap.skipped — consent disabled connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connectionId,
"authority": authority,
"reason": "consent_disabled",
},
)
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "consent_disabled"}
except Exception as _guardErr:
logger.debug("Could not load connection for consent guard: %s", _guardErr)
def _normalize(res: Any, label: str) -> Dict[str, Any]:
if isinstance(res, Exception):
logger.error(
"ingestion.connection.bootstrap.failed part=%s connectionId=%s error=%s",
label, connectionId, res, exc_info=res,
)
return {"error": str(res)}
return res or {}
if authority == "msft":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint,
)
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
bootstrapOutlook,
)
progressCb(10, "sharepoint + outlook")
spResult, olResult = await asyncio.gather(
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
return_exceptions=True,
)
return {
"connectionId": connectionId,
"authority": authority,
"sharepoint": _normalize(spResult, "sharepoint"),
"outlook": _normalize(olResult, "outlook"),
}
if authority == "google":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
bootstrapGdrive,
)
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
bootstrapGmail,
)
progressCb(10, "drive + gmail")
gdResult, gmResult = await asyncio.gather(
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
return_exceptions=True,
)
return {
"connectionId": connectionId,
"authority": authority,
"drive": _normalize(gdResult, "gdrive"),
"gmail": _normalize(gmResult, "gmail"),
}
if authority == "clickup":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
bootstrapClickup,
)
progressCb(10, "clickup tasks")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
return {
"connectionId": connectionId,
"authority": authority,
"clickup": _normalize(cuResult, "clickup"),
}
logger.info(
"ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s",
authority, connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"authority": authority,
"connectionId": connectionId,
"reason": "unsupported_authority",
},
)
return {
"connectionId": connectionId,
"authority": authority,
"skipped": True,
"reason": "unsupported_authority",
}
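# Illustrative result shape for authority="msft" (values assumed; the inner field
# names mirror the analogous bootstrap result dicts below):
#   {"connectionId": "...", "authority": "msft",
#    "sharepoint": {"indexed": 12, "skippedDuplicate": 3, ...},
#    "outlook":    {"indexed": 40, "skippedDuplicate": 0, ...}}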
async def _scheduledDailyResync() -> None:
"""Enqueue a connection.bootstrap job for every active knowledge connection.
Runs once per day (default 2 AM Europe/Zurich). Each job re-walks the
connector and hands new / changed items to KnowledgeService.requestIngestion.
Unchanged items are deduplicated by content-hash and skipped automatically.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
rootInterface = getRootInterface()
connections = rootInterface.getActiveKnowledgeConnections()
except Exception as exc:
logger.error("knowledge.daily_resync: could not load connections: %s", exc, exc_info=True)
return
if not connections:
logger.info("knowledge.daily_resync: no active knowledge connections — nothing to do")
return
logger.info(
"knowledge.daily_resync: enqueuing bootstrap for %d connection(s)",
len(connections),
extra={"event": "knowledge.daily_resync.started", "count": len(connections)},
)
enqueued = 0
skipped = 0
for conn in connections:
connectionId = str(conn.id)
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority)
userId = str(conn.userId)
payload: Dict[str, Any] = {
"connectionId": connectionId,
"authority": authority.lower(),
"userId": userId,
}
try:
await startJob(
BOOTSTRAP_JOB_TYPE,
payload,
triggeredBy="scheduler.daily_resync",
)
enqueued += 1
logger.debug(
"knowledge.daily_resync: queued connectionId=%s authority=%s",
connectionId, authority,
)
except Exception as exc:
skipped += 1
logger.error(
"knowledge.daily_resync: failed to enqueue connectionId=%s: %s",
connectionId, exc,
)
logger.info(
"knowledge.daily_resync: done — enqueued=%d skipped=%d",
enqueued, skipped,
extra={"event": "knowledge.daily_resync.done", "enqueued": enqueued, "skipped": skipped},
)
def registerDailyResyncScheduler(*, hour: int = 2, minute: int = 0) -> None:
"""Register the daily knowledge re-sync cron job. Idempotent.
Args:
hour: Hour of day to run (0-23, default 2 = 2 AM Europe/Zurich).
minute: Minute within the hour (default 0).
"""
try:
from modules.shared.eventManagement import eventManager
eventManager.registerCron(
jobId="knowledge.daily_resync",
func=_scheduledDailyResync,
cronKwargs={"hour": str(hour), "minute": str(minute)},
)
logger.info(
"knowledge.daily_resync scheduler registered (daily %02d:%02d Europe/Zurich)",
hour, minute,
)
except Exception as exc:
logger.warning("knowledge.daily_resync scheduler registration failed (non-critical): %s", exc)
def registerKnowledgeIngestionConsumer() -> None:
"""Register callback subscribers + background job handler. Idempotent."""
global _registered
if _registered:
return
callbackRegistry.register("connection.established", _onConnectionEstablished)
callbackRegistry.register("connection.revoked", _onConnectionRevoked)
registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
registerDailyResyncScheduler()
_registered = True
logger.info("KnowledgeIngestionConsumer registered (established/revoked + %s handler + daily resync)", BOOTSTRAP_JOB_TYPE)

View file

@ -0,0 +1,101 @@
"""Per-connection knowledge ingestion preference helpers.
Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and
receive a `ConnectionIngestionPrefs` dataclass they can pass down into their
inner loops. All fields have safe defaults so walkers stay backward-compatible
with connections that predate the §2.6 preference schema (knowledgePreferences
is None).
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
_DEFAULT_MAX_AGE_DAYS = 90
_DEFAULT_MAIL_DEPTH = "full"
_DEFAULT_CLICKUP_SCOPE = "title_description"
@dataclass
class ConnectionIngestionPrefs:
"""Parsed per-connection preferences for knowledge ingestion walkers."""
# PII
neutralizeBeforeEmbed: bool = False
# Mail (Outlook + Gmail)
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
mailIndexAttachments: bool = False
# Files (Drive / SharePoint / OneDrive)
filesIndexBinaries: bool = True
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
# ClickUp
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
clickupIndexAttachments: bool = False
# Per-authority surface toggles (default everything on)
gmailEnabled: bool = True
driveEnabled: bool = True
sharepointEnabled: bool = True
outlookEnabled: bool = True
# Time window
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
"""Load and parse per-connection preferences from the database.
Returns safe defaults for any missing or unparseable values so walkers
never fail due to missing preference data.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
root = getRootInterface()
conn = root.getUserConnectionById(connectionId)
if not conn:
logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId)
return ConnectionIngestionPrefs()
raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None)
if not raw or not isinstance(raw, dict):
return ConnectionIngestionPrefs()
def _bool(key: str, default: bool) -> bool:
v = raw.get(key)
return bool(v) if isinstance(v, bool) else default
def _str(key: str, allowed: List[str], default: str) -> str:
v = raw.get(key)
return v if v in allowed else default
def _int(key: str, default: int) -> int:
v = raw.get(key)
return int(v) if isinstance(v, int) else default
surface = raw.get("surfaceToggles") or {}
google_surf = surface.get("google") or {}
msft_surf = surface.get("msft") or {}
return ConnectionIngestionPrefs(
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
mailIndexAttachments=_bool("mailIndexAttachments", False),
filesIndexBinaries=_bool("filesIndexBinaries", True),
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
gmailEnabled=bool(google_surf.get("gmail", True)),
driveEnabled=bool(google_surf.get("drive", True)),
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
outlookEnabled=bool(msft_surf.get("outlook", True)),
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
)
except Exception as exc:
logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc)
return ConnectionIngestionPrefs()
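# --- Illustrative walker usage (sketch; `shouldSkipMime` is a hypothetical helper) ---
#
# prefs = loadConnectionPrefs(connectionId)
# if not prefs.sharepointEnabled:
#     return result  # surface toggled off by the user -> nothing to walk
#
# def shouldSkipMime(mimeType: str) -> bool:
#     # an empty allowlist means "all mime types allowed"
#     return bool(prefs.mimeAllowlist) and mimeType not in prefs.mimeAllowlist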

View file

@ -0,0 +1,512 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""ClickUp bootstrap for the unified knowledge ingestion lane.
ClickUp tasks are ingested as *virtual documents* we never download file
bytes. Each task becomes a `sourceKind="clickup_task"` IngestionJob whose
`contentObjects` carry a summary header (name + status + metadata) and the
task description / text content so retrieval finds them without a live API
call.
Hierarchy traversal: workspace (team) → spaces → folders / folderless lists
→ tasks. We cap the fan-out with `maxWorkspaces` / `maxListsPerWorkspace` /
`maxTasks` and skip tasks older than `maxAgeDays` (default 180 days).
Idempotency: `date_updated` from the ClickUp task payload is a millisecond
timestamp, strictly monotonic per revision, and is used as `contentVersion`.
"""
from __future__ import annotations
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
logger = logging.getLogger(__name__)
MAX_TASKS_DEFAULT = 500
MAX_WORKSPACES_DEFAULT = 3
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
MAX_AGE_DAYS_DEFAULT = 180
@dataclass
class ClickupBootstrapLimits:
maxTasks: int = MAX_TASKS_DEFAULT
maxWorkspaces: int = MAX_WORKSPACES_DEFAULT
maxListsPerWorkspace: int = MAX_LISTS_PER_WORKSPACE_DEFAULT
maxDescriptionChars: int = MAX_DESCRIPTION_CHARS_DEFAULT
# Only ingest tasks updated within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Include closed/archived tasks if they still meet the recency filter.
# ClickUp `closed` tasks often carry the most useful RAG context
# ("why was this shipped the way it was?").
includeClosed: bool = True
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
# Content scope: "titles" | "title_description" | "with_comments"
clickupScope: str = "title_description"
@dataclass
class ClickupBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
workspaces: int = 0
lists: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticTaskId(connectionId: str, taskId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{taskId}".encode("utf-8")).hexdigest()[:16]
return f"cu:{connectionId[:8]}:{token}"
def _truncate(value: Any, limit: int) -> str:
text = str(value or "").strip()
if not text:
return ""
if len(text) <= limit:
return text
return text[:limit].rstrip() + "\n[truncated]"
def _isRecent(dateUpdatedMs: Any, maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays:
return True
if not dateUpdatedMs:
return True
try:
ts = datetime.fromtimestamp(int(dateUpdatedMs) / 1000.0, tz=timezone.utc)
except Exception:
return True
cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
return ts >= cutoff
def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -> List[Dict[str, Any]]:
"""Header (name/status/metadata) + optional description + text_content.
`limits.clickupScope` controls how much is embedded:
- "titles": task name + status metadata only
- "title_description": header + description / text_content (default)
- "with_comments": header + description + text_content
(comments themselves are not yet fetched in v1)
"""
name = task.get("name") or f"Task {task.get('id', '')}"
status = ((task.get("status") or {}).get("status")) or ""
assignees = ", ".join(
filter(None, [
(a.get("username") or a.get("email") or "")
for a in (task.get("assignees") or [])
])
)
tags = ", ".join(filter(None, [t.get("name", "") for t in (task.get("tags") or [])]))
listInfo = task.get("list") or {}
folderInfo = task.get("folder") or {}
spaceInfo = task.get("space") or {}
dueMs = task.get("due_date")
dueIso = ""
if dueMs:
try:
dueIso = datetime.fromtimestamp(int(dueMs) / 1000.0, tz=timezone.utc).strftime("%Y-%m-%d")
except Exception:
dueIso = ""
headerLines = [
f"Task: {name}",
f"Status: {status}" if status else "",
f"List: {listInfo.get('name', '')}" if listInfo else "",
f"Folder: {folderInfo.get('name', '')}" if folderInfo else "",
f"Space: {spaceInfo.get('name', '')}" if spaceInfo else "",
f"Assignees: {assignees}" if assignees else "",
f"Tags: {tags}" if tags else "",
f"Due: {dueIso}" if dueIso else "",
f"Url: {task.get('url', '')}" if task.get("url") else "",
]
header = "\n".join(line for line in headerLines if line)
parts: List[Dict[str, Any]] = [{
"contentObjectId": "header",
"contentType": "text",
"data": header,
"contextRef": {"part": "header"},
}]
scope = getattr(limits, "clickupScope", "title_description")
if scope in ("title_description", "with_comments"):
description = _truncate(task.get("description"), limits.maxDescriptionChars)
if description:
parts.append({
"contentObjectId": "description",
"contentType": "text",
"data": description,
"contextRef": {"part": "description"},
})
# text_content is ClickUp's rendered-markdown version; include if it adds
# something beyond the plain description (common for bullet lists, checklists).
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
if textContent and textContent != description:
parts.append({
"contentObjectId": "text_content",
"contentType": "text",
"data": textContent,
"contextRef": {"part": "text_content"},
})
return parts
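# Illustrative output (assumed minimal task payload): for
#   {"id": "86c1", "name": "Ship v2", "status": {"status": "open"},
#    "description": "Roll out the v2 API"}
# the default "title_description" scope yields a header part plus a description part:
#   [{"contentObjectId": "header", "contentType": "text",
#     "data": "Task: Ship v2\nStatus: open", "contextRef": {"part": "header"}},
#    {"contentObjectId": "description", "contentType": "text",
#     "data": "Roll out the v2 API", "contextRef": {"part": "description"}}]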
async def bootstrapClickup(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = ClickupBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
clickupScope=prefs.clickupScope,
)
startMs = time.time()
result = ClickupBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=clickup connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "clickup",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
svc = getattr(adapter, "_svc", None)
if svc is None:
result.errors.append("adapter missing _svc instance")
return _finalizeResult(connectionId, result, startMs)
try:
teamsResp = await svc.getAuthorizedTeams()
except Exception as exc:
logger.error("clickup team discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"teams: {exc}")
return _finalizeResult(connectionId, result, startMs)
teams = (teamsResp or {}).get("teams") or []
for team in teams[: limits.maxWorkspaces]:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerClickup.connectorClickup import ClickupConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = ClickupConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("clickup")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _walkTeam(
*,
svc,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
team: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
teamId = str(team.get("id", "") or "")
spacesResp = await svc.getSpaces(teamId)
spaces = (spacesResp or {}).get("spaces") or []
listsCollected: List[Dict[str, Any]] = []
for space in spaces:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
spaceId = str(space.get("id", "") or "")
if not spaceId:
continue
# Folderless lists directly under the space
folderless = await svc.getFolderlessLists(spaceId)
for lst in (folderless or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
listsCollected.append({**lst, "_space": space})
# Lists inside folders
foldersResp = await svc.getFolders(spaceId)
for folder in (foldersResp or {}).get("folders") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
folderId = str(folder.get("id", "") or "")
if not folderId:
continue
folderLists = await svc.getListsInFolder(folderId)
for lst in (folderLists or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
listsCollected.append({**lst, "_space": space, "_folder": folder})
for lst in listsCollected:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return
result.lists += 1
await _walkList(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
teamId=teamId,
lst=lst,
limits=limits,
result=result,
progressCb=progressCb,
)
async def _walkList(
*,
svc,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
teamId: str,
lst: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
listId = str(lst.get("id", "") or "")
if not listId:
return
page = 0
while result.indexed + result.skippedDuplicate < limits.maxTasks:
resp = await svc.getTasksInList(
listId,
page=page,
include_closed=limits.includeClosed,
subtasks=True,
)
if isinstance(resp, dict) and resp.get("error"):
logger.warning("clickup tasks list=%s page=%d error: %s", listId, page, resp.get("error"))
result.errors.append(f"list({listId}): {resp.get('error')}")
return
tasks = (resp or {}).get("tasks") or []
if not tasks:
return
for task in tasks:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
result.skippedPolicy += 1
continue
# Inject the list/folder/space metadata we already loaded.
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
task["folder"] = task.get("folder") or lst.get("_folder") or {}
task["space"] = task.get("space") or lst.get("_space") or {}
await _ingestTask(
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
teamId=teamId,
task=task,
limits=limits,
result=result,
progressCb=progressCb,
)
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page
return
page += 1
async def _ingestTask(
*,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
teamId: str,
task: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
taskId = str(task.get("id", "") or "")
if not taskId:
result.skippedPolicy += 1
return
revision = str(task.get("date_updated") or task.get("date_created") or "")
name = task.get("name") or f"Task {taskId}"
syntheticId = _syntheticTaskId(connectionId, taskId)
fileName = f"{name[:80].strip() or taskId}.task.json"
contentObjects = _buildContentObjects(task, limits)
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="clickup_task",
sourceId=syntheticId,
fileName=fileName,
mimeType="application/vnd.clickup.task+json",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision or None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "clickup",
"service": "clickup",
"externalItemId": taskId,
"teamId": teamId,
"listId": ((task.get("list") or {}).get("id")),
"spaceId": ((task.get("space") or {}).get("id")),
"url": task.get("url"),
"status": ((task.get("status") or {}).get("status")),
"tier": limits.clickupScope,
},
)
)
except Exception as exc:
logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({taskId}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
f"clickup processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "clickup",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.failed, result.workspaces, result.lists, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "clickup",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"workspaces": result.workspaces,
"lists": result.lists,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"workspaces": result.workspaces,
"lists": result.lists,
"durationMs": durationMs,
"errors": result.errors[:20],
}

View file

@ -0,0 +1,443 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Google Drive bootstrap for the unified knowledge ingestion lane.
Mirrors the SharePoint pilot (see subConnectorSyncSharepoint.py). Walks the
user's *My Drive* tree from the virtual `root` folder, downloads each
file-like item via `DriveAdapter.download` (which handles native Google docs
via export), runs the standard extraction pipeline and routes results through
`KnowledgeService.requestIngestion` with `sourceKind="gdrive_item"` and
`contentVersion = modifiedTime` (monotonic per-revision).
"""
from __future__ import annotations
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
logger = logging.getLogger(__name__)
MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder"
@dataclass
class GdriveBootstrapLimits:
maxItems: int = MAX_ITEMS_DEFAULT
maxBytes: int = MAX_BYTES_DEFAULT
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT
# Only ingest files modified within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
# Whether to skip binary/non-text files
filesIndexBinaries: bool = True
@dataclass
class GdriveBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
bytesProcessed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
return f"gd:{connectionId[:8]}:{token}"
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
parts = getattr(extracted, "parts", None) or []
out: List[Dict[str, Any]] = []
for part in parts:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
out.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "file",
**(getattr(part, "metadata", None) or {}),
},
})
return out
def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays:
return True
if not modifiedIso:
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
return True
try:
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
except Exception:
return True
cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
if ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc)
return ts >= cutoff
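# e.g. (illustrative): "2024-06-01T12:00:00Z".replace("Z", "+00:00") parses via
# datetime.fromisoformat on Python 3.7+ as an aware UTC datetime, so the
# comparison against the timezone-aware cutoff is well-defined.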
async def bootstrapGdrive(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GdriveBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Walk My Drive starting from the virtual root folder."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = GdriveBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
filesIndexBinaries=prefs.filesIndexBinaries,
)
startMs = time.time()
result = GdriveBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gdrive",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
if runExtractionFn is None:
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
def runExtractionFn(bytesData, name, mime, options): # type: ignore[no-redef]
return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"walk: {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = GoogleConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("drive")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _walkFolder(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
folderPath: str,
depth: int,
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
if depth > limits.maxDepth:
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
logger.warning("gdrive browse %s failed: %s", folderPath, exc)
result.errors.append(f"browse({folderPath}): {exc}")
return
for entry in entries:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
return
if result.bytesProcessed >= limits.maxBytes:
return
entryPath = getattr(entry, "path", "") or ""
metadata = getattr(entry, "metadata", {}) or {}
mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")
if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=entryPath,
depth=depth + 1,
limits=limits,
result=result,
progressCb=progressCb,
)
continue
effectiveMime = mimeType or "application/octet-stream"
if any(effectiveMime.startswith(prefix) for prefix in limits.skipMimePrefixes):
result.skippedPolicy += 1
continue
size = int(getattr(entry, "size", 0) or 0)
if size and size > limits.maxFileSize:
result.skippedPolicy += 1
continue
modifiedTime = metadata.get("modifiedTime")
if not _isRecent(modifiedTime, limits.maxAgeDays):
result.skippedPolicy += 1
continue
externalItemId = metadata.get("id") or entryPath
revision = modifiedTime
await _ingestOne(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
entry=entry,
entryPath=entryPath,
mimeType=effectiveMime,
externalItemId=externalItemId,
revision=revision,
limits=limits,
result=result,
progressCb=progressCb,
)
async def _ingestOne(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
entry,
entryPath: str,
mimeType: str,
externalItemId: str,
revision: Optional[str],
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
fileName = getattr(entry, "name", "") or externalItemId
try:
downloaded = await adapter.download(entryPath)
except Exception as exc:
logger.warning("gdrive download %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"download({entryPath}): {exc}")
return
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
fileBytes: bytes
if isinstance(downloaded, (bytes, bytearray)):
fileBytes = bytes(downloaded)
else:
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
if getattr(downloaded, "mimeType", None):
mimeType = downloaded.mimeType # export may have changed the type
if not fileBytes:
result.failed += 1
return
if len(fileBytes) > limits.maxFileSize:
result.skippedPolicy += 1
return
result.bytesProcessed += len(fileBytes)
try:
extracted = runExtractionFn(
fileBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("gdrive extraction %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"extract({entryPath}): {exc}")
return
contentObjects = _toContentObjects(extracted, fileName)
if not contentObjects:
result.skippedPolicy += 1
return
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gdrive_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
},
)
)
except Exception as exc:
logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({entryPath}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
f"gdrive processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=gdrive processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "gdrive",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.failed, result.bytesProcessed, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "gdrive",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"bytes": result.bytesProcessed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"bytesProcessed": result.bytesProcessed,
"durationMs": durationMs,
"errors": result.errors[:20],
}

View file

@ -0,0 +1,606 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Gmail bootstrap for the unified knowledge ingestion lane.
Mirrors the Outlook pilot (see subConnectorSyncOutlook.py) but talks to Google
Mail's REST API. Messages become `sourceKind="gmail_message"` virtual documents
with header / snippet / cleaned body content-objects; attachments are optional
child jobs with `sourceKind="gmail_attachment"`.
Idempotency: Gmail's stable `historyId` (or `internalDate` as fallback) is
passed as `contentVersion`, so rerunning the bootstrap yields
`ingestion.skipped.duplicate` for unchanged messages.
"""
from __future__ import annotations
import asyncio
import base64
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
logger = logging.getLogger(__name__)
MAX_MESSAGES_DEFAULT = 500
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
DEFAULT_LABELS = ("INBOX", "SENT")
@dataclass
class GmailBootstrapLimits:
maxMessages: int = MAX_MESSAGES_DEFAULT
labels: tuple = DEFAULT_LABELS
maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
includeAttachments: bool = False
maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
# Only fetch messages newer than N days. None disables filter.
maxAgeDays: Optional[int] = 90
# Content depth: "metadata" | "snippet" | "full"
mailContentDepth: str = "full"
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
@dataclass
class GmailBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
attachmentsIndexed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticMessageId(connectionId: str, messageId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
return f"gm:{connectionId[:8]}:{token}"
def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
token = hashlib.sha256(
f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
).hexdigest()[:16]
return f"ga:{connectionId[:8]}:{token}"
def _decodeBase64Url(data: str) -> bytes:
if not data:
return b""
# Gmail uses URL-safe base64 without padding.
padding = 4 - (len(data) % 4)
if padding != 4:
data = data + ("=" * padding)
try:
return base64.urlsafe_b64decode(data)
except Exception:
return b""
def _walkPayloadForBody(payload: Dict[str, Any]) -> Dict[str, str]:
"""Return {"text": ..., "html": ...} by walking MIME parts.
Gmail `payload` is a tree of parts. We prefer `text/plain` for the cleaned
body, but capture `text/html` as a fallback so `cleanEmailBody` can strip
markup if plain is missing.
"""
found: Dict[str, str] = {"text": "", "html": ""}
def _walk(part: Dict[str, Any]) -> None:
mime = (part.get("mimeType") or "").lower()
body = part.get("body") or {}
raw = body.get("data") or ""
if raw and mime.startswith("text/"):
decoded = _decodeBase64Url(raw).decode("utf-8", errors="replace")
key = "text" if mime == "text/plain" else ("html" if mime == "text/html" else "")
if key and not found[key]:
found[key] = decoded
for sub in part.get("parts") or []:
_walk(sub)
_walk(payload or {})
return found
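# Illustrative example (assumed minimal payload):
#   _walkPayloadForBody({"mimeType": "multipart/alternative",
#                        "parts": [{"mimeType": "text/plain",
#                                   "body": {"data": "SGVsbG8"}}]})
#   -> {"text": "Hello", "html": ""}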
def _headerMap(payload: Dict[str, Any]) -> Dict[str, str]:
return {
(h.get("name") or "").lower(): (h.get("value") or "")
for h in (payload.get("headers") or [])
}
def _buildContentObjects(
message: Dict[str, Any],
maxBodyChars: int,
mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
"""Build content objects for a Gmail message.
`mailContentDepth` controls how much is embedded:
- "metadata": header only (subject, from, to, date)
- "snippet": header + Gmail snippet (~155 chars, no full body)
- "full": header + snippet + cleaned full body (default)
"""
payload = message.get("payload") or {}
headers = _headerMap(payload)
subject = headers.get("subject") or "(no subject)"
fromAddr = headers.get("from") or ""
toAddr = headers.get("to") or ""
ccAddr = headers.get("cc") or ""
date = headers.get("date") or ""
snippet = message.get("snippet") or ""
parts: List[Dict[str, Any]] = []
header = (
f"Subject: {subject}\n"
f"From: {fromAddr}\n"
f"To: {toAddr}\n"
+ (f"Cc: {ccAddr}\n" if ccAddr else "")
+ f"Date: {date}"
)
parts.append({
"contentObjectId": "header",
"contentType": "text",
"data": header,
"contextRef": {"part": "header"},
})
if mailContentDepth in ("snippet", "full") and snippet:
parts.append({
"contentObjectId": "snippet",
"contentType": "text",
"data": snippet,
"contextRef": {"part": "snippet"},
})
if mailContentDepth == "full":
bodies = _walkPayloadForBody(payload)
rawBody = bodies["text"] or bodies["html"]
cleanedBody = cleanEmailBody(rawBody, maxChars=maxBodyChars) if rawBody else ""
if cleanedBody:
parts.append({
"contentObjectId": "body",
"contentType": "text",
"data": cleanedBody,
"contextRef": {"part": "body"},
})
return parts
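# Illustrative part counts (assuming a message with a subject, a snippet and a
# plain-text body):
#   "metadata" -> 1 part  (header only)
#   "snippet"  -> 2 parts (header + snippet)
#   "full"     -> 3 parts (header + snippet + cleaned body)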
async def bootstrapGmail(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GmailBootstrapLimits] = None,
googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = GmailBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
startMs = time.time()
result = GmailBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gmail connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gmail",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
if googleGetFn is None:
from modules.connectors.providerGoogle.connectorGoogle import _googleGet as _defaultGet
token = getattr(adapter, "_token", "")
async def googleGetFn(url: str) -> Dict[str, Any]: # type: ignore[no-redef]
return await _defaultGet(token, url)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
for labelId in limits.labels:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = GoogleConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("gmail")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _ingestLabel(
*,
googleGetFn,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
labelId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
return
pageSize = min(100, remaining)
query = ""
if limits.maxAgeDays:
cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
# Gmail uses YYYY/MM/DD.
query = f"after:{cutoff.strftime('%Y/%m/%d')}"
baseUrl = (
"https://gmail.googleapis.com/gmail/v1/users/me/messages"
f"?labelIds={labelId}&maxResults={pageSize}"
)
if query:
baseUrl = f"{baseUrl}&q={query}"
nextPageToken: Optional[str] = None
while (result.indexed + result.skippedDuplicate) < limits.maxMessages:
url = baseUrl if not nextPageToken else f"{baseUrl}&pageToken={nextPageToken}"
page = await googleGetFn(url)
if not isinstance(page, dict) or "error" in page:
err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
logger.warning("gmail list page error for label %s: %s", labelId, err)
result.errors.append(f"list({labelId}): {err}")
return
messageStubs = page.get("messages") or []
for stub in messageStubs:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
msgId = stub.get("id")
if not msgId:
continue
detailUrl = (
f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{msgId}?format=full"
)
detail = await googleGetFn(detailUrl)
if not isinstance(detail, dict) or "error" in detail:
result.failed += 1
continue
await _ingestMessage(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
message=detail,
limits=limits,
result=result,
progressCb=progressCb,
)
nextPageToken = page.get("nextPageToken")
if not nextPageToken:
break
async def _ingestMessage(
*,
googleGetFn,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
labelId: str,
message: Dict[str, Any],
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
messageId = message.get("id")
if not messageId:
result.skippedPolicy += 1
return
revision = message.get("historyId") or message.get("internalDate")
headers = _headerMap(message.get("payload") or {})
subject = headers.get("subject") or "(no subject)"
syntheticId = _syntheticMessageId(connectionId, messageId)
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
contentObjects = _buildContentObjects(
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
)
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=str(revision) if revision else None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "gmail",
"externalItemId": messageId,
"label": labelId,
"threadId": message.get("threadId"),
"tier": limits.mailContentDepth,
},
)
)
except Exception as exc:
logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({messageId}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if limits.includeAttachments:
try:
await _ingestAttachments(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
message=message,
parentSyntheticId=syntheticId,
limits=limits,
result=result,
)
except Exception as exc:
logger.warning("gmail attachments %s failed: %s", messageId, exc)
result.errors.append(f"attachments({messageId}): {exc}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
f"gmail processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "gmail",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
await asyncio.sleep(0)
async def _ingestAttachments(
*,
googleGetFn,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
message: Dict[str, Any],
parentSyntheticId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
) -> None:
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
messageId = message.get("id") or ""
def _collectAttachmentStubs(part: Dict[str, Any], acc: List[Dict[str, Any]]) -> None:
filename = part.get("filename") or ""
body = part.get("body") or {}
attId = body.get("attachmentId")
if filename and attId:
acc.append({
"filename": filename,
"mimeType": part.get("mimeType") or "application/octet-stream",
"attachmentId": attId,
"size": int(body.get("size") or 0),
})
for sub in part.get("parts") or []:
_collectAttachmentStubs(sub, acc)
stubs: List[Dict[str, Any]] = []
_collectAttachmentStubs(message.get("payload") or {}, stubs)
if not stubs:
return
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
for stub in stubs:
if stub["size"] and stub["size"] > limits.maxAttachmentBytes:
result.skippedPolicy += 1
continue
attUrl = (
f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{messageId}"
f"/attachments/{stub['attachmentId']}"
)
detail = await googleGetFn(attUrl)
if not isinstance(detail, dict) or "error" in detail:
result.failed += 1
continue
rawBytes = _decodeBase64Url(detail.get("data") or "")
if not rawBytes:
continue
fileName = stub["filename"]
mimeType = stub["mimeType"]
syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"])
try:
extracted = runExtraction(
extractorRegistry, chunkerRegistry,
rawBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc)
result.failed += 1
continue
contentObjects: List[Dict[str, Any]] = []
for part in getattr(extracted, "parts", None) or []:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
contentObjects.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "attachment",
**(getattr(part, "metadata", None) or {}),
},
})
if not contentObjects:
result.skippedPolicy += 1
continue
try:
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "gmail",
"parentId": parentSyntheticId,
"externalItemId": stub["attachmentId"],
"parentMessageId": messageId,
},
)
)
result.attachmentsIndexed += 1
except Exception as exc:
logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc)
result.failed += 1
def _finalizeResult(connectionId: str, result: GmailBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=gmail connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.attachmentsIndexed, result.failed, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "gmail",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
"errors": result.errors[:20],
}
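
The walker above leans on helpers defined earlier in this module (`_headerMap`, `_decodeBase64Url`, `_buildContentObjects`). For orientation, a minimal sketch of the attachment decoding step, assuming the Gmail API's URL-safe base64 without padding, could look like the following; the name is illustrative, not the module's actual helper.

import base64

def decodeBase64UrlSketch(data: str) -> bytes:
    # Gmail returns attachment bytes as URL-safe base64 and omits padding,
    # so pad to a multiple of four before decoding; empty/bad input -> b"".
    if not data:
        return b""
    padded = data + "=" * (-len(data) % 4)
    try:
        return base64.urlsafe_b64decode(padded)
    except Exception:
        return b""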

View file

@ -0,0 +1,576 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Outlook bootstrap for the unified knowledge ingestion lane.
Unlike SharePoint, Outlook messages are "virtual documents": we never persist
file bytes in the store. Each message becomes a `sourceKind="outlook_message"`
IngestionJob whose `contentObjects` carry the header, snippet and cleaned body
so retrieval can show a compact answer without fetching Graph again.
Attachments are optional (`includeAttachments` limit flag) and enqueued as
child jobs with `sourceKind="outlook_attachment"` + `provenance.parentId`.
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
logger = logging.getLogger(__name__)
MAX_MESSAGES_DEFAULT = 500
MAX_FOLDERS_DEFAULT = 5
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
WELL_KNOWN_FOLDERS = ("inbox", "sentitems")
@dataclass
class OutlookBootstrapLimits:
maxMessages: int = MAX_MESSAGES_DEFAULT
maxFolders: int = MAX_FOLDERS_DEFAULT
maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
includeAttachments: bool = False
maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
# Only fetch messages newer than N days. None disables filter.
maxAgeDays: Optional[int] = 90
# Content depth: "metadata" | "snippet" | "full"
mailContentDepth: str = "full"
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
@dataclass
class OutlookBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
attachmentsIndexed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticMessageId(connectionId: str, messageId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
return f"om:{connectionId[:8]}:{token}"
def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
token = hashlib.sha256(
f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
).hexdigest()[:16]
return f"oa:{connectionId[:8]}:{token}"
def _extractRecipient(recipient: Dict[str, Any]) -> str:
email = (recipient or {}).get("emailAddress") or {}
name = email.get("name") or ""
addr = email.get("address") or ""
if name and addr:
return f"{name} <{addr}>"
return addr or name
def _joinRecipients(recipients: List[Dict[str, Any]]) -> str:
return ", ".join(filter(None, [_extractRecipient(r) for r in recipients or []]))
def _buildContentObjects(
message: Dict[str, Any],
maxBodyChars: int,
mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
"""Build content objects for an Outlook message.
`mailContentDepth` mirrors the Gmail walker:
- "metadata": header only
- "snippet": header + bodyPreview (~255 chars)
- "full": header + snippet + cleaned body (default)
"""
subject = message.get("subject") or "(no subject)"
fromAddr = _extractRecipient(message.get("from") or {})
toAddr = _joinRecipients(message.get("toRecipients") or [])
ccAddr = _joinRecipients(message.get("ccRecipients") or [])
received = message.get("receivedDateTime") or ""
snippet = message.get("bodyPreview") or ""
parts: List[Dict[str, Any]] = []
header = (
f"Subject: {subject}\n"
f"From: {fromAddr}\n"
f"To: {toAddr}\n"
+ (f"Cc: {ccAddr}\n" if ccAddr else "")
+ f"Date: {received}"
)
parts.append({
"contentObjectId": "header",
"contentType": "text",
"data": header,
"contextRef": {"part": "header"},
})
if mailContentDepth in ("snippet", "full") and snippet:
parts.append({
"contentObjectId": "snippet",
"contentType": "text",
"data": snippet,
"contextRef": {"part": "snippet"},
})
if mailContentDepth == "full":
body = message.get("body") or {}
bodyContent = body.get("content") or ""
cleanedBody = cleanEmailBody(bodyContent, maxChars=maxBodyChars) if bodyContent else ""
if cleanedBody:
parts.append({
"contentObjectId": "body",
"contentType": "text",
"data": cleanedBody,
"contextRef": {"part": "body"},
})
return parts
async def bootstrapOutlook(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[OutlookBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = OutlookBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
startMs = time.time()
result = OutlookBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "outlook",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
folderIds = await _selectFolderIds(adapter, limits)
for folderId in folderIds:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerMsft.connectorMsft import MsftConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = MsftConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("outlook")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _selectFolderIds(adapter, limits: OutlookBootstrapLimits) -> List[str]:
"""Prefer well-known folders (inbox, sentitems); fall back to browse()."""
folderIds: List[str] = []
for wellKnown in WELL_KNOWN_FOLDERS:
if len(folderIds) >= limits.maxFolders:
break
try:
row = await adapter._graphGet(f"me/mailFolders/{wellKnown}")
except Exception:
row = None
if isinstance(row, dict) and "error" not in row and row.get("id"):
folderIds.append(row["id"])
if len(folderIds) < limits.maxFolders:
try:
entries = await adapter.browse("/")
except Exception:
entries = []
for entry in entries:
metadata = getattr(entry, "metadata", {}) or {}
fid = metadata.get("id")
if fid and fid not in folderIds:
folderIds.append(fid)
if len(folderIds) >= limits.maxFolders:
break
return folderIds
async def _ingestFolder(
*,
adapter,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
folderId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
return
pageSize = min(100, remaining)
select = (
"id,subject,from,toRecipients,ccRecipients,receivedDateTime,"
"bodyPreview,body,internetMessageId,hasAttachments,changeKey"
)
endpoint: Optional[str] = (
f"me/mailFolders/{folderId}/messages"
f"?$top={pageSize}&$orderby=receivedDateTime desc&$select={select}"
)
# Apply the age filter ($filter on receivedDateTime) in Graph itself to avoid
# shipping ancient messages we'd only discard client-side.
if limits.maxAgeDays:
from datetime import datetime, timezone, timedelta
cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
cutoffIso = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
endpoint = f"{endpoint}&$filter=receivedDateTime ge {cutoffIso}"
while endpoint and (result.indexed + result.skippedDuplicate) < limits.maxMessages:
try:
page = await adapter._graphGet(endpoint)
except Exception as exc:
logger.warning("outlook graph page failed for folder %s: %s", folderId, exc)
result.errors.append(f"graph({folderId}): {exc}")
return
if not isinstance(page, dict) or "error" in page:
err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
logger.warning("outlook graph page error for folder %s: %s", folderId, err)
result.errors.append(f"graph({folderId}): {err}")
return
for message in page.get("value", []) or []:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
await _ingestMessage(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
message=message,
limits=limits,
result=result,
progressCb=progressCb,
)
nextLink = page.get("@odata.nextLink")
if not nextLink:
break
# Strip Graph base so adapter._graphGet accepts the relative path.
from modules.connectors.providerMsft.connectorMsft import _stripGraphBase
endpoint = _stripGraphBase(nextLink)
async def _ingestMessage(
*,
adapter,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
message: Dict[str, Any],
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
messageId = message.get("id")
if not messageId:
result.skippedPolicy += 1
return
revision = message.get("changeKey") or message.get("internetMessageId")
subject = message.get("subject") or "(no subject)"
syntheticId = _syntheticMessageId(connectionId, messageId)
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
contentObjects = _buildContentObjects(
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
)
# At least the header is always emitted, so `contentObjects` is non-empty.
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "msft",
"service": "outlook",
"externalItemId": messageId,
"internetMessageId": message.get("internetMessageId"),
"tier": limits.mailContentDepth,
},
)
)
except Exception as exc:
logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({messageId}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if limits.includeAttachments and message.get("hasAttachments"):
try:
await _ingestAttachments(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
messageId=messageId,
parentSyntheticId=syntheticId,
limits=limits,
result=result,
)
except Exception as exc:
logger.warning("outlook attachments %s failed: %s", messageId, exc)
result.errors.append(f"attachments({messageId}): {exc}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
f"outlook processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "outlook",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
await asyncio.sleep(0)
async def _ingestAttachments(
*,
adapter,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
messageId: str,
parentSyntheticId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
) -> None:
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
import base64
page = await adapter._graphGet(f"me/messages/{messageId}/attachments")
if not isinstance(page, dict) or "error" in page:
return
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
for attachment in page.get("value", []) or []:
if attachment.get("@odata.type") != "#microsoft.graph.fileAttachment":
continue
if attachment.get("isInline"):
continue
size = int(attachment.get("size") or 0)
if size and size > limits.maxAttachmentBytes:
result.skippedPolicy += 1
continue
contentBytesB64 = attachment.get("contentBytes")
if not contentBytesB64:
continue
try:
rawBytes = base64.b64decode(contentBytesB64)
except Exception:
result.skippedPolicy += 1
continue
fileName = attachment.get("name") or "attachment"
mimeType = attachment.get("contentType") or "application/octet-stream"
attachmentId = attachment.get("id") or fileName
syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId)
try:
extracted = runExtraction(
extractorRegistry, chunkerRegistry,
rawBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc)
result.failed += 1
continue
contentObjects: List[Dict[str, Any]] = []
for part in getattr(extracted, "parts", None) or []:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
contentObjects.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "attachment",
**(getattr(part, "metadata", None) or {}),
},
})
if not contentObjects:
result.skippedPolicy += 1
continue
try:
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "msft",
"service": "outlook",
"parentId": parentSyntheticId,
"externalItemId": attachmentId,
"parentMessageId": messageId,
},
)
)
result.attachmentsIndexed += 1
except Exception as exc:
logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc)
result.failed += 1
def _finalizeResult(connectionId: str, result: OutlookBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=outlook connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.attachmentsIndexed, result.failed, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "outlook",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
"errors": result.errors[:20],
}
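
To make the `mailContentDepth` tiers concrete, here is a small illustrative call to `_buildContentObjects` with a trimmed Graph message payload; the field names follow the Microsoft Graph message resource, the values are invented.

sampleMessage = {
    "subject": "Q3 forecast",
    "from": {"emailAddress": {"name": "Ada", "address": "ada@example.com"}},
    "toRecipients": [{"emailAddress": {"address": "bob@example.com"}}],
    "receivedDateTime": "2026-04-28T09:12:00Z",
    "bodyPreview": "Draft numbers attached.",
    "body": {"contentType": "html", "content": "<p>Draft numbers attached.</p>"},
}

parts = _buildContentObjects(sampleMessage, maxBodyChars=8000, mailContentDepth="snippet")
# -> two content objects: the "header" block (Subject/From/To/Date) and the
#    "snippet" block; the cleaned body is only appended when depth == "full".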

View file

@ -0,0 +1,433 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""SharePoint bootstrap for the unified knowledge ingestion lane.
Walks the SharePoint drive(s) reachable via a UserConnection, downloads each
file-like item, runs the standard content extraction pipeline and hands the
result to `KnowledgeService.requestIngestion`. Idempotency is provided by the
ingestion façade itself; repeat bootstraps therefore produce
`ingestion.skipped.duplicate` for every unchanged item because we pass the
Graph `eTag` as `contentVersion`.
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
logger = logging.getLogger(__name__)
MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_SITES_DEFAULT = 3
@dataclass
class SharepointBootstrapLimits:
maxItems: int = MAX_ITEMS_DEFAULT
maxBytes: int = MAX_BYTES_DEFAULT
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT
maxSites: int = MAX_SITES_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
@dataclass
class SharepointBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
bytesProcessed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
"""Deterministic synthetic FileContentIndex id for a SharePoint item.
Stable across bootstraps so idempotency works; independent of file name so
moves/renames don't duplicate chunks.
"""
token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
return f"sp:{connectionId[:8]}:{token}"
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
"""Translate ExtractionResult → content objects accepted by requestIngestion."""
parts = getattr(extracted, "parts", None) or []
out: List[Dict[str, Any]] = []
for part in parts:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
out.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "file",
**(getattr(part, "metadata", None) or {}),
},
})
return out
async def bootstrapSharepoint(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[SharepointBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
Parameters allow injection for tests; production callers pass only
`connectionId` (and optionally a progressCb) and everything else is
resolved against the registered services.
"""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
startMs = time.time()
result = SharepointBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "sharepoint",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
if runExtractionFn is None:
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
def runExtractionFn(bytesData, name, mime, options): # type: ignore[no-redef]
return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
sites = await adapter.browse("/", limit=limits.maxSites)
except Exception as exc:
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"site_discovery: {exc}")
return _finalizeResult(connectionId, result, startMs)
for site in sites[: limits.maxSites]:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
break
sitePath = getattr(site, "path", "") or ""
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=sitePath,
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
result.errors.append(f"walk({sitePath}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
"""Load connection, instantiate SharepointAdapter, and build a KnowledgeService.
Runs with root privileges: bootstrap is a system operation triggered by an
authenticated user via callback; it must not be gated by a per-user
service-center context.
"""
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerMsft.connectorMsft import MsftConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = MsftConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("sharepoint")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _walkFolder(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
folderPath: str,
depth: int,
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
if depth > limits.maxDepth:
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
logger.warning("sharepoint browse %s failed: %s", folderPath, exc)
result.errors.append(f"browse({folderPath}): {exc}")
return
for entry in entries:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
return
if result.bytesProcessed >= limits.maxBytes:
return
entryPath = getattr(entry, "path", "") or ""
if getattr(entry, "isFolder", False):
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=entryPath,
depth=depth + 1,
limits=limits,
result=result,
progressCb=progressCb,
)
continue
mimeType = getattr(entry, "mimeType", None) or "application/octet-stream"
if any(mimeType.startswith(prefix) for prefix in limits.skipMimePrefixes):
result.skippedPolicy += 1
continue
size = int(getattr(entry, "size", 0) or 0)
if size and size > limits.maxFileSize:
result.skippedPolicy += 1
continue
metadata = getattr(entry, "metadata", {}) or {}
externalItemId = metadata.get("id") or entryPath
revision = metadata.get("revision") or metadata.get("lastModifiedDateTime")
await _ingestOne(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
entry=entry,
entryPath=entryPath,
mimeType=mimeType,
externalItemId=externalItemId,
revision=revision,
limits=limits,
result=result,
progressCb=progressCb,
)
async def _ingestOne(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
entry,
entryPath: str,
mimeType: str,
externalItemId: str,
revision: Optional[str],
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
fileName = getattr(entry, "name", "") or externalItemId
try:
fileBytes = await adapter.download(entryPath)
except Exception as exc:
logger.warning("sharepoint download %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"download({entryPath}): {exc}")
return
if not fileBytes:
result.failed += 1
return
result.bytesProcessed += len(fileBytes)
try:
extracted = runExtractionFn(
fileBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("sharepoint extraction %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"extract({entryPath}): {exc}")
return
contentObjects = _toContentObjects(extracted, fileName)
if not contentObjects:
result.skippedPolicy += 1
return
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"authority": "msft",
"service": "sharepoint",
"externalItemId": externalItemId,
"externalPath": entryPath,
"revision": revision,
}
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="sharepoint_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance=provenance,
)
)
except Exception as exc:
logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({entryPath}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if handle.error:
result.errors.append(f"ingest({entryPath}): {handle.error}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
f"sharepoint processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "sharepoint",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
# Yield so the event loop can interleave other tasks (download/extract are
# CPU-ish and extraction uses sync libs; cooperative scheduling prevents
# starving other workers).
await asyncio.sleep(0)
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "sharepoint",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"bytesProcessed": result.bytesProcessed,
"durationMs": durationMs,
"errors": result.errors[:20],
}
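
The injection parameters make the walk testable without Graph or a live knowledge store. A minimal sketch of such a setup, assuming fakes that expose only the attributes the walker reads (and noting that `loadConnectionPrefs` would still need patching in a real test):

from types import SimpleNamespace

class FakeAdapter:
    # browse("/") yields one site folder; browsing the site yields one file.
    async def browse(self, path, limit=None):
        if path == "/":
            return [SimpleNamespace(path="/siteA", name="siteA", isFolder=True,
                                    size=0, mimeType=None, metadata={"id": "siteA"})]
        return [SimpleNamespace(path="/siteA/report.txt", name="report.txt",
                                isFolder=False, size=12, mimeType="text/plain",
                                metadata={"id": "item-1", "revision": "v1"})]

    async def download(self, path):
        return b"hello world."

class FakeKnowledge:
    async def requestIngestion(self, job):
        return SimpleNamespace(status="indexed", error=None)

async def exampleBootstrapRun():
    summary = await bootstrapSharepoint(
        "conn-1",
        adapter=FakeAdapter(),
        connection=SimpleNamespace(mandateId="m-1", userId="u-1"),
        knowledgeService=FakeKnowledge(),
        limits=SharepointBootstrapLimits(maxItems=10),
        runExtractionFn=lambda raw, name, mime, opts: SimpleNamespace(
            parts=[SimpleNamespace(id="p1", data=raw.decode(), typeGroup="text",
                                   label="file", metadata={})]
        ),
    )
    assert summary["indexed"] == 1 and summary["failed"] == 0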

View file

@ -0,0 +1,107 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Text normalisation utilities used by knowledge ingestion.
The email body cleaning logic is intentionally regex-based and works on plain
text after an HTML → text pass so we never store unsanitised HTML/JS in the
knowledge store and retrieval stays robust (no extraneous markup tokens
eating embedding budget).
"""
from __future__ import annotations
import re
from typing import Optional
DEFAULT_MAX_CHARS = 8000
_QUOTE_MARKER_PATTERNS = [
re.compile(r"^\s*(?:On\s.+?\swrote:)\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*(?:Am\s.+?\sschrieb.+?:)\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*-{2,}\s*Original\s*Message\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*-{2,}\s*Urspr.+Nachricht\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*From:\s+.+$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Von:\s+.+$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Sent:\s+.+$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Gesendet:\s+.+$", re.MULTILINE | re.IGNORECASE),
]
_SIGNATURE_MARKERS = [
re.compile(r"^\s*-{2,}\s*$", re.MULTILINE),
re.compile(r"^\s*—\s*$", re.MULTILINE),
re.compile(r"^\s*Best regards\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Kind regards\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Mit freundlichen Gr[üu]ßen\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Viele Gr[üu]ße\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Best,\s*$", re.MULTILINE | re.IGNORECASE),
]
def _htmlToText(html: str) -> str:
"""Prefer BeautifulSoup when available, fall back to regex."""
try:
from bs4 import BeautifulSoup # type: ignore
soup = BeautifulSoup(html, "html.parser")
for tag in soup(["script", "style", "head"]):
tag.decompose()
for br in soup.find_all(["br"]):
br.replace_with("\n")
for p in soup.find_all(["p", "div", "li", "tr"]):
p.append("\n")
text = soup.get_text()
except Exception:
# Minimal fallback: strip tags crudely.
text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
text = re.sub(r"</(?:p|div|li|tr)>", "\n", text, flags=re.IGNORECASE)
text = re.sub(r"<[^>]+>", "", text)
# Collapse non-breaking + zero-width whitespace.
text = text.replace("\u00a0", " ").replace("\u200b", "")
return text
def _stripQuotedThread(text: str) -> str:
"""Remove reply-chain content so only the author's own contribution remains."""
earliest = len(text)
for pattern in _QUOTE_MARKER_PATTERNS:
match = pattern.search(text)
if match and match.start() < earliest:
earliest = match.start()
# Drop any block starting with "> " quoted lines (often Gmail/Thunderbird).
quotedBlock = re.search(r"^(?:\s*>.*\n?)+", text, re.MULTILINE)
if quotedBlock and quotedBlock.start() < earliest:
earliest = quotedBlock.start()
return text[:earliest].rstrip()
def _stripSignature(text: str) -> str:
earliest = len(text)
for pattern in _SIGNATURE_MARKERS:
match = pattern.search(text)
if match and match.start() < earliest:
earliest = match.start()
return text[:earliest].rstrip()
def _collapseWhitespace(text: str) -> str:
text = re.sub(r"[ \t]+", " ", text)
text = re.sub(r"\n{3,}", "\n\n", text)
return text.strip()
def cleanEmailBody(html: str, maxChars: Optional[int] = DEFAULT_MAX_CHARS) -> str:
"""Return a compact plain-text view of an email body suitable for embedding.
Steps: HTML → text, remove quoted reply chain, remove signature, collapse
whitespace, truncate to maxChars. Always returns a string (possibly empty).
"""
if not html:
return ""
text = _htmlToText(html) if "<" in html and ">" in html else html
text = _stripQuotedThread(text)
text = _stripSignature(text)
text = _collapseWhitespace(text)
if maxChars and len(text) > maxChars:
text = text[:maxChars].rstrip() + "…"
return text
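
A quick, invented example of what the pipeline does to a typical HTML reply; the exact whitespace depends on whether BeautifulSoup is installed, but the quoted thread and the signature are stripped on either path.

raw = (
    "<div><p>Hi team,</p><p>the updated draft is attached.</p>"
    "<p>Best regards<br>Ida</p>"
    "<div>On Mon, Apr 27, 2026, Bob wrote:</div>"
    "<blockquote><p>Can you resend the draft?</p></blockquote></div>"
)
print(cleanEmailBody(raw, maxChars=200))
# Expected shape: only the author's own lines survive, e.g.
# Hi team,
# the updated draft is attached.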

View file

@ -100,12 +100,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
  # Update progress - preparing parameters
  self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
- from modules.datamodels.datamodelDocref import DocumentReferenceList
+ from modules.datamodels.datamodelDocref import (
+     DocumentReferenceList,
+     coerceDocumentReferenceList,
+ )
  documentListParam = parameters.get("documentList")
  inline_content_parts: Optional[List[ContentPart]] = None
- # Handle inline ActionDocuments (e.g. from SharePoint/email in automation2 no persistence)
+ # Inline ActionDocuments (SharePoint/email in automation2, no
+ # persistence) are list[ActionDocument-like dict] -- handled
+ # separately because they carry pre-extracted content. Everything
+ # else is normalised through the tolerant coercer.
  is_inline = (
      isinstance(documentListParam, list)
      and len(documentListParam) > 0
@ -117,28 +123,12 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
  logger.info(
      f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)"
  )
- elif documentListParam is None:
-     documentList = DocumentReferenceList(references=[])
-     logger.debug(f"ai.process: documentList is None, using empty DocumentReferenceList")
- elif isinstance(documentListParam, DocumentReferenceList):
-     documentList = documentListParam
-     logger.info(f"ai.process: Received DocumentReferenceList with {len(documentList.references)} references")
-     for idx, ref in enumerate(documentList.references):
-         logger.info(f" Reference {idx + 1}: documentId={ref.documentId}, type={type(ref).__name__}")
- elif isinstance(documentListParam, str):
-     documentList = DocumentReferenceList.from_string_list([documentListParam])
-     logger.info(f"ai.process: Converted string to DocumentReferenceList with {len(documentList.references)} references")
- elif isinstance(documentListParam, list):
-     first = documentListParam[0] if documentListParam else None
-     logger.info(
-         f"ai.process: documentList is list of {len(documentListParam)} items, "
-         f"first type={type(first).__name__}, has_documentData={_is_action_document_like(first) if first else False}"
-     )
-     documentList = DocumentReferenceList.from_string_list(documentListParam)
-     logger.info(f"ai.process: Converted list to DocumentReferenceList with {len(documentList.references)} references")
  else:
-     logger.error(f"Invalid documentList type: {type(documentListParam)}")
-     documentList = DocumentReferenceList(references=[])
+     documentList = coerceDocumentReferenceList(documentListParam)
+     logger.info(
+         f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
+         f"to DocumentReferenceList with {len(documentList.references)} references"
+     )
  # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
  resultType = parameters.get("resultType")

View file

@ -5,7 +5,10 @@ import logging
  import time
  from typing import Dict, Any
  from modules.datamodels.datamodelChat import ActionResult, ActionDocument
- from modules.datamodels.datamodelDocref import DocumentReferenceList
+ from modules.datamodels.datamodelDocref import (
+     DocumentReferenceList,
+     coerceDocumentReferenceList,
+ )
  from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
  logger = logging.getLogger(__name__)
@ -16,20 +19,17 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
  workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
  operationId = f"context_extract_{workflowId}_{int(time.time())}"
- # Extract documentList from parameters dict
  documentListParam = parameters.get("documentList")
  if not documentListParam:
      return ActionResult.isFailure(error="documentList is required")
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
-     documentList = documentListParam
- elif isinstance(documentListParam, str):
-     documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
-     documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
-     return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+ documentList = coerceDocumentReferenceList(documentListParam)
+ if not documentList.references:
+     return ActionResult.isFailure(
+         error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
+         f"expected DocumentReferenceList, list of strings/dicts, or "
+         f"a wrapper dict like {{'documents': [...]}}"
+     )
  # Start progress tracking
  parentOperationId = parameters.get('parentOperationId')

View file

@ -5,7 +5,10 @@ import logging
  import time
  from typing import Dict, Any
  from modules.datamodels.datamodelChat import ActionResult, ActionDocument
- from modules.datamodels.datamodelDocref import DocumentReferenceList
+ from modules.datamodels.datamodelDocref import (
+     DocumentReferenceList,
+     coerceDocumentReferenceList,
+ )
  from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
  logger = logging.getLogger(__name__)
@ -26,20 +29,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
  if not neutralizationEnabled:
      logger.info("Neutralization is not enabled, returning documents unchanged")
      # Return original documents if neutralization is disabled
-     # Get documents from documentList
      documentListParam = parameters.get("documentList")
      if not documentListParam:
          return ActionResult.isFailure(error="documentList is required")
-     # Convert to DocumentReferenceList if needed
-     if isinstance(documentListParam, DocumentReferenceList):
-         documentList = documentListParam
-     elif isinstance(documentListParam, str):
-         documentList = DocumentReferenceList.from_string_list([documentListParam])
-     elif isinstance(documentListParam, list):
-         documentList = DocumentReferenceList.from_string_list(documentListParam)
-     else:
-         return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+     documentList = coerceDocumentReferenceList(documentListParam)
+     if not documentList.references:
+         return ActionResult.isFailure(
+             error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
+         )
      # Get ChatDocuments from documentList
      chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
@ -65,20 +63,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
      return ActionResult.isSuccess(documents=actionDocuments)
- # Extract documentList from parameters dict
  documentListParam = parameters.get("documentList")
  if not documentListParam:
      return ActionResult.isFailure(error="documentList is required")
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
-     documentList = documentListParam
- elif isinstance(documentListParam, str):
-     documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
-     documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
-     return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+ documentList = coerceDocumentReferenceList(documentListParam)
+ if not documentList.references:
+     return ActionResult.isFailure(
+         error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
+     )
  # Start progress tracking
  parentOperationId = parameters.get('parentOperationId')

View file

@ -9,6 +9,9 @@ from modules.datamodels.datamodelChat import ActionResult, ActionItem, TaskStep
  from modules.datamodels.datamodelChat import ChatWorkflow
  from modules.workflows.processing.shared.methodDiscovery import methods
  from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
+ from modules.workflows.processing.shared.parameterValidation import (
+     InvalidActionParameterError, validateAndCoerceParameters,
+ )
  logger = logging.getLogger(__name__)
@ -20,20 +23,32 @@ class ActionExecutor:
  async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
-     """Execute a method action"""
+     """Execute a method action with validated/coerced parameters.
+     Parameter validation is centralised here so the contract holds for
+     every execution path (agent tool calls, workflow graph nodes,
+     REST routes); actions can rely on declared types without
+     defensive isinstance branches.
+     """
      try:
          if methodName not in methods:
              raise ValueError(f"Unknown method: {methodName}")
          method = methods[methodName]
          if actionName not in method['actions']:
              raise ValueError(f"Unknown action: {actionName} for method {methodName}")
          action = method['actions'][actionName]
-         # Execute the action
+         actionDef = method['instance']._actions.get(actionName)
+         if actionDef is not None:
+             parameters = validateAndCoerceParameters(actionDef, parameters or {})
          return await action['method'](parameters)
+     except InvalidActionParameterError as e:
+         logger.error(f"Invalid parameters for {methodName}.{actionName}: {e}")
+         raise
      except Exception as e:
          logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
          raise

View file

@ -0,0 +1,198 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Universal parameter validation + coercion for workflow actions.
Workflow actions historically received their ``parameters`` as a raw
``Dict[str, Any]`` with no enforcement of the declared parameter schema.
That implicit contract masked two whole classes of bugs:
1. **Type confusion at the agent boundary.** The agent's tool schema
(Phase-3 Typed Action Architecture) exposes ``FeatureInstanceRef`` /
``ConnectionRef`` etc. as typed *objects* with ``id`` plus a
discriminator (``featureCode`` / ``authority``) so the LLM can pick
the right instance among several. The action implementations, however,
use the value as a bare UUID string in ``recordFilter={"col": <value>}``.
Without normalization Postgres fails with "can't adapt type 'dict'",
the connector's previous swallow-and-return-[] hid the failure, and the
action returned the misleading "no record found" error.
2. **Unchecked optional flags.** ``forceRefresh`` arriving as the string
``"true"`` instead of a real bool, ``periodMonth`` arriving as ``"12"``
instead of ``12``, etc. Every action grew its own ad-hoc coercion code.
This module centralises validation and coercion at exactly one boundary:
``ActionExecutor.executeAction``. By the time the action body runs, the
``parameters`` dict is guaranteed to satisfy the declared schema.
Unknown extra keys (e.g. ``parentOperationId`` injected by the executor,
``expectedDocumentFormats`` from action items) are passed through
untouched; the schema only constrains *declared* parameters.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class InvalidActionParameterError(ValueError):
"""Raised when a declared action parameter is missing, malformed, or
cannot be coerced into the declared type.
The message identifies the action and parameter so the agent and
workflow log can pinpoint the offending call instead of getting an
opaque downstream "no record found" or "can't adapt type 'X'".
"""
def __init__(self, actionId: str, paramName: str, reason: str):
super().__init__(f"{actionId}.{paramName}: {reason}")
self.actionId = actionId
self.paramName = paramName
self.reason = reason
_TRUE_STRINGS = {"true", "1", "yes", "on"}
_FALSE_STRINGS = {"false", "0", "no", "off", ""}
def _isRefSchema(typeStr: str) -> bool:
"""A declared type is a Ref-Schema iff its name ends with ``Ref`` AND it
resolves to a PORT_TYPE_CATALOG schema with an ``id`` field.
The catalog is imported lazily to keep this module light at startup.
"""
if not typeStr or not typeStr.endswith("Ref"):
return False
from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG
schema = PORT_TYPE_CATALOG.get(typeStr)
if schema is None:
return False
return any(f.name == "id" for f in schema.fields)
def _coerceRef(actionId: str, paramName: str, value: Any) -> Optional[str]:
"""Collapse a Ref payload to its ``id`` string.
Accepts:
* already a string → returned as-is (workflow execution path),
* dict with non-empty ``id`` field → returns the id (agent path),
* ``None`` → returned as-is so optional Ref params stay optional.
"""
if value is None or isinstance(value, str):
return value
if isinstance(value, dict):
refId = value.get("id")
if isinstance(refId, str) and refId:
return refId
raise InvalidActionParameterError(
actionId, paramName,
f"Ref payload missing or empty 'id' field: {value!r}",
)
raise InvalidActionParameterError(
actionId, paramName,
f"Ref must be a string id or {{'id': ...}} dict, got {type(value).__name__}",
)
def _coercePrimitive(actionId: str, paramName: str, value: Any, typeStr: str) -> Any:
"""Best-effort coercion of primitive types from string-form payloads.
The agent's JSON tool calls deliver everything as strings/numbers; the
workflow executor passes through raw template values which are also
often strings. Coercing here removes ad-hoc ``isinstance(x, str)``
branches inside every action.
"""
if value is None:
return None
if typeStr == "bool":
if isinstance(value, bool):
return value
if isinstance(value, str):
lower = value.strip().lower()
if lower in _TRUE_STRINGS:
return True
if lower in _FALSE_STRINGS:
return False
if isinstance(value, (int, float)):
return bool(value)
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to bool",
)
if typeStr == "int":
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
if isinstance(value, str) and value.strip():
try:
return int(value.strip(), 10)
except ValueError:
pass
if isinstance(value, float) and value.is_integer():
return int(value)
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to int",
)
if typeStr == "float":
if isinstance(value, (int, float)):
return float(value)
if isinstance(value, str) and value.strip():
try:
return float(value.strip())
except ValueError:
pass
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to float",
)
return value
def validateAndCoerceParameters(actionDef, parameters: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and coerce ``parameters`` against ``actionDef.parameters``.
Behaviour per declared parameter:
* **Missing + required** → raises ``InvalidActionParameterError``.
* **Missing + optional** → left absent (action uses its own default).
* **Present + Ref-Schema (e.g. FeatureInstanceRef)** → ``{id: ..., ...}``
collapsed to the bare id string; pass-through if already a string.
* **Present + primitive (bool/int/float)** → coerced from common
string forms (e.g. ``"true"`` → ``True``).
* **Present + other types** (catalog objects, ``str``, ``Any``,
containers) → passed through untouched.
Unknown keys (e.g. ``parentOperationId``, ``expectedDocumentFormats``,
ad-hoc fields injected by the executor) are passed through unchanged.
Returns a new dict (does not mutate the caller's parameters).
"""
if not parameters:
parameters = {}
actionId = getattr(actionDef, "actionId", None) or "<unknown.action>"
declared = getattr(actionDef, "parameters", {}) or {}
coerced: Dict[str, Any] = dict(parameters)
for paramName, paramSchema in declared.items():
typeStr = getattr(paramSchema, "type", None) or "Any"
required = bool(getattr(paramSchema, "required", False))
if paramName not in coerced or coerced[paramName] is None:
if required:
raise InvalidActionParameterError(
actionId, paramName, "required parameter missing",
)
continue
rawValue = coerced[paramName]
if _isRefSchema(typeStr):
coerced[paramName] = _coerceRef(actionId, paramName, rawValue)
continue
if typeStr in ("bool", "int", "float"):
coerced[paramName] = _coercePrimitive(actionId, paramName, rawValue, typeStr)
continue
return coerced
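
A usage sketch with a stub action definition; the stub mirrors only the attributes this module reads (`actionId`, `parameters`, each schema's `type`/`required`), and the action id is hypothetical.

from types import SimpleNamespace

stubActionDef = SimpleNamespace(
    actionId="ledger.refreshBalance",  # hypothetical action id
    parameters={
        "forceRefresh": SimpleNamespace(type="bool", required=False),
        "periodMonth": SimpleNamespace(type="int", required=True),
    },
)

coerced = validateAndCoerceParameters(
    stubActionDef,
    {"forceRefresh": "true", "periodMonth": "12", "parentOperationId": "op-1"},
)
# -> {"forceRefresh": True, "periodMonth": 12, "parentOperationId": "op-1"}
# Omitting the required "periodMonth" would raise InvalidActionParameterError.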

View file

View file

@ -0,0 +1,66 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: temperature handling for OpenAI chat-completions models.
Historical regression: every payload sent ``temperature=0.2``. After the
GPT-5 launch OpenAI rejects any non-default temperature for the GPT-5.x
and o-series (o1/o3/o4) reasoning models with HTTP 400::
"Unsupported value: 'temperature' does not support 0.2 with this
model. Only the default (1) value is supported."
The fix is a single helper, ``_supportsCustomTemperature``, that is
consulted before adding the field to the outgoing payload. These tests
pin the contract:
* legacy chat models (gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) keep
honoring custom temperatures,
* every gpt-5.x and o1/o3/o4 variant must omit the field entirely.
"""
from __future__ import annotations
import pytest
from modules.aicore.aicorePluginOpenai import _supportsCustomTemperature
class TestSupportsCustomTemperature:
"""Pure model-name classification - no network, no payload assembly."""
@pytest.mark.parametrize(
"modelName",
[
"gpt-4o",
"gpt-4o-mini",
"gpt-4.1",
"gpt-3.5-turbo",
"text-embedding-3-small",
"dall-e-3",
],
)
def testLegacyModelsAcceptCustomTemperature(self, modelName):
assert _supportsCustomTemperature(modelName) is True
@pytest.mark.parametrize(
"modelName",
[
"gpt-5",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5.5",
"GPT-5.5",
"o1",
"o1-mini",
"o3",
"o3-mini",
"o4-mini",
],
)
def testReasoningModelsRejectCustomTemperature(self, modelName):
assert _supportsCustomTemperature(modelName) is False
def testEmptyOrNoneModelDefaultsToSupported(self):
# Defensive: unknown/empty names should not silently break legacy paths.
assert _supportsCustomTemperature("") is True
assert _supportsCustomTemperature(None) is True
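# Editor's sketch (assumption, not the shipped helper): one plausible shape of
# the prefix classification these tests pin. Unknown or empty names default to
# "supported" so legacy paths keep working, matching the defensive test above.
def _supportsCustomTemperatureSketch(modelName):
    name = (modelName or "").lower()
    return not name.startswith(("gpt-5", "o1", "o3", "o4"))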

View file

@ -0,0 +1,158 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: PostgreSQL connector raises DatabaseQueryError on real failures.
Historical regression: ``getRecordset`` and friends used to swallow every
exception (``except Exception: log; return []``), which turned every kind of
broken query into "no rows found". That hid bugs like:
* dict passed where Postgres expected a UUID string ("can't adapt type 'dict'"),
* missing/renamed columns after an incomplete schema migration,
* dropped tables, lost connections, etc.
These tests pin the new contract: empty result sets still return ``[]`` /
``None`` (normal), but any exception inside the query path propagates as
``DatabaseQueryError`` with the table name attached. The transaction is
rolled back so the connection is usable for subsequent queries.
"""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
import psycopg2.errors
from modules.connectors.connectorDbPostgre import (
DatabaseConnector,
DatabaseQueryError,
_rollbackQuietly,
)
class DummyTable:
"""Stand-in for a Pydantic model so we can drive the connector without a real DB.
The connector reads ``model_class.__name__`` to derive the SQL table name,
so the class name itself becomes the asserted table name in tests.
"""
model_fields = {}
def _makeConnector(cursorBehavior):
"""Build a ``DatabaseConnector`` skeleton with mocked connection/cursor.
``cursorBehavior`` is a callable invoked with the cursor mock so the test
can configure ``execute``/``fetchall``/``fetchone`` per scenario.
"""
connector = DatabaseConnector.__new__(DatabaseConnector)
cursor = MagicMock()
cursorContext = MagicMock()
cursorContext.__enter__ = MagicMock(return_value=cursor)
cursorContext.__exit__ = MagicMock(return_value=False)
connection = MagicMock()
connection.cursor.return_value = cursorContext
connector.connection = connection
connector._ensureTableExists = MagicMock(return_value=True)
connector._systemTableName = "_system"
cursorBehavior(cursor)
return connector, connection, cursor
class TestGetRecordsetFailLoud:
def test_emptyResultStillReturnsList(self):
"""No rows → []; this is the normal happy path, not a failure."""
def behavior(cursor):
cursor.execute.return_value = None
cursor.fetchall.return_value = []
connector, connection, _ = _makeConnector(behavior)
result = connector.getRecordset(DummyTable)
assert result == []
connection.rollback.assert_not_called()
def test_dictAdaptErrorRaisesDatabaseQueryError(self):
"""Reproduces the Trustee bug: passing a dict in WHERE → can't adapt → raise."""
def behavior(cursor):
cursor.execute.side_effect = psycopg2.ProgrammingError(
"can't adapt type 'dict'"
)
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError) as excinfo:
connector.getRecordset(
DummyTable,
recordFilter={"featureInstanceId": {"id": "uuid", "featureCode": "trustee"}},
)
assert excinfo.value.table == "DummyTable"
assert "can't adapt type 'dict'" in str(excinfo.value)
assert isinstance(excinfo.value.original, psycopg2.ProgrammingError)
connection.rollback.assert_called_once()
def test_missingColumnRaisesDatabaseQueryError(self):
def behavior(cursor):
cursor.execute.side_effect = psycopg2.errors.UndefinedColumn(
'column "wat" does not exist'
)
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError) as excinfo:
connector.getRecordset(DummyTable, recordFilter={"wat": "x"})
assert "wat" in str(excinfo.value)
connection.rollback.assert_called_once()
def test_operationalErrorRaisesDatabaseQueryError(self):
"""Connection lost mid-query is also a real failure that must propagate."""
def behavior(cursor):
cursor.execute.side_effect = psycopg2.OperationalError("connection lost")
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError):
connector.getRecordset(DummyTable)
connection.rollback.assert_called_once()
class TestGetRecordFailLoud:
def test_recordNotFoundReturnsNone(self):
"""`fetchone()` returning None is "row missing", not an error."""
def behavior(cursor):
cursor.execute.return_value = None
cursor.fetchone.return_value = None
connector, connection, _ = _makeConnector(behavior)
result = connector.getRecord(DummyTable, "missing-id")
assert result is None
connection.rollback.assert_not_called()
def test_queryErrorRaisesDatabaseQueryError(self):
def behavior(cursor):
cursor.execute.side_effect = psycopg2.errors.UndefinedTable(
'relation "DummyTable" does not exist'
)
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError) as excinfo:
connector.getRecord(DummyTable, "any-id")
assert excinfo.value.table == "DummyTable"
connection.rollback.assert_called_once()
class TestRollbackQuietly:
def test_rollsBackOnLiveConnection(self):
connection = MagicMock()
_rollbackQuietly(connection)
connection.rollback.assert_called_once()
def test_swallowsRollbackError(self):
"""Rollback failure must not mask the original query error."""
connection = MagicMock()
connection.rollback.side_effect = RuntimeError("rollback failed")
_rollbackQuietly(connection)
def test_noopOnNoneConnection(self):
_rollbackQuietly(None)
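# Editor's sketch (assumption, standalone): the fail-loud contract in miniature.
# Empty results stay [], any exception rolls back quietly and re-raises with the
# table name attached. Names ending in "Sketch" are illustrative only, not the
# shipped connector code.
class DatabaseQueryErrorSketch(Exception):
    def __init__(self, table, original):
        super().__init__(f"{table}: {original}")
        self.table = table
        self.original = original

def getRecordsetSketch(connection, tableName, runQuery):
    try:
        return runQuery() or []
    except Exception as exc:
        try:
            connection.rollback()  # quiet rollback so the original error is never masked
        except Exception:
            pass
        raise DatabaseQueryErrorSketch(tableName, exc) from exc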

View file

@ -125,3 +125,10 @@ class TestConvertParameterSchema:
schema = _convertParameterSchema(actionParams)
assert schema["properties"]["connection"]["type"] == "object"
assert "id" in schema["properties"]["connection"]["properties"]
# Ref-payload normalization (collapsing `{id: ..., featureCode: ...}` to the
# bare id string) is no longer the adapter's job — it moved to the central
# `parameterValidation.validateAndCoerceParameters` invoked by
# `ActionExecutor.executeAction`. Tests for that contract live in
# `tests/unit/workflows/test_parameterValidation.py`.

View file

@ -0,0 +1,203 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap ClickUp tests with a fake service + knowledge service.
Verifies:
- Teams → spaces → lists (folderless + folder-based) → tasks traversal.
- Each task produces a `requestIngestion` call with `sourceKind="clickup_task"`
and header + description content-objects.
- `date_updated` is forwarded as the contentVersion for idempotency.
- Recency filter drops tasks older than `maxAgeDays`.
- maxWorkspaces / maxListsPerWorkspace / maxTasks caps are respected.
"""
import asyncio
import os
import sys
import time
from types import SimpleNamespace
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
bootstrapClickup,
ClickupBootstrapLimits,
_syntheticTaskId,
)
def _nowMs(offsetDays: int = 0) -> str:
return str(int((time.time() + offsetDays * 86400) * 1000))
class _FakeClickupService:
"""Records API calls; serves a canned 1-team / 1-space / 1-list / 2-task layout."""
def __init__(self, taskCount=2, oldTask=False):
self._taskCount = taskCount
self._oldTask = oldTask # when True, the second task is 400 days old
self.calls = []
async def getAuthorizedTeams(self):
self.calls.append(("getAuthorizedTeams",))
return {"teams": [{"id": "team-1", "name": "Acme"}]}
async def getSpaces(self, team_id: str):
self.calls.append(("getSpaces", team_id))
return {"spaces": [{"id": "space-1", "name": "Engineering"}]}
async def getFolderlessLists(self, space_id: str):
self.calls.append(("getFolderlessLists", space_id))
return {"lists": [{"id": "list-1", "name": "Sprint 1"}]}
async def getFolders(self, space_id: str):
self.calls.append(("getFolders", space_id))
return {"folders": [{"id": "folder-1", "name": "Subproject"}]}
async def getListsInFolder(self, folder_id: str):
self.calls.append(("getListsInFolder", folder_id))
return {"lists": [{"id": "list-2", "name": "Sub-tasks"}]}
async def getTasksInList(self, list_id: str, *, page=0, include_closed=False, subtasks=True):
self.calls.append(("getTasksInList", list_id, page, include_closed))
if page > 0:
return {"tasks": []}
tasks = []
for i in range(self._taskCount):
tid = f"{list_id}-task-{i}"
offsetDays = -400 if (self._oldTask and i == 1) else 0
tasks.append({
"id": tid,
"name": f"Task {i} of {list_id}",
"description": f"Plain description for task {i}",
"text_content": f"Rich content for task {i}",
"status": {"status": "open" if i == 0 else "closed"},
"assignees": [{"username": "alice"}],
"tags": [{"name": "urgent"}],
"date_updated": _nowMs(offsetDays),
"date_created": _nowMs(-1),
"url": f"https://app.clickup.com/t/{tid}",
})
return {"tasks": tasks}
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls = []
self._duplicates = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
return SimpleNamespace(
jobId=job.sourceId, status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def _adapter(svc):
return SimpleNamespace(_svc=svc)
def test_bootstrap_walks_team_space_lists_and_tasks():
svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
# 2 lists (folderless list-1 + folder's list-2) × 2 tasks each = 4 tasks
assert result["indexed"] == 4
assert result["workspaces"] == 1
assert result["lists"] == 2
sourceIds = {c.sourceId for c in knowledge.calls}
assert len(sourceIds) == 4
for job in knowledge.calls:
assert job.sourceKind == "clickup_task"
assert job.mimeType == "application/vnd.clickup.task+json"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "clickup"
assert job.provenance["teamId"] == "team-1"
assert job.contentVersion # numeric millisecond string
# At least the header content-object is present.
ids = [co["contentObjectId"] for co in job.contentObjects]
assert "header" in ids
def test_bootstrap_reports_duplicates_on_second_run():
svc = _FakeClickupService(taskCount=1)
duplicates = {
_syntheticTaskId("c1", "list-1-task-0"),
_syntheticTaskId("c1", "list-2-task-0"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 2
def test_bootstrap_skips_tasks_older_than_maxAgeDays():
svc = _FakeClickupService(taskCount=2, oldTask=True)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=180),
)
result = asyncio.run(_run())
# 2 lists × (1 recent + 1 skipped old) = 2 indexed + 2 skippedPolicy
assert result["indexed"] == 2
assert result["skippedPolicy"] == 2
def test_bootstrap_maxTasks_caps_ingestion():
svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None, maxTasks=3),
)
result = asyncio.run(_run())
assert result["indexed"] == 3
if __name__ == "__main__":
test_bootstrap_walks_team_space_lists_and_tasks()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_tasks_older_than_maxAgeDays()
test_bootstrap_maxTasks_caps_ingestion()
print("OK — bootstrapClickup tests passed")

View file

@ -0,0 +1,225 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Google Drive tests with a fake adapter + knowledge service.
Verifies:
- Drive walk traverses root subfolders, respecting `maxDepth`.
- Every file triggers `requestIngestion` with `sourceKind="gdrive_item"`.
- Duplicate runs (same modifiedTime revision) report `skippedDuplicate`.
- Provenance carries `authority="google"` and the Drive file id.
- Recency filter skips files older than `maxAgeDays`.
"""
import asyncio
import os
import sys
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
bootstrapGdrive,
GdriveBootstrapLimits,
_syntheticFileId,
)
@dataclass
class _ExtEntry:
name: str
path: str
isFolder: bool = False
size: Optional[int] = None
mimeType: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
def _today_iso(offsetDays: int = 0) -> str:
return (datetime.now(timezone.utc) + timedelta(days=offsetDays)).strftime("%Y-%m-%dT%H:%M:%SZ")
class _FakeDriveAdapter:
"""Minimal DriveAdapter stand-in.
Layout:
"/" (root) 2 files + 1 folder (sub)
"/sub_id" 1 file
"""
def __init__(self, recent_only: bool = True):
self.downloaded: List[str] = []
self._recent = _today_iso(0)
self._old = _today_iso(-400)
self._recent_only = recent_only
async def browse(self, path: str, filter=None, limit=None):
if path in ("/", "", "root"):
return [
_ExtEntry(
name="f1.txt", path="/f1", size=20,
mimeType="text/plain",
metadata={"id": "f1", "modifiedTime": self._recent},
),
_ExtEntry(
name="f2.txt", path="/f2", size=20,
mimeType="text/plain",
metadata={"id": "f2", "modifiedTime": self._recent if self._recent_only else self._old},
),
_ExtEntry(
name="Subfolder", path="/sub_id", isFolder=True,
mimeType="application/vnd.google-apps.folder",
metadata={"id": "sub_id", "modifiedTime": self._recent},
),
]
if path == "/sub_id":
return [
_ExtEntry(
name="f3.txt", path="/f3", size=20,
mimeType="text/plain",
metadata={"id": "f3", "modifiedTime": self._recent},
),
]
return []
async def download(self, path: str) -> bytes:
self.downloaded.append(path)
return path.encode("utf-8")
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls: List[SimpleNamespace] = []
self._duplicateIds = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicateIds else "indexed"
return SimpleNamespace(
jobId=f"{job.sourceKind}:{job.sourceId}",
status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def _fakeRunExtraction(data, name, mime, options):
return SimpleNamespace(
parts=[
SimpleNamespace(
id="p1",
data=data.decode("utf-8") if isinstance(data, bytes) else str(data),
typeGroup="text",
label="page:1",
metadata={"pageIndex": 0},
)
]
)
def test_bootstrap_walks_drive_and_subfolders():
adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert len(knowledge.calls) == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
assert result["indexed"] == 3
assert result["skippedDuplicate"] == 0
assert adapter.downloaded == ["/f1", "/f2", "/f3"]
def test_bootstrap_reports_duplicates_on_second_run():
adapter = _FakeDriveAdapter()
duplicateIds = {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicateIds)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 3
def test_bootstrap_skips_files_older_than_maxAgeDays():
adapter = _FakeDriveAdapter(recent_only=False) # f2 is 400 days old
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=180),
)
result = asyncio.run(_run())
assert result["indexed"] == 2 # f1, f3
assert result["skippedPolicy"] == 1 # f2 filtered out
def test_bootstrap_passes_connection_provenance():
adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=None),
)
asyncio.run(_run())
for job in knowledge.calls:
assert job.sourceKind == "gdrive_item"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "drive"
assert job.contentVersion # modifiedTime ISO string
if __name__ == "__main__":
test_bootstrap_walks_drive_and_subfolders()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_files_older_than_maxAgeDays()
test_bootstrap_passes_connection_provenance()
print("OK — bootstrapGdrive tests passed")

View file

@ -0,0 +1,240 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Gmail tests with a fake googleGet + knowledge service.
Verifies:
- Default labels (INBOX + SENT) are traversed.
- Each message produces a requestIngestion call with sourceKind=gmail_message
and structured contentObjects (header / snippet / body).
- Pagination via `nextPageToken` is followed.
- historyId is forwarded as the contentVersion for idempotency.
- MIME body extraction walks nested parts (multipart/alternative).
"""
import asyncio
import base64
import os
import sys
from types import SimpleNamespace
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
bootstrapGmail,
GmailBootstrapLimits,
_syntheticMessageId,
_buildContentObjects,
_walkPayloadForBody,
)
def _b64url(text: str) -> str:
return base64.urlsafe_b64encode(text.encode("utf-8")).decode("ascii").rstrip("=")
def _msg(mid: str, subject: str = "Hi", body: str = "Hello world", historyId: str = "h1"):
return {
"id": mid,
"threadId": f"thread-{mid}",
"historyId": historyId,
"internalDate": "1700000000000",
"snippet": body[:120],
"payload": {
"headers": [
{"name": "Subject", "value": subject},
{"name": "From", "value": "Alice <a@x.com>"},
{"name": "To", "value": "Bob <b@x.com>"},
{"name": "Date", "value": "Tue, 01 Jan 2025 10:00:00 +0000"},
],
"mimeType": "text/plain",
"body": {"data": _b64url(body), "size": len(body)},
"parts": [],
},
}
class _FakeGoogleGet:
"""Records URLs + returns the wired-up page or message response."""
def __init__(self, messages_by_label, paginated_label=None, page2=None):
self._messages = messages_by_label
self._paginated = paginated_label
self._page2 = page2 or []
self._served_first_page = set()
self.requested = []
async def __call__(self, url: str):
self.requested.append(url)
# List page: contains `/users/me/messages?labelIds=...`
if "/users/me/messages?" in url:
for label, msgs in self._messages.items():
if f"labelIds={label}" in url:
if (
label == self._paginated
and label not in self._served_first_page
):
self._served_first_page.add(label)
return {
"messages": [{"id": m["id"]} for m in msgs],
"nextPageToken": "token-2",
}
if label == self._paginated and "pageToken=token-2" in url:
return {
"messages": [{"id": m["id"]} for m in self._page2],
}
return {"messages": [{"id": m["id"]} for m in msgs]}
return {"messages": []}
# Detail fetch: /users/me/messages/{id}?format=full
if "/users/me/messages/" in url and "format=full" in url:
msgId = url.split("/users/me/messages/")[-1].split("?")[0]
for msgs in self._messages.values():
for m in msgs:
if m["id"] == msgId:
return m
for m in self._page2:
if m["id"] == msgId:
return m
return {"error": "not found"}
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls = []
self._duplicates = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
return SimpleNamespace(
jobId=job.sourceId, status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def test_buildContentObjects_emits_header_snippet_body():
parts = _buildContentObjects(_msg("m1", body="Hello\nWorld"), maxBodyChars=8000)
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header", "snippet", "body"]
header = parts[0]["data"]
assert "Subject: Hi" in header
assert "From: Alice <a@x.com>" in header
assert "To: Bob <b@x.com>" in header
def test_walkPayloadForBody_prefers_plain_over_html():
payload = {
"mimeType": "multipart/alternative",
"parts": [
{"mimeType": "text/plain", "body": {"data": _b64url("plain body")}},
{"mimeType": "text/html", "body": {"data": _b64url("<p>html body</p>")}},
],
}
bodies = _walkPayloadForBody(payload)
assert bodies["text"] == "plain body"
assert bodies["html"] == "<p>html body</p>"
def test_walkPayloadForBody_falls_back_to_html():
payload = {
"mimeType": "multipart/alternative",
"parts": [
{"mimeType": "text/html", "body": {"data": _b64url("<p>only html</p>")}},
],
}
bodies = _walkPayloadForBody(payload)
assert bodies["text"] == ""
assert "only html" in bodies["html"]
def test_bootstrap_gmail_indexes_messages_from_inbox_and_sent():
fake_get = _FakeGoogleGet({
"INBOX": [_msg("m1"), _msg("m2")],
"SENT": [_msg("m3")],
})
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGmail(
connectionId="c1",
adapter=SimpleNamespace(_token="t"),
connection=connection,
knowledgeService=knowledge,
limits=GmailBootstrapLimits(maxAgeDays=None),
googleGetFn=fake_get,
)
result = asyncio.run(_run())
assert result["indexed"] == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
_syntheticMessageId("c1", "m3"),
}
for job in knowledge.calls:
assert job.sourceKind == "gmail_message"
assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "gmail"
assert job.contentVersion == "h1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
def test_bootstrap_gmail_follows_pagination():
fake_get = _FakeGoogleGet(
messages_by_label={"INBOX": [_msg("m1")], "SENT": []},
paginated_label="INBOX",
page2=[_msg("m2"), _msg("m3")],
)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGmail(
connectionId="c1",
adapter=SimpleNamespace(_token="t"),
connection=connection,
knowledgeService=knowledge,
limits=GmailBootstrapLimits(maxAgeDays=None),
googleGetFn=fake_get,
)
result = asyncio.run(_run())
assert result["indexed"] == 3
def test_bootstrap_gmail_reports_duplicates():
fake_get = _FakeGoogleGet({"INBOX": [_msg("m1"), _msg("m2")], "SENT": []})
duplicates = {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGmail(
connectionId="c1",
adapter=SimpleNamespace(_token="t"),
connection=connection,
knowledgeService=knowledge,
limits=GmailBootstrapLimits(maxAgeDays=None),
googleGetFn=fake_get,
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 2
if __name__ == "__main__":
test_buildContentObjects_emits_header_snippet_body()
test_walkPayloadForBody_prefers_plain_over_html()
test_walkPayloadForBody_falls_back_to_html()
test_bootstrap_gmail_indexes_messages_from_inbox_and_sent()
test_bootstrap_gmail_follows_pagination()
test_bootstrap_gmail_reports_duplicates()
print("OK — bootstrapGmail tests passed")

View file

@ -0,0 +1,190 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Outlook tests with a fake adapter + knowledge service.
Verifies:
- Well-known folders (inbox, sentitems) are discovered via Graph.
- Each message produces a `requestIngestion` call with sourceKind=outlook_message
and structured contentObjects (header / snippet / body).
- Pagination via `@odata.nextLink` is followed.
- changeKey is forwarded as the contentVersion for idempotency.
"""
import asyncio
import os
import sys
from types import SimpleNamespace
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
bootstrapOutlook,
OutlookBootstrapLimits,
_syntheticMessageId,
_buildContentObjects,
)
class _FakeOutlookAdapter:
def __init__(self, messages_by_folder, paginated_folder=None, page2=None):
self._folders = {"inbox": "INBOX-ID", "sentitems": "SENT-ID"}
self._messages = messages_by_folder
self._paginated_folder = paginated_folder
self._page2 = page2 or []
self.requested_endpoints = []
async def _graphGet(self, endpoint: str):
self.requested_endpoints.append(endpoint)
if endpoint.startswith("me/mailFolders/") and "/messages" not in endpoint:
wellKnown = endpoint.split("/")[-1]
fid = self._folders.get(wellKnown)
if not fid:
return {"error": "not found"}
return {"id": fid, "displayName": wellKnown}
# message page request: e.g. me/mailFolders/INBOX-ID/messages?...
for fid, messages in self._messages.items():
if f"me/mailFolders/{fid}/messages" in endpoint:
page = {"value": messages}
if fid == self._paginated_folder and "skiptoken" not in endpoint:
page["@odata.nextLink"] = (
"https://graph.microsoft.com/v1.0/"
f"me/mailFolders/{fid}/messages?$skiptoken=abc"
)
elif fid == self._paginated_folder and "skiptoken" in endpoint:
page = {"value": self._page2}
return page
return {"value": []}
async def browse(self, path):
return []
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls = []
self._duplicates = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
return SimpleNamespace(
jobId=job.sourceId, status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def _msg(mid: str, subject: str = "Hi", change: str = "ck1"):
return {
"id": mid,
"subject": subject,
"from": {"emailAddress": {"name": "Alice", "address": "a@x.com"}},
"toRecipients": [{"emailAddress": {"name": "Bob", "address": "b@x.com"}}],
"ccRecipients": [],
"receivedDateTime": "2025-01-01T10:00:00Z",
"bodyPreview": "Hello world",
"body": {"contentType": "text", "content": "Hello world\nThis is the body."},
"internetMessageId": f"<{mid}@local>",
"hasAttachments": False,
"changeKey": change,
}
def test_buildContentObjects_emits_header_snippet_body():
parts = _buildContentObjects(_msg("m1"), maxBodyChars=8000)
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header", "snippet", "body"]
header = parts[0]["data"]
assert "Subject: Hi" in header
assert "From: Alice <a@x.com>" in header
assert "To: Bob <b@x.com>" in header
def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
adapter = _FakeOutlookAdapter({
"INBOX-ID": [_msg("m1"), _msg("m2")],
"SENT-ID": [_msg("m3")],
})
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapOutlook(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
limits=OutlookBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
_syntheticMessageId("c1", "m3"),
}
for job in knowledge.calls:
assert job.sourceKind == "outlook_message"
assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["service"] == "outlook"
assert job.contentVersion == "ck1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
def test_bootstrap_outlook_follows_pagination():
adapter = _FakeOutlookAdapter(
messages_by_folder={"INBOX-ID": [_msg("m1")], "SENT-ID": []},
paginated_folder="INBOX-ID",
page2=[_msg("m2"), _msg("m3")],
)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapOutlook(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
limits=OutlookBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 3
def test_bootstrap_outlook_reports_duplicates():
adapter = _FakeOutlookAdapter({
"INBOX-ID": [_msg("m1"), _msg("m2")],
"SENT-ID": [],
})
duplicates = {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapOutlook(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
limits=OutlookBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 2
if __name__ == "__main__":
test_buildContentObjects_emits_header_snippet_body()
test_bootstrap_outlook_indexes_messages_from_inbox_and_sent()
test_bootstrap_outlook_follows_pagination()
test_bootstrap_outlook_reports_duplicates()
print("OK — bootstrapOutlook tests passed")

View file

@ -0,0 +1,209 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap SharePoint tests with a fake adapter + knowledge service.
Verifies:
- Every discovered file triggers `requestIngestion`.
- Duplicate runs (same eTag revisions) report `skippedDuplicate`.
- Synthetic fileIds are stable across runs so idempotency works end-to-end.
"""
import asyncio
import os
import sys
from dataclasses import dataclass
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint,
_syntheticFileId,
)
@dataclass
class _ExtEntry:
name: str
path: str
isFolder: bool = False
size: Optional[int] = None
mimeType: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
class _FakeSpAdapter:
"""Minimal SharepointAdapter stand-in.
Layout:
"/" 1 site
"/sites/site-1" 2 files (f1, f2) + 1 folder (sub)
"/sites/site-1/sub" 1 file (f3)
"""
def __init__(self):
self.downloaded: List[str] = []
async def browse(self, path: str, filter=None, limit=None):
if path == "/":
return [
_ExtEntry(
name="Site 1",
path="/sites/site-1",
isFolder=True,
metadata={"id": "site-1"},
),
]
if path == "/sites/site-1":
return [
_ExtEntry(
name="f1.txt", path="/sites/site-1/f1.txt",
mimeType="text/plain", size=20,
metadata={"id": "f1", "revision": "etag-f1"},
),
_ExtEntry(
name="f2.txt", path="/sites/site-1/f2.txt",
mimeType="text/plain", size=20,
metadata={"id": "f2", "revision": "etag-f2"},
),
_ExtEntry(
name="sub", path="/sites/site-1/sub",
isFolder=True, metadata={"id": "sub"},
),
]
if path == "/sites/site-1/sub":
return [
_ExtEntry(
name="f3.txt", path="/sites/site-1/sub/f3.txt",
mimeType="text/plain", size=20,
metadata={"id": "f3", "revision": "etag-f3"},
),
]
return []
async def download(self, path: str) -> bytes:
self.downloaded.append(path)
return path.encode("utf-8")
class _FakeKnowledgeService:
"""Records requestIngestion calls and returns the scripted handles."""
def __init__(self, duplicateIds=None):
self.calls: List[SimpleNamespace] = []
self._duplicateIds = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicateIds else "indexed"
return SimpleNamespace(
jobId=f"{job.sourceKind}:{job.sourceId}",
status=status,
contentHash="h",
fileId=job.sourceId,
index=None,
error=None,
)
def _fakeRunExtraction(data, name, mime, options):
"""Produce a single synthetic text part so `_toContentObjects` returns one."""
return SimpleNamespace(
parts=[
SimpleNamespace(
id="p1",
data=data.decode("utf-8") if isinstance(data, bytes) else str(data),
typeGroup="text",
label="page:1",
metadata={"pageIndex": 0},
)
]
)
def test_bootstrap_walks_sites_and_subfolders():
adapter = _FakeSpAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapSharepoint(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
)
result = asyncio.run(_run())
assert len(knowledge.calls) == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
assert result["indexed"] == 3
assert result["skippedDuplicate"] == 0
assert adapter.downloaded == [
"/sites/site-1/f1.txt",
"/sites/site-1/f2.txt",
"/sites/site-1/sub/f3.txt",
]
def test_bootstrap_reports_duplicates_on_second_run():
adapter = _FakeSpAdapter()
duplicateIds = {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicateIds)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapSharepoint(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 3
def test_bootstrap_passes_connection_provenance():
adapter = _FakeSpAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapSharepoint(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
)
asyncio.run(_run())
for job in knowledge.calls:
assert job.sourceKind == "sharepoint_item"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "msft"
assert job.provenance["service"] == "sharepoint"
assert job.contentVersion and job.contentVersion.startswith("etag-")
if __name__ == "__main__":
test_bootstrap_walks_sites_and_subfolders()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_passes_connection_provenance()
print("OK — bootstrapSharepoint tests passed")

View file

@ -0,0 +1,110 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Unit tests for cleanEmailBody.
Covers: HTML→text normalisation, quoted-reply removal, signature removal,
whitespace collapse and truncation. The utility is used during Outlook
bootstrap; buggy cleaning would leak quoted threads / signatures into every
embedding.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subTextClean import (
cleanEmailBody,
)
def test_strips_html_tags_and_scripts():
html = (
"<html><head><style>body{}</style></head>"
"<body><p>Hello <b>world</b></p>"
"<script>alert('x')</script></body></html>"
)
cleaned = cleanEmailBody(html)
assert "Hello" in cleaned
assert "world" in cleaned
assert "<" not in cleaned
assert "alert" not in cleaned
def test_strips_quoted_reply_english():
body = (
"Actual answer from me.\n\n"
"On Mon, 1 Jan 2024 at 10:00, Someone <s@x.com> wrote:\n"
"> Original question?\n"
"> Second line.\n"
)
cleaned = cleanEmailBody(body)
assert "Actual answer" in cleaned
assert "Original question" not in cleaned
assert "wrote:" not in cleaned
def test_strips_quoted_reply_german():
body = (
"Meine Antwort.\n\n"
"Am 1. Januar 2024 um 10:00 schrieb Max Muster <m@x.com>:\n"
"> Ursprüngliche Frage?\n"
)
cleaned = cleanEmailBody(body)
assert "Meine Antwort" in cleaned
assert "Ursprüngliche Frage" not in cleaned
def test_strips_signature_after_dashes():
body = (
"Kurze Nachricht.\n"
"\n"
"--\n"
"Max Muster\n"
"Vorstand, Beispiel GmbH\n"
)
cleaned = cleanEmailBody(body)
assert "Kurze Nachricht" in cleaned
assert "Beispiel GmbH" not in cleaned
def test_strips_signature_salutation_de():
body = (
"Die eigentliche Information steht hier.\n\n"
"Mit freundlichen Grüßen\n"
"Max Muster"
)
cleaned = cleanEmailBody(body)
assert "eigentliche Information" in cleaned
assert "Max Muster" not in cleaned
def test_truncate_to_max_chars():
body = "abc " * 5000
cleaned = cleanEmailBody(body, maxChars=200)
assert len(cleaned) <= 201 # includes trailing ellipsis
def test_empty_input_returns_empty_string():
assert cleanEmailBody("") == ""
assert cleanEmailBody(None) == "" # type: ignore[arg-type]
def test_collapses_whitespace():
body = "A lot of spaces\n\n\n\nand blank lines"
cleaned = cleanEmailBody(body)
assert " " not in cleaned
assert "\n\n\n" not in cleaned
if __name__ == "__main__":
test_strips_html_tags_and_scripts()
test_strips_quoted_reply_english()
test_strips_quoted_reply_german()
test_strips_signature_after_dashes()
test_strips_signature_salutation_de()
test_truncate_to_max_chars()
test_empty_input_returns_empty_string()
test_collapses_whitespace()
print("OK — cleanEmailBody tests passed")

View file

@ -0,0 +1,119 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Purge tests for KnowledgeObjects.deleteFileContentIndexByConnectionId.
Ensures that a `connection.revoked` event wipes every FileContentIndex + chunk
linked to the given connectionId while leaving entries from other connections
(or upload-files with connectionId=None) intact.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.datamodels.datamodelKnowledge import FileContentIndex, ContentChunk
from modules.interfaces.interfaceDbKnowledge import KnowledgeObjects
class _FakeDb:
"""Minimal in-memory stand-in for ``KnowledgeObjects.db``.
Supports just the subset of APIs that deleteFileContentIndexByConnectionId
touches: getRecordset(FileContentIndex|ContentChunk, ...) + recordDelete.
"""
def __init__(self):
self.indexRows: dict = {}
self.chunks: dict = {}
def addIndex(self, row: dict) -> None:
self.indexRows[row["id"]] = row
def addChunk(self, row: dict) -> None:
self.chunks[row["id"]] = row
def getRecordset(self, modelClass, recordFilter=None, **_):
filter_ = recordFilter or {}
if modelClass is FileContentIndex:
rows = list(self.indexRows.values())
elif modelClass is ContentChunk:
rows = list(self.chunks.values())
else:
return []
def match(row):
for k, v in filter_.items():
if row.get(k) != v:
return False
return True
return [r for r in rows if match(r)]
def recordDelete(self, modelClass, recordId):
if modelClass is FileContentIndex:
return self.indexRows.pop(recordId, None) is not None
if modelClass is ContentChunk:
return self.chunks.pop(recordId, None) is not None
return False
def _buildKnowledge():
"""Instantiate KnowledgeObjects without triggering the real DB bootstrap."""
ko = KnowledgeObjects.__new__(KnowledgeObjects)
ko.currentUser = None
ko.userId = None
ko._scopeCache = {}
ko.db = _FakeDb()
return ko
def test_purge_by_connection_removes_only_matching_rows():
ko = _buildKnowledge()
ko.db.addIndex({"id": "sp1", "connectionId": "cx", "mandateId": "m1", "sourceKind": "sharepoint_item"})
ko.db.addIndex({"id": "sp2", "connectionId": "cx", "mandateId": "m1", "sourceKind": "sharepoint_item"})
ko.db.addIndex({"id": "upload", "connectionId": None, "mandateId": "m1", "sourceKind": "file"})
ko.db.addIndex({"id": "other", "connectionId": "cy", "mandateId": "m1", "sourceKind": "outlook_message"})
ko.db.addChunk({"id": "c1", "fileId": "sp1"})
ko.db.addChunk({"id": "c2", "fileId": "sp1"})
ko.db.addChunk({"id": "c3", "fileId": "sp2"})
ko.db.addChunk({"id": "c4", "fileId": "upload"})
ko.db.addChunk({"id": "c5", "fileId": "other"})
result = ko.deleteFileContentIndexByConnectionId("cx")
assert result == {"indexRows": 2, "chunks": 3}
assert "sp1" not in ko.db.indexRows
assert "sp2" not in ko.db.indexRows
assert "upload" in ko.db.indexRows
assert "other" in ko.db.indexRows
assert set(ko.db.chunks.keys()) == {"c4", "c5"}
def test_purge_with_empty_connection_id_is_a_noop():
ko = _buildKnowledge()
ko.db.addIndex({"id": "sp1", "connectionId": "cx"})
ko.db.addChunk({"id": "c1", "fileId": "sp1"})
result = ko.deleteFileContentIndexByConnectionId("")
assert result == {"indexRows": 0, "chunks": 0}
assert "sp1" in ko.db.indexRows
def test_purge_unknown_connection_returns_zero():
ko = _buildKnowledge()
ko.db.addIndex({"id": "sp1", "connectionId": "cx"})
result = ko.deleteFileContentIndexByConnectionId("nope")
assert result == {"indexRows": 0, "chunks": 0}
assert "sp1" in ko.db.indexRows
if __name__ == "__main__":
test_purge_by_connection_removes_only_matching_rows()
test_purge_with_empty_connection_id_is_a_noop()
test_purge_unknown_connection_returns_zero()
print("OK — connection-purge tests passed")

View file

@ -0,0 +1,124 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Test that runExtraction preserves per-part granularity when mergeStrategy=None.
The default MergeStrategy concatenates all text parts into a single ContentPart, which
collapses multi-page documents into one blob. This destroys RAG retrieval because every
document ends up as a single ContentChunk with a "blurred average" embedding.
Ingestion pipelines (requestIngestion callers) MUST pass mergeStrategy=None to preserve
per-page / per-section chunks.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.datamodels.datamodelExtraction import (
ContentPart,
ExtractionOptions,
MergeStrategy,
)
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ChunkerRegistry,
Extractor,
ExtractorRegistry,
)
class _FakeMultiPagePdfExtractor(Extractor):
"""Emits one text ContentPart per simulated page."""
def __init__(self, pageCount: int = 10):
self.pageCount = pageCount
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
return mimeType == "application/pdf"
def getSupportedExtensions(self):
return [".pdf"]
def getSupportedMimeTypes(self):
return ["application/pdf"]
def extract(self, fileBytes: bytes, context):
return [
ContentPart(
id=f"page-{i}",
parentId=None,
label=f"page_{i + 1}",
typeGroup="text",
mimeType="text/plain",
data=f"Page {i + 1} content — distinct semantic anchor #{i}",
metadata={"pageIndex": i, "size": 64},
)
for i in range(self.pageCount)
]
def _buildRegistry(pageCount: int) -> ExtractorRegistry:
registry = ExtractorRegistry()
fake = _FakeMultiPagePdfExtractor(pageCount)
registry.register("application/pdf", fake)
registry.register("pdf", fake)
return registry
def test_default_options_merge_all_text_parts_into_one():
"""Regression safeguard: default ExtractionOptions still merges (legacy behaviour).
Non-ingestion callers (AI processing, summarization) rely on this default.
"""
registry = _buildRegistry(pageCount=5)
extracted = runExtraction(
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
ExtractionOptions(),
)
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
assert len(textParts) == 1, (
f"Default options should merge all text parts into one, got {len(textParts)}"
)
assert "Page 1" in textParts[0].data and "Page 5" in textParts[0].data, (
"Merged text should contain content from all pages"
)
print("test_default_options_merge_all_text_parts_into_one [PASS]")
def test_merge_none_preserves_all_text_parts():
"""Core fix: mergeStrategy=None preserves per-page granularity for RAG ingestion."""
registry = _buildRegistry(pageCount=500)
extracted = runExtraction(
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
ExtractionOptions(mergeStrategy=None),
)
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
assert len(textParts) == 500, (
f"mergeStrategy=None should preserve all 500 text parts, got {len(textParts)}"
)
assert textParts[0].label == "page_1"
assert textParts[-1].label == "page_500"
print("test_merge_none_preserves_all_text_parts [PASS]")
def test_explicit_merge_strategy_still_merges():
"""Callers can still opt in to merging by passing an explicit MergeStrategy."""
registry = _buildRegistry(pageCount=3)
extracted = runExtraction(
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
ExtractionOptions(mergeStrategy=MergeStrategy()),
)
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
assert len(textParts) == 1, (
f"Explicit MergeStrategy should merge, got {len(textParts)} parts"
)
print("test_explicit_merge_strategy_still_merges [PASS]")
if __name__ == "__main__":
test_default_options_merge_all_text_parts_into_one()
test_merge_none_preserves_all_text_parts()
test_explicit_merge_strategy_still_merges()
print("\nAll merge-strategy tests passed.")

View file

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Test that _computeIngestionHash is stable across re-extractions of the same source.
Extractors generate fresh contentObjectIds (uuid.uuid4()) per run. The ingestion
hash MUST therefore be derived from content (contentType + data + order) only;
otherwise idempotency (AC4) silently fails: every re-extraction looks "new" and
triggers full re-embedding.
"""
import os
import sys
import uuid
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import (
_computeIngestionHash,
)
def _makeObjects(seed: str = "alpha"):
"""Build a synthetic contentObjects list as routeDataFiles._autoIndexFile would."""
return [
{
"contentObjectId": str(uuid.uuid4()),
"contentType": "text",
"data": f"Page 1 of {seed}",
},
{
"contentObjectId": str(uuid.uuid4()),
"contentType": "text",
"data": f"Page 2 of {seed}",
},
{
"contentObjectId": str(uuid.uuid4()),
"contentType": "binary",
"data": "<image-bytes-as-b64>",
},
]
def test_hash_stable_across_uuid_regeneration():
"""Same content + different contentObjectIds → same hash."""
a = _makeObjects("alpha")
b = _makeObjects("alpha") # identical data, fresh UUIDs
assert [o["contentObjectId"] for o in a] != [o["contentObjectId"] for o in b]
assert _computeIngestionHash(a) == _computeIngestionHash(b)
def test_hash_changes_when_data_changes():
a = _makeObjects("alpha")
b = _makeObjects("beta")
assert _computeIngestionHash(a) != _computeIngestionHash(b)
def test_hash_is_order_sensitive():
"""Reordered pages produce a different hash (different document)."""
a = _makeObjects("alpha")
b = list(reversed(a))
assert _computeIngestionHash(a) != _computeIngestionHash(b)
def test_hash_distinguishes_text_vs_binary_with_same_payload():
a = [{"contentObjectId": "x", "contentType": "text", "data": "hello"}]
b = [{"contentObjectId": "x", "contentType": "binary", "data": "hello"}]
assert _computeIngestionHash(a) != _computeIngestionHash(b)
def test_hash_handles_empty_input():
assert _computeIngestionHash([]) == _computeIngestionHash([])
if __name__ == "__main__":
test_hash_stable_across_uuid_regeneration()
test_hash_changes_when_data_changes()
test_hash_is_order_sensitive()
test_hash_distinguishes_text_vs_binary_with_same_payload()
test_hash_handles_empty_input()
print("OK — all 5 ingestion-hash stability tests passed")

View file

@ -0,0 +1,235 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Unit tests for KnowledgeIngestionConsumer event dispatch.
- `connection.established` enqueues a `connection.bootstrap` job.
- `connection.revoked` triggers a synchronous purge via KnowledgeObjects.
"""
import asyncio
import os
import sys
import types
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as consumer
def _resetRegistration(monkeypatch):
"""Force the module-level guard to register fresh in each test."""
monkeypatch.setattr(consumer, "_registered", False)
def test_onConnectionEstablished_enqueues_bootstrap(monkeypatch):
startedJobs = []
async def _fakeStartJob(jobType, payload, **kwargs):
startedJobs.append({"jobType": jobType, "payload": payload, "kwargs": kwargs})
return "job-1"
monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
consumer._onConnectionEstablished(
connectionId="c1", authority="msft", userId="u1"
)
# Drain pending tasks created by the consumer.
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
# If the consumer created a Task on a closed loop the fake startJob
# was still called synchronously via asyncio.run — in either case we
# check the recorded call.
finally:
loop.close()
assert len(startedJobs) == 1
assert startedJobs[0]["jobType"] == consumer.BOOTSTRAP_JOB_TYPE
assert startedJobs[0]["payload"]["connectionId"] == "c1"
assert startedJobs[0]["payload"]["authority"] == "msft"
assert startedJobs[0]["kwargs"]["triggeredBy"] == "u1"
def test_onConnectionEstablished_ignores_missing_id(monkeypatch):
called = []
async def _fakeStartJob(*a, **kw):
called.append(1)
return "x"
monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
consumer._onConnectionEstablished(connectionId="", authority="msft")
assert called == []
def test_onConnectionRevoked_runs_sync_purge(monkeypatch):
class _FakeKnowledge:
def __init__(self):
self.calls = []
def deleteFileContentIndexByConnectionId(self, cid):
self.calls.append(cid)
return {"indexRows": 2, "chunks": 5}
fakeKnow = _FakeKnowledge()
def _fakeGetInterface(_user=None):
return fakeKnow
monkeypatch.setattr(consumer, "getKnowledgeInterface", _fakeGetInterface)
consumer._onConnectionRevoked(
connectionId="c1", authority="msft", userId="u1", reason="disconnected"
)
assert fakeKnow.calls == ["c1"]
def test_onConnectionRevoked_ignores_missing_id(monkeypatch):
seen = []
def _fakeGetInterface(_user=None):
class _K:
def deleteFileContentIndexByConnectionId(self, cid):
seen.append(cid)
return {"indexRows": 0, "chunks": 0}
return _K()
monkeypatch.setattr(consumer, "getKnowledgeInterface", _fakeGetInterface)
consumer._onConnectionRevoked(connectionId="")
assert seen == []
def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
async def _run():
result = await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "slack"}},
lambda *_: None,
)
return result
result = asyncio.run(_run())
assert result["skipped"] is True
assert result["authority"] == "slack"
assert result["reason"] == "unsupported_authority"
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
calls = {"sp": 0, "ol": 0}
async def _fakeSp(connectionId, progressCb=None):
calls["sp"] += 1
return {"indexed": 1}
async def _fakeOl(connectionId, progressCb=None):
calls["ol"] += 1
return {"indexed": 2}
fakeSharepoint = types.ModuleType("subConnectorSyncSharepoint")
fakeSharepoint.bootstrapSharepoint = _fakeSp
fakeOutlook = types.ModuleType("subConnectorSyncOutlook")
fakeOutlook.bootstrapOutlook = _fakeOl
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint",
fakeSharepoint,
)
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook",
fakeOutlook,
)
async def _run():
return await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "msft"}},
lambda *_: None,
)
result = asyncio.run(_run())
assert calls == {"sp": 1, "ol": 1}
assert result["sharepoint"] == {"indexed": 1}
assert result["outlook"] == {"indexed": 2}
def test_bootstrap_job_dispatches_google_parts(monkeypatch):
calls = {"gd": 0, "gm": 0}
async def _fakeGd(connectionId, progressCb=None):
calls["gd"] += 1
return {"indexed": 7}
async def _fakeGm(connectionId, progressCb=None):
calls["gm"] += 1
return {"indexed": 11}
fakeGdrive = types.ModuleType("subConnectorSyncGdrive")
fakeGdrive.bootstrapGdrive = _fakeGd
fakeGmail = types.ModuleType("subConnectorSyncGmail")
fakeGmail.bootstrapGmail = _fakeGm
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive",
fakeGdrive,
)
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail",
fakeGmail,
)
async def _run():
return await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "google"}},
lambda *_: None,
)
result = asyncio.run(_run())
assert calls == {"gd": 1, "gm": 1}
assert result["drive"] == {"indexed": 7}
assert result["gmail"] == {"indexed": 11}
def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
calls = {"cu": 0}
async def _fakeCu(connectionId, progressCb=None):
calls["cu"] += 1
return {"indexed": 4}
fakeClickup = types.ModuleType("subConnectorSyncClickup")
fakeClickup.bootstrapClickup = _fakeCu
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup",
fakeClickup,
)
async def _run():
return await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "clickup"}},
lambda *_: None,
)
result = asyncio.run(_run())
assert calls == {"cu": 1}
assert result["clickup"] == {"indexed": 4}
if __name__ == "__main__":
# Usable without pytest fixtures for a quick smoke run.
class _MP:
def __init__(self):
self.undos = []
def setattr(self, target, name_or_value, value=None):
if value is None:
# the (string-target, value) two-argument form of monkeypatch.setattr is not supported here
raise SystemExit("use pytest monkeypatch in CLI")
self.undos.append((target, name_or_value, getattr(target, name_or_value)))
setattr(target, name_or_value, value)
def setitem(self, mapping, key, value):
self.undos.append((mapping, key, mapping.get(key)))
mapping[key] = value
print("Run via pytest: pytest tests/unit/services/test_knowledge_ingest_consumer.py")

View file

@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""Unit tests for P1d: consent gating, preference parsing, and walker behaviour.
Tests
-----
1. Bootstrap runner skips when ``knowledgeIngestionEnabled=False``.
2. ``loadConnectionPrefs`` returns safe defaults when preferences are absent.
3. ``loadConnectionPrefs`` maps all §2.6 keys correctly from a full prefs dict.
4. Gmail walker passes ``neutralize=True`` and ``mailContentDepth`` to IngestionJob.
5. Gmail walker produces only a header content-object when depth="metadata".
6. ClickUp walker skips description when scope="titles".
"""
from __future__ import annotations
import asyncio
import os
import sys
import types
import unittest
from typing import Any, Dict, Optional
from unittest.mock import AsyncMock, MagicMock, patch
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
# ---------------------------------------------------------------------------
# 1. Bootstrap runner consent gate
# ---------------------------------------------------------------------------
class TestBootstrapConsentGate(unittest.TestCase):
"""_bootstrapJobHandler must no-op when knowledgeIngestionEnabled is False."""
def _makeJob(self, connectionId="c-test", authority="google"):
return {"payload": {"connectionId": connectionId, "authority": authority}}
def _makeConn(self, enabled: bool):
conn = MagicMock()
conn.knowledgeIngestionEnabled = enabled
return conn
def test_skips_when_consent_disabled(self):
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as sut
fake_root = MagicMock()
fake_root.getUserConnectionById.return_value = self._makeConn(False)
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=fake_root):
            result = asyncio.run(
sut._bootstrapJobHandler(self._makeJob(), lambda *a: None)
)
assert result.get("skipped") is True
assert result.get("reason") == "consent_disabled"
fake_root.getUserConnectionById.assert_called_once_with("c-test")
def test_proceeds_when_consent_enabled(self):
"""When consent is enabled, the handler should call at least one walker."""
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as sut
fake_root = MagicMock()
fake_root.getUserConnectionById.return_value = self._makeConn(True)
        # Patch the walkers with AsyncMocks so neither does real I/O.
with (
patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=fake_root),
patch(
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive.bootstrapGdrive",
new=AsyncMock(return_value={"indexed": 0}),
),
patch(
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail.bootstrapGmail",
new=AsyncMock(return_value={"indexed": 0}),
),
):
            result = asyncio.run(
sut._bootstrapJobHandler(self._makeJob(authority="google"), lambda *a: None)
)
# Should not have 'skipped' at the top level.
assert result.get("skipped") is not True
assert result.get("authority") == "google"
# ---------------------------------------------------------------------------
# 2 + 3. loadConnectionPrefs
# ---------------------------------------------------------------------------
class TestLoadConnectionPrefs(unittest.TestCase):
def _makeConn(self, prefs: Optional[Dict[str, Any]]):
conn = MagicMock()
conn.knowledgePreferences = prefs
return conn
def _mockRoot(self, prefs):
root = MagicMock()
root.getUserConnectionById.return_value = self._makeConn(prefs)
return root
def test_returns_safe_defaults_when_prefs_none(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import (
ConnectionIngestionPrefs,
loadConnectionPrefs,
)
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(None)):
prefs = loadConnectionPrefs("x")
assert prefs.neutralizeBeforeEmbed is False
assert prefs.mailContentDepth == "full"
assert prefs.mailIndexAttachments is False
assert prefs.maxAgeDays == 90
assert prefs.clickupScope == "title_description"
assert prefs.gmailEnabled is True
assert prefs.driveEnabled is True
def test_maps_all_keys(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
raw = {
"neutralizeBeforeEmbed": True,
"mailContentDepth": "metadata",
"mailIndexAttachments": True,
"filesIndexBinaries": False,
"clickupScope": "with_comments",
"maxAgeDays": 30,
"surfaceToggles": {
"google": {"gmail": False, "drive": True},
"msft": {"sharepoint": False, "outlook": True},
},
}
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(raw)):
prefs = loadConnectionPrefs("x")
assert prefs.neutralizeBeforeEmbed is True
assert prefs.mailContentDepth == "metadata"
assert prefs.mailIndexAttachments is True
assert prefs.filesIndexBinaries is False
assert prefs.clickupScope == "with_comments"
assert prefs.maxAgeDays == 30
assert prefs.gmailEnabled is False
assert prefs.driveEnabled is True
assert prefs.sharepointEnabled is False
assert prefs.outlookEnabled is True
def test_invalid_depth_falls_back_to_default(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
raw = {"mailContentDepth": "everything_please"}
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(raw)):
prefs = loadConnectionPrefs("x")
assert prefs.mailContentDepth == "full"
# ---------------------------------------------------------------------------
# 4 + 5. Gmail walker: neutralize flag and mailContentDepth handling
# ---------------------------------------------------------------------------
class TestGmailWalkerPrefs(unittest.TestCase):
def _make_message(self, *, subject="Test", snippet="hello", body_text="full body"):
import base64
encoded = base64.urlsafe_b64encode(body_text.encode()).decode()
return {
"id": "msg-1",
"historyId": "h-42",
"threadId": "t-1",
"snippet": snippet,
"payload": {
"mimeType": "multipart/alternative",
"headers": [
{"name": "Subject", "value": subject},
{"name": "From", "value": "alice@example.com"},
{"name": "To", "value": "bob@example.com"},
{"name": "Date", "value": "Mon, 20 Apr 2026 10:00:00 +0000"},
],
"parts": [
{
"mimeType": "text/plain",
"body": {"data": encoded},
}
],
},
}
def test_neutralize_flag_forwarded(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
GmailBootstrapLimits,
_ingestMessage,
GmailBootstrapResult,
)
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
captured_jobs = []
async def fake_requestIngestion(job: IngestionJob):
captured_jobs.append(job)
return MagicMock(status="indexed", error=None)
ks = MagicMock()
ks.requestIngestion = fake_requestIngestion
limits = GmailBootstrapLimits(neutralize=True, mailContentDepth="full")
result = GmailBootstrapResult(connectionId="c-1")
        asyncio.run(
_ingestMessage(
googleGetFn=AsyncMock(return_value={}),
knowledgeService=ks,
connectionId="c-1",
mandateId="",
userId="u-1",
labelId="INBOX",
message=self._make_message(),
limits=limits,
result=result,
progressCb=None,
)
)
assert len(captured_jobs) == 1
assert captured_jobs[0].neutralize is True
def test_metadata_depth_yields_only_header(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
_buildContentObjects,
)
message = self._make_message(snippet="hi", body_text="should be excluded")
parts = _buildContentObjects(message, maxBodyChars=4000, mailContentDepth="metadata")
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header"]
def test_snippet_depth_yields_header_and_snippet(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
_buildContentObjects,
)
message = self._make_message(snippet="hi", body_text="should be excluded")
parts = _buildContentObjects(message, maxBodyChars=4000, mailContentDepth="snippet")
ids = [p["contentObjectId"] for p in parts]
assert "header" in ids
assert "snippet" in ids
assert "body" not in ids
# ---------------------------------------------------------------------------
# 6. ClickUp walker respects clickupScope="titles"
# ---------------------------------------------------------------------------
class TestClickupWalkerScope(unittest.TestCase):
def _make_task(self):
return {
"id": "task-1",
"name": "Ship feature X",
"date_updated": "1713888000000",
"description": "This should be omitted",
"text_content": "Also omitted",
"status": {"status": "open"},
"assignees": [],
"tags": [],
"list": {"name": "Backlog"},
"folder": {},
"space": {"name": "Engineering"},
}
def test_titles_scope_omits_description(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
ClickupBootstrapLimits,
_buildContentObjects,
)
limits = ClickupBootstrapLimits(clickupScope="titles")
parts = _buildContentObjects(self._make_task(), limits)
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header"]
assert "description" not in ids
def test_with_description_scope_includes_description(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
ClickupBootstrapLimits,
_buildContentObjects,
)
limits = ClickupBootstrapLimits(clickupScope="title_description")
parts = _buildContentObjects(self._make_task(), limits)
ids = [p["contentObjectId"] for p in parts]
assert "header" in ids
assert "description" in ids
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,206 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: universal action parameter validation + coercion.
This is the single source of truth for the action parameter contract:
every workflow action (called via the agent, the workflow graph, or REST)
runs through ``validateAndCoerceParameters`` before its body executes.
The tests pin three groups of behaviour:
1. **Required-parameter enforcement**: missing required params raise a
   typed ``InvalidActionParameterError`` instead of an opaque downstream
   error.
2. **Ref-payload normalization**: the agent's typed tool schema delivers
   ``FeatureInstanceRef`` as ``{id: ..., featureCode: ...}``, but actions
   expect a bare UUID string. Collapsing happens here, not in N action
   bodies.
3. **Primitive coercion**: ``"true"``/``"12"``/``"3.14"`` from JSON-shaped
   payloads are coerced to bool/int/float, removing ad-hoc branches.
Unknown extra keys (e.g. ``parentOperationId``) flow through unchanged so
the executor can keep injecting cross-cutting context.
"""
from __future__ import annotations
import pytest
from modules.datamodels.datamodelWorkflowActions import (
WorkflowActionDefinition, WorkflowActionParameter,
)
from modules.shared.frontendTypes import FrontendType
from modules.workflows.processing.shared.parameterValidation import (
InvalidActionParameterError, validateAndCoerceParameters,
)
def _makeActionDef(actionId: str = "trustee.refreshAccountingData", **paramDefs) -> WorkflowActionDefinition:
"""Build a real WorkflowActionDefinition; we only care about parameters."""
parameters = {
name: WorkflowActionParameter(
name=name,
type=spec["type"],
frontendType=FrontendType.TEXT,
required=spec.get("required", False),
description=spec.get("description", ""),
)
for name, spec in paramDefs.items()
}
return WorkflowActionDefinition(
actionId=actionId,
description="Test action",
parameters=parameters,
execute=lambda *_a, **_kw: None,
)
class TestRequiredEnforcement:
def test_missingRequiredRaises(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError) as excinfo:
validateAndCoerceParameters(actionDef, {})
assert excinfo.value.paramName == "featureInstanceId"
assert "required" in excinfo.value.reason.lower()
assert "trustee.refreshAccountingData.featureInstanceId" in str(excinfo.value)
def test_optionalMissingIsFine(self):
actionDef = _makeActionDef(
forceRefresh={"type": "bool", "required": False},
)
result = validateAndCoerceParameters(actionDef, {})
assert result == {}
def test_requiredNoneCountsAsMissing(self):
"""Explicit ``None`` for a required param is missing, not "unset"."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"featureInstanceId": None})
class TestRefNormalization:
"""Trustee bug regression: agent passed `{id: ..., featureCode: ...}` and
Postgres failed with "can't adapt type 'dict'", which the connector
silently turned into "no record found"."""
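    # Failure mode being prevented (reconstructed from the description above; the
    # driver call is illustrative, and "can't adapt type 'dict'" matches psycopg2's
    # wording when a raw dict reaches a query parameter):
    #   cursor.execute("SELECT ... WHERE id = %s", ({"id": "...", "featureCode": "trustee"},))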
def test_collapsesDictWithIdToString(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
result = validateAndCoerceParameters(actionDef, {
"featureInstanceId": {
"id": "b7574103-f4a3-4894-8c23-74bd0d0e83a5",
"featureCode": "trustee",
"label": "Demo AG",
},
})
assert result["featureInstanceId"] == "b7574103-f4a3-4894-8c23-74bd0d0e83a5"
def test_passThroughString(self):
"""Workflow execution path passes a plain UUID; must not break."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
uuid = "b7574103-f4a3-4894-8c23-74bd0d0e83a5"
result = validateAndCoerceParameters(actionDef, {"featureInstanceId": uuid})
assert result["featureInstanceId"] == uuid
def test_dictWithoutIdRaises(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError) as excinfo:
validateAndCoerceParameters(actionDef, {
"featureInstanceId": {"featureCode": "trustee", "label": "Demo"},
})
assert "id" in excinfo.value.reason
def test_otherDictTypeRaises(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"featureInstanceId": 12345})
def test_connectionRefAlsoCollapses(self):
"""Same logic applies to every Ref-Schema, not just FeatureInstanceRef."""
actionDef = _makeActionDef(
actionId="msft.readEmails",
connection={"type": "ConnectionRef", "required": True},
)
result = validateAndCoerceParameters(actionDef, {
"connection": {"id": "conn-uuid-123", "authority": "msft", "label": "Outlook"},
})
assert result["connection"] == "conn-uuid-123"
class TestPrimitiveCoercion:
def test_boolFromTrueString(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
result = validateAndCoerceParameters(actionDef, {"forceRefresh": "true"})
assert result["forceRefresh"] is True
def test_boolFromFalseString(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
result = validateAndCoerceParameters(actionDef, {"forceRefresh": "false"})
assert result["forceRefresh"] is False
def test_boolPassthrough(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
assert validateAndCoerceParameters(actionDef, {"forceRefresh": True})["forceRefresh"] is True
def test_boolBadValueRaises(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"forceRefresh": "maybe"})
def test_intFromString(self):
actionDef = _makeActionDef(periodMonth={"type": "int", "required": False})
assert validateAndCoerceParameters(actionDef, {"periodMonth": "12"})["periodMonth"] == 12
def test_intBadValueRaises(self):
actionDef = _makeActionDef(periodMonth={"type": "int", "required": False})
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"periodMonth": "twelve"})
def test_floatFromString(self):
actionDef = _makeActionDef(threshold={"type": "float", "required": False})
assert validateAndCoerceParameters(actionDef, {"threshold": "0.75"})["threshold"] == 0.75
class TestUnknownAndOtherTypes:
def test_unknownKeysPassThrough(self):
"""The executor injects parentOperationId, expectedDocumentFormats, etc.
Validation must not strip them."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
result = validateAndCoerceParameters(actionDef, {
"featureInstanceId": "uuid-123",
"parentOperationId": "action_xyz",
"expectedDocumentFormats": ["pdf", "txt"],
})
assert result["parentOperationId"] == "action_xyz"
assert result["expectedDocumentFormats"] == ["pdf", "txt"]
def test_strParamsAreUntouched(self):
actionDef = _makeActionDef(dateFrom={"type": "str", "required": False})
assert validateAndCoerceParameters(actionDef, {"dateFrom": "2025-01-01"})["dateFrom"] == "2025-01-01"
def test_listParamsAreUntouched(self):
actionDef = _makeActionDef(documentList={"type": "List[ActionDocument]", "required": False})
docs = [{"name": "a"}, {"name": "b"}]
assert validateAndCoerceParameters(actionDef, {"documentList": docs})["documentList"] is docs
def test_doesNotMutateInput(self):
"""validateAndCoerceParameters must return a new dict."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
original = {"featureInstanceId": {"id": "uuid", "featureCode": "trustee"}}
result = validateAndCoerceParameters(actionDef, original)
assert isinstance(original["featureInstanceId"], dict)
assert result["featureInstanceId"] == "uuid"