Compare commits

...

15 commits

Author SHA1 Message Date
Ida
ce671f61b6 feat: extended app-scheduler to index existing connections overnight 2026-04-29 14:39:40 +02:00
Ida
4a840e9e6e added neutralization option to indexing new connections 2026-04-29 14:39:40 +02:00
Ida
93cb6939dc feat: frontend consent integration 2026-04-29 14:39:40 +02:00
Ida
3add5c9a80 commit before rebase 2026-04-29 14:39:40 +02:00
Ida
6a5ff1ff7c feat(rag): P1 user-connection hooks + retrieval threshold fix
- connection.established/revoked callbacks from OAuth routes and
  connection management endpoints
- KnowledgeIngestionConsumer dispatches bootstrap job (established)
  and synchronous purge (revoked)
- FileContentIndex: add connectionId + sourceKind columns
- SharePoint bootstrap with @odata.nextLink pagination and eTag-based
  idempotency
- Outlook bootstrap treats messages as virtual documents with
  cleanEmailBody for HTML/quote/signature stripping
- fix(rag): lower buildAgentContext minScore thresholds from
  0.55/0.65/0.70 to 0.35 — previous values blocked all real matches
  from text-embedding-3-small
- 24 new unit tests covering purge, consumer dispatch, email cleaning
  and both bootstrap paths
2026-04-29 14:39:40 +02:00
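The SharePoint bootstrap named in the commit above can be illustrated with a short sketch. This is not the repository's code: the endpoint, token handling, and the persisted itemId → eTag map are assumptions, used only to show how @odata.nextLink pagination and eTag-based idempotency fit together.

```python
import requests  # assumes the requests package is available

GRAPH_ROOT = "https://graph.microsoft.com/v1.0"

def bootstrap_sharepoint_drive(drive_id: str, token: str, seen_etags: dict) -> list:
    """Walk every item of a drive once, following Graph's continuation links.

    seen_etags maps itemId -> eTag from a previous run; items whose eTag is
    unchanged are skipped, so re-running the bootstrap is idempotent.
    """
    to_ingest = []
    url = f"{GRAPH_ROOT}/drives/{drive_id}/root/children"
    headers = {"Authorization": f"Bearer {token}"}

    while url:
        page = requests.get(url, headers=headers, timeout=30).json()
        for item in page.get("value", []):
            etag = item.get("eTag")
            if seen_etags.get(item["id"]) == etag:
                continue  # already indexed at this version -> skip
            seen_etags[item["id"]] = etag
            to_ingest.append(item)
        # Graph returns a fully qualified URL for the next page, or omits the key
        url = page.get("@odata.nextLink")

    return to_ingest
```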
Ida
dff3d41845 fix(rag): stable ingestion idempotency across re-extractions (AC4)
Re-indexing the same file always triggered a full embedding run —
ingestion.skipped.duplicate never fired. Two independent causes:

1. _computeIngestionHash included contentObjectId in its payload, but
   extractors generate fresh uuid4() per run, making the hash a
   per-run nonce. Now hashed over (contentType, data) in extractor
   order — stable across re-extractions, sensitive to content,
   ordering, and type changes.
2. _autoIndexFile upserted the fresh pre-scan FileContentIndex before
   requestIngestion's duplicate check, wiping structure._ingestion
   and status=indexed from the prior run. The pre-upsert now merges
   the existing _ingestion metadata and preserves the indexed status.

Verified end-to-end: second PATCH /scope on an already-indexed file
logs ingestion.skipped.duplicate and returns in ~2s
with zero embedding API calls.

Adds test_ingestion_hash_stability.py (5 cases).
2026-04-29 14:39:40 +02:00
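To make the idempotency fix above concrete, here is a minimal sketch of the hashing rule the commit describes. The function and field names are illustrative, not the project's _computeIngestionHash signature; the point is that the digest covers only (contentType, data) in extractor order and ignores per-run identifiers.

```python
import hashlib
import json

def compute_ingestion_hash(parts: list[dict]) -> str:
    """Stable digest over extractor output: sensitive to content, order and type."""
    digest = hashlib.sha256()
    for part in parts:
        # contentObjectId (a fresh uuid4 per extraction) is deliberately excluded
        payload = json.dumps([part["contentType"], part["data"]],
                             ensure_ascii=False, separators=(",", ":"))
        digest.update(payload.encode("utf-8"))
    return digest.hexdigest()

# Re-extracting identical content with a new contentObjectId yields the same hash,
# so the duplicate check can fire and skip the embedding run.
run_a = [{"contentType": "text/plain", "data": "Hello", "contentObjectId": "uuid-1"}]
run_b = [{"contentType": "text/plain", "data": "Hello", "contentObjectId": "uuid-2"}]
assert compute_ingestion_hash(run_a) == compute_ingestion_hash(run_b)
```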
Ida
a7f4055130 fix(rag): preserve per-page granularity + remove on-demand extraction fallbacks
The default MergeStrategy concatenates every extracted text part into a
single ContentPart, collapsing a 500-page PDF into one chunk with a
blurred average embedding — RAG retrieval was effectively broken.

- ExtractionOptions.mergeStrategy is now Optional[MergeStrategy]; passing
  None preserves per-part granularity. Default factory kept for
  backward compatibility.
- routeDataFiles._autoIndexFile, _workspaceTools.readFile, and
  _documentTools.describeImage explicitly pass mergeStrategy=None.
- Agent tools no longer carry redundant extraction + requestIngestion
  fallback paths: the unified ingestion lane owns all corpus writes,
  and readFile/describeImage are pure consumers of the knowledge store.
- Unit test asserts runExtraction(mergeStrategy=None) keeps every part.
2026-04-29 14:39:40 +02:00
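A small sketch of the granularity change described above, with stand-in types rather than the repository's ExtractionOptions/MergeStrategy classes: passing mergeStrategy=None keeps one ContentPart per extracted page, while the default strategy still collapses everything for callers that rely on the old behaviour.

```python
from dataclasses import dataclass, field
from typing import Callable, List, Optional

@dataclass
class ContentPart:
    text: str

# Stand-in for the project's MergeStrategy: a callable that combines parts.
MergeStrategy = Callable[[List[ContentPart]], List[ContentPart]]

def concat_all(parts: List[ContentPart]) -> List[ContentPart]:
    """Default-style strategy: one blended chunk (and one blurred embedding)."""
    return [ContentPart("\n".join(p.text for p in parts))]

@dataclass
class ExtractionOptions:
    mergeStrategy: Optional[MergeStrategy] = field(default=concat_all)

def run_extraction(pages: List[str], options: ExtractionOptions) -> List[ContentPart]:
    parts = [ContentPart(p) for p in pages]
    if options.mergeStrategy is None:
        return parts                      # per-page granularity preserved
    return options.mergeStrategy(parts)   # backward-compatible default

pages = [f"page {i}" for i in range(500)]
assert len(run_extraction(pages, ExtractionOptions())) == 1             # old default
assert len(run_extraction(pages, ExtractionOptions(mergeStrategy=None))) == 500
```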
Ida
078b4eaaaf removed unnecessary test files 2026-04-29 14:39:40 +02:00
Ida
9d82d3d353 P0: injection facade 2026-04-29 14:39:40 +02:00
Patrick Motsch
ba21005401
Merge pull request #147 from valueonag/feat/demo-system-readieness
Feat/demo system readieness
2026-04-29 01:57:49 +02:00
ValueOn AG
052647a52b wired Infomaniak to AI adapters and tools 2026-04-29 01:52:47 +02:00
ValueOn AG
49f3660d89 fixes infomaniak download 2026-04-29 01:03:40 +02:00
ValueOn AG
9816f13ae9 fixes Infomaniak behaviour that differs from the docs 2026-04-29 00:57:28 +02:00
ValueOn AG
b405cebdec kdrive fix 2026-04-29 00:35:21 +02:00
ValueOn AG
fb3a1f0a51 fixes ai agents parameter flow 2026-04-28 11:58:53 +02:00
76 changed files with 9400 additions and 942 deletions

app.py

@@ -405,6 +405,16 @@ async def lifespan(app: FastAPI):
except Exception as e:
logger.warning(f"BackgroundJob recovery failed (non-critical): {e}")
# Subscribe knowledge ingestion to connection lifecycle events so OAuth
# connect/disconnect reliably trigger bootstrap/purge.
try:
from modules.serviceCenter.services.serviceKnowledge.subConnectorIngestConsumer import (
registerKnowledgeIngestionConsumer,
)
registerKnowledgeIngestionConsumer()
except Exception as e:
logger.warning(f"KnowledgeIngestionConsumer registration failed (non-critical): {e}")
yield
# --- Stop Managers ---
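For context on the hunk above: registerKnowledgeIngestionConsumer is only imported and called here, so its behaviour is not visible in this diff. A rough sketch of what such a consumer could look like, using a toy in-process event bus (the bus, handler names and log messages are assumptions, not the module's actual API):

```python
import logging

logger = logging.getLogger(__name__)

_handlers: dict[str, list] = {}  # toy in-process event bus, illustration only

def subscribe(event: str, handler) -> None:
    _handlers.setdefault(event, []).append(handler)

def publish(event: str, **payload) -> None:
    for handler in _handlers.get(event, []):
        handler(**payload)

def register_knowledge_ingestion_consumer() -> None:
    """Wire connection lifecycle events to the knowledge ingestion lane."""

    def on_established(connection_id: str, **_):
        # bootstrap asynchronously: enqueue a background job that walks the new source
        logger.info("dispatching bootstrap job for connection %s", connection_id)

    def on_revoked(connection_id: str, **_):
        # purge synchronously: indexed content must disappear with the consent
        logger.info("purging indexed content for connection %s", connection_id)

    subscribe("connection.established", on_established)
    subscribe("connection.revoked", on_revoked)

register_knowledge_ingestion_consumer()
publish("connection.established", connection_id="conn-123")
```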


@@ -0,0 +1,107 @@
# Development Environment Configuration
# System Configuration
APP_ENV_TYPE = dev
APP_ENV_LABEL = Development Instance Patrick
APP_API_URL = http://localhost:8000
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt
APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
# PostgreSQL DB Host
DB_HOST=localhost
DB_USER=poweron_dev
DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = http://localhost:8000/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
# AI configuration
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5ZmdDZ3hrSElrMnQzNFAtel9wX191VjVzN2g1LWZoa0V1YklubEdmMEJDdEZiR1RWeVZrM3V3enBHX3p6WUtTS0kwYkFyVEF0Nm8zX05CelVQcFJUc0lwVW5iNFczc1p1WWJ2WFBmd0lpLUxxWndEeUh0b2hGUHVpN19vb19nMTBnV1A1VmNpWERVX05lQ29VS20wTjZ3PT0=
Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY=
Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzR
XbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
# Teamsbot Browser Bot Service
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
# The bot will connect back to localhost:8000 via WebSocket
TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves.
# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av
# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a


@@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09
STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09


@@ -0,0 +1,100 @@
# Integration Environment Configuration
# System Configuration
APP_ENV_TYPE = int
APP_ENV_LABEL = Integration Instance
APP_API_URL = https://gateway-int.poweron-center.net
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9
# PostgreSQL DB Host
DB_HOST=gateway-int-server.postgres.database.azure.com
DB_USER=heeshkdlby
DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
STRIPE_WEBHOOK_SECRET = whsec_2agCQEbDPSOn2C40EJcwoPCqlvaPLF7M
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0
# AI configuration
Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9
Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnB5dkd6UkhtU3lhYmZMSlo0bklQZ2s3UTFBSkprZTNwWkg5Q2lVa0wtenhxWXpva21xVDVMRjdKSmhpTmxWS05IUTRoRHdCbktSRVVjcVFnY1RfV0N2S2dyV0dTMlhxQlRFVm41RkFTWVQzQThuVkZwdlNuVC05QlVRVXB6Qjk3akNpYmY1MFR6R1ByMzlIMllRZlRRYVVRN2ZBPT0=
Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg=
Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms
5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss


@@ -49,11 +49,13 @@ Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron-center.net/api/go
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ==
-Service_CLICKUP_OAUTH_REDIRECT_URI = http://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
+Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09
-STRIPE_WEBHOOK_SECRET = whsec_2agCQEbDPSOn2C40EJcwoPCqlvaPLF7M
+STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0


@@ -0,0 +1,101 @@
# Production Environment Configuration
# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance
APP_KEY_SYSVAR = CONFIG_KEY
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://gateway-prod.poweron-center.net
# PostgreSQL DB Host
DB_HOST=gateway-prod-server.postgres.database.azure.com
DB_USER=gzxxmcrdhn
DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/login/callback
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/connect/callback
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1
h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Manadate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss


@@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=


@@ -0,0 +1,101 @@
# Production Environment Configuration
# System Configuration
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance Forgejo
APP_KEY_SYSVAR = /srv/gateway/shared/secrets/master_key.txt
APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09
APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9
APP_API_URL = https://api.poweron.swiss
# PostgreSQL DB Host
DB_HOST=10.20.0.21
DB_USER=poweron_dev
DB_PASSWORD_SECRET = mypassword
DB_PORT=5432
# Security Configuration
APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
APP_ALLOWED_ORIGINS=https://porta.poweron.swiss
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_DIR = srv/gateway/shared/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
APP_LOGGING_FILE_ENABLED = True
APP_LOGGING_ROTATION_SIZE = 10485760
APP_LOGGING_BACKUP_COUNT = 5
# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_AUTH_REDIRECT_URI =
Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
Service_GOOGLE_DATA_REDIRECT_URI =
# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback
# Infomaniak OAuth -- Data App (kDrive + Mail)
Service_INFOMANIAK_DATA_CLIENT_ID = abd71a95-7c67-465a-b7ab-963cc5eccb4b
Service_INFOMANIAK_DATA_CLIENT_SECRET = jwaEZza0VnmAHA1vIQJcpaCC1O4ND6IS0mkQ0GGiVlmof7XHxUcl9YMl7TbtEINz
Service_INFOMANIAK_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/infomaniak/auth/connect/callback
# Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=
STRIPE_API_VERSION = 2026-01-28.clover
STRIPE_AUTOMATIC_TAX_ENABLED = false
STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah
# AI configuration
Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9
Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0=
Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg=
Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo=
Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA=
Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1
h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0=
# Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
# Mandate Pre-Processing Servers
PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
# Preprocessor API Configuration
PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
# Azure Communication Services Email Configuration
MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
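The ``PROD_ENC:`` prefix and the ``_SECRET`` naming convention (see the Stripe comment above) imply that encrypted values are decrypted when the configuration is loaded. A minimal loader sketch, assuming the blobs are base64-wrapped Fernet tokens and a hypothetical ``APP_ENCRYPTION_KEY`` environment variable holds the key -- neither detail is confirmed by this diff:

# Hypothetical loader sketch -- Fernet usage, the outer base64 layer, and the
# APP_ENCRYPTION_KEY variable name are assumptions, not taken from this repo.
import base64
import os
from cryptography.fernet import Fernet

def decryptConfigValue(raw: str, fernet: Fernet) -> str:
    """Return the plaintext for a config value, decrypting PROD_ENC: blobs."""
    if not raw or not raw.startswith("PROD_ENC:"):
        return raw
    token = base64.b64decode(raw[len("PROD_ENC:"):])   # assumed outer base64 wrapping
    return fernet.decrypt(token).decode("utf-8")

# Usage sketch:
#   fernet = Fernet(os.environ["APP_ENCRYPTION_KEY"])   # hypothetical key variable
#   dbPassword = decryptConfigValue(APP_CONFIG["DB_PASSWORD_SECRET"], fernet)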

View file

@ -11,7 +11,7 @@ APP_API_URL = https://api.poweron.swiss
# PostgreSQL DB Host # PostgreSQL DB Host
DB_HOST=10.20.0.21 DB_HOST=10.20.0.21
DB_USER=poweron_dev DB_USER=poweron_dev
DB_PASSWORD_SECRET = mypassword DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnA4UXZiMnRoUzVlbVRLX3JTRl94cVpMaURtMndZVmFBYXdvdnIxLV81dWwxWmhmcUlCMUFZbDhRT2NsQmNqSl9ZMmRWRVN1Y2JqNlVwOXRJY1VBTm1oSjNiaFE9PQ==
DB_PORT=5432 DB_PORT=5432
# Security Configuration # Security Configuration
@ -51,6 +51,8 @@ Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ== Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ==
Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback Service_CLICKUP_OAUTH_REDIRECT_URI = https://api.poweron.swiss/api/clickup/auth/connect/callback
# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI.
# Stripe Billing (both end with _SECRET for encryption script) # Stripe Billing (both end with _SECRET for encryption script)
STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09 STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09
STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08= STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08=

View file

@ -13,6 +13,35 @@ from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingMode
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _supportsCustomTemperature(modelName: str) -> bool:
"""Check whether an Anthropic model accepts a custom ``temperature``.
Anthropic's Extended-Thinking models (Claude 4.7 Opus and the
upcoming 4.7 Sonnet/Haiku, plus all 5.x and beyond) reject every
``temperature`` value with HTTP 400
``{"error": "`temperature` is deprecated for this model."}`` --
only the model's internal default is accepted. Older Claude 4.5 /
4.6 models still accept any value in [0, 1].
Returns:
True if ``temperature`` may be sent; False if it must be omitted.
"""
if not modelName:
return True
name = modelName.lower()
if name.startswith("claude-opus-4-7"):
return False
if name.startswith("claude-sonnet-4-7"):
return False
if name.startswith("claude-haiku-4-7"):
return False
# 5.x and beyond: same Extended-Thinking family, no custom temperature.
if name.startswith("claude-opus-5") or name.startswith("claude-sonnet-5") or name.startswith("claude-haiku-5"):
return False
return True
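A few illustrative checks of the gate above, using model names from the docstring (a sketch of expected behaviour, not part of the commit):

# Illustrative expectations for _supportsCustomTemperature (Anthropic variant):
assert _supportsCustomTemperature("claude-sonnet-4-6") is True    # pre-4.7 model: temperature allowed
assert _supportsCustomTemperature("claude-opus-4-7") is False     # Extended-Thinking: omit temperature
assert _supportsCustomTemperature("claude-haiku-5-0") is False    # 5.x family: omit temperature
assert _supportsCustomTemperature("") is True                     # empty name: keep legacy behaviour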
def loadConfigData(): def loadConfigData():
"""Load configuration data for Anthropic connector""" """Load configuration data for Anthropic connector"""
return { return {
@ -276,9 +305,12 @@ class AiAnthropic(BaseConnectorAi):
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"model": model.name, "model": model.name,
"messages": converted_messages, "messages": converted_messages,
"temperature": temperature,
} }
# Extended-Thinking models (claude-opus-4-7 etc.) reject any
# `temperature` value -- only the model default is accepted.
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
# Anthropic requires max_tokens - use provided value or throw error # Anthropic requires max_tokens - use provided value or throw error
if maxTokens is None: if maxTokens is None:
raise ValueError("maxTokens must be provided for Anthropic API calls") raise ValueError("maxTokens must be provided for Anthropic API calls")
@ -381,10 +413,11 @@ class AiAnthropic(BaseConnectorAi):
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"model": model.name, "model": model.name,
"messages": converted, "messages": converted,
"temperature": temperature,
"max_tokens": model.maxTokens, "max_tokens": model.maxTokens,
"stream": True, "stream": True,
} }
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
if system_prompt: if system_prompt:
payload["system"] = system_prompt payload["system"] = system_prompt
if modelCall.tools: if modelCall.tools:
@ -608,10 +641,10 @@ class AiAnthropic(BaseConnectorAi):
if systemPrompt: if systemPrompt:
payload["system"] = systemPrompt payload["system"] = systemPrompt
# Set temperature from model if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature payload["temperature"] = temperature
# Make API call with headers from httpClient (which includes anthropic-version) # Make API call with headers from httpClient (which includes anthropic-version)
response = await self.httpClient.post( response = await self.httpClient.post(
"https://api.anthropic.com/v1/messages", "https://api.anthropic.com/v1/messages",

View file

@ -11,6 +11,30 @@ from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingMode
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _supportsCustomTemperature(modelName: str) -> bool:
"""Check whether an OpenAI model accepts a custom `temperature` value.
GPT-5.x and the o-series (o1/o3/o4) reasoning models reject every
`temperature` value other than the default (1) with HTTP 400
`unsupported_value`. For these models we must omit `temperature`
from the payload entirely. Older chat-completions models
(gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) still accept any value
in [0, 2].
Returns:
True if `temperature` may be sent; False if it must be omitted.
"""
if not modelName:
return True
name = modelName.lower()
if name.startswith("gpt-5"):
return False
if name.startswith("o1") or name.startswith("o3") or name.startswith("o4"):
return False
return True
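The same pattern for the OpenAI helper (illustrative expectations only):

# Illustrative expectations for _supportsCustomTemperature (OpenAI variant):
assert _supportsCustomTemperature("gpt-4o-mini") is True   # legacy chat model: temperature allowed
assert _supportsCustomTemperature("gpt-5.1") is False      # gpt-5.x: only the default temperature
assert _supportsCustomTemperature("o3-mini") is False      # o-series reasoning model: omit temperature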
def loadConfigData(): def loadConfigData():
"""Load configuration data for OpenAI connector""" """Load configuration data for OpenAI connector"""
return { return {
@ -344,14 +368,18 @@ class AiOpenai(BaseConnectorAi):
payload = { payload = {
"model": model.name, "model": model.name,
"messages": messages, "messages": messages,
"temperature": temperature,
# Universal output-length cap. `max_tokens` is deprecated and # Universal output-length cap. `max_tokens` is deprecated and
# rejected outright by gpt-5.x / o-series; `max_completion_tokens` # rejected outright by gpt-5.x / o-series; `max_completion_tokens`
# is accepted by every current chat-completions model (legacy # is accepted by every current chat-completions model (legacy
# gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference. # gpt-4o, gpt-4.1, gpt-5.x, o1/o3/o4) per OpenAI API reference.
"max_completion_tokens": maxTokens "max_completion_tokens": maxTokens
} }
# gpt-5.x and o-series only accept the default temperature (1) and
# return HTTP 400 `unsupported_value` for anything else - omit the
# field entirely for those models.
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
if modelCall.tools: if modelCall.tools:
payload["tools"] = modelCall.tools payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto" payload["tool_choice"] = modelCall.toolChoice or "auto"
@ -428,13 +456,15 @@ class AiOpenai(BaseConnectorAi):
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"model": model.name, "model": model.name,
"messages": messages, "messages": messages,
"temperature": temperature,
# See callAiBasic for the rationale: `max_completion_tokens` # See callAiBasic for the rationale: `max_completion_tokens`
# is the universal output-length parameter; `max_tokens` is # is the universal output-length parameter; `max_tokens` is
# deprecated and rejected by gpt-5.x / o-series. # deprecated and rejected by gpt-5.x / o-series.
"max_completion_tokens": model.maxTokens, "max_completion_tokens": model.maxTokens,
"stream": True, "stream": True,
} }
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
if modelCall.tools: if modelCall.tools:
payload["tools"] = modelCall.tools payload["tools"] = modelCall.tools
payload["tool_choice"] = modelCall.toolChoice or "auto" payload["tool_choice"] = modelCall.toolChoice or "auto"
@ -585,15 +615,15 @@ class AiOpenai(BaseConnectorAi):
# Use the messages directly - they should already contain the image data # Use the messages directly - they should already contain the image data
# in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}} # in the format: {"type": "image_url", "image_url": {"url": "data:...base64,..."}}
# Use parameters from model
temperature = model.temperature temperature = model.temperature
# Don't set maxTokens - let the model use its full context length # Don't set maxTokens - let the model use its full context length
payload = { payload = {
"model": model.name, "model": model.name,
"messages": messages, "messages": messages,
"temperature": temperature
} }
if _supportsCustomTemperature(model.name):
payload["temperature"] = temperature
response = await self.httpClient.post( response = await self.httpClient.post(
model.apiUrl, model.apiUrl,

View file

@ -9,13 +9,15 @@ googleAuthScopes = [
"https://www.googleapis.com/auth/userinfo.profile", "https://www.googleapis.com/auth/userinfo.profile",
] ]
# Google — Data app (Gmail + Drive + identity for token responses) # Google — Data app (Gmail + Drive + Calendar + Contacts + identity for token responses)
googleDataScopes = [ googleDataScopes = [
"openid", "openid",
"https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/userinfo.profile", "https://www.googleapis.com/auth/userinfo.profile",
"https://www.googleapis.com/auth/gmail.readonly", "https://www.googleapis.com/auth/gmail.readonly",
"https://www.googleapis.com/auth/drive.readonly", "https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/calendar.readonly",
"https://www.googleapis.com/auth/contacts.readonly",
] ]
# Microsoft — Auth app: Graph profile only (MSAL adds openid, profile, offline_access, …) # Microsoft — Auth app: Graph profile only (MSAL adds openid, profile, offline_access, …)
@ -34,6 +36,8 @@ msftDataScopes = [
"OnlineMeetings.Read", "OnlineMeetings.Read",
"Chat.ReadWrite", "Chat.ReadWrite",
"ChatMessage.Send", "ChatMessage.Send",
"Calendars.Read",
"Contacts.Read",
] ]
@ -42,14 +46,8 @@ def msftDataScopesForRefresh() -> str:
return " ".join(msftDataScopes) return " ".join(msftDataScopes)
# Infomaniak — Data app (kDrive + Mail; user_info needed for /1/profile lookup) # Infomaniak intentionally has no OAuth scope set: the kDrive + Mail data APIs
infomaniakDataScopes = [ # are only reachable with manually issued Personal Access Tokens (see
"user_info", # wiki/d-guides/infomaniak-token-setup.md). The OAuth /authorize endpoint at
"kdrive", # login.infomaniak.com only accepts identity scopes (openid/profile/email/phone)
"mail", # and does not return tokens that work against /1/* data routes.
]
def infomaniakDataScopesForRefresh() -> str:
"""Space-separated scope string identical to authorization request."""
return " ".join(infomaniakDataScopes)

View file

@ -13,7 +13,7 @@ from modules.datamodels.datamodelSecurity import Token, TokenPurpose
from modules.datamodels.datamodelUam import AuthAuthority from modules.datamodels.datamodelUam import AuthAuthority
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp, parseTimestamp from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp, parseTimestamp
from modules.auth.oauthProviderConfig import msftDataScopesForRefresh, infomaniakDataScopesForRefresh from modules.auth.oauthProviderConfig import msftDataScopesForRefresh
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,9 +30,6 @@ class TokenManager:
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_ID") self.google_client_id = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_ID")
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_SECRET") self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_DATA_CLIENT_SECRET")
# Infomaniak Data OAuth (kDrive + Mail)
self.infomaniak_client_id = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_ID")
self.infomaniak_client_secret = APP_CONFIG.get("Service_INFOMANIAK_DATA_CLIENT_SECRET")
def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]: def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Microsoft OAuth token using refresh token""" """Refresh Microsoft OAuth token using refresh token"""
@ -166,65 +163,6 @@ class TokenManager:
logger.error(f"Error refreshing Google token: {str(e)}") logger.error(f"Error refreshing Google token: {str(e)}")
return None return None
def refreshInfomaniakToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Infomaniak OAuth token using refresh token"""
try:
logger.debug(f"refreshInfomaniakToken: Starting Infomaniak token refresh for user {userId}")
if not self.infomaniak_client_id or not self.infomaniak_client_secret:
logger.error("Infomaniak OAuth configuration not found")
return None
tokenUrl = "https://login.infomaniak.com/token"
data = {
"client_id": self.infomaniak_client_id,
"client_secret": self.infomaniak_client_secret,
"grant_type": "refresh_token",
"refresh_token": refreshToken,
"scope": infomaniakDataScopesForRefresh(),
}
with httpx.Client(timeout=30.0) as client:
response = client.post(tokenUrl, data=data)
logger.debug(f"refreshInfomaniakToken: HTTP response status: {response.status_code}")
if response.status_code == 200:
tokenData = response.json()
if "access_token" not in tokenData:
logger.error("Infomaniak token refresh response missing access_token")
return None
newToken = Token(
userId=userId,
authority=AuthAuthority.INFOMANIAK,
connectionId=oldToken.connectionId,
tokenPurpose=TokenPurpose.DATA_CONNECTION,
tokenAccess=tokenData["access_token"],
tokenRefresh=tokenData.get("refresh_token", refreshToken),
tokenType=tokenData.get("token_type", "bearer"),
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
createdAt=getUtcTimestamp(),
)
return newToken
logger.error(
f"Failed to refresh Infomaniak token: {response.status_code} - {response.text}"
)
if response.status_code == 400:
try:
errorData = response.json()
if errorData.get("error") == "invalid_grant":
logger.warning(
"Infomaniak refresh token is invalid or expired - user needs to re-authenticate"
)
except Exception:
pass
return None
except Exception as e:
logger.error(f"Error refreshing Infomaniak token: {str(e)}")
return None
def refreshToken(self, oldToken: Token) -> Optional[Token]: def refreshToken(self, oldToken: Token) -> Optional[Token]:
"""Refresh an expired token using the appropriate OAuth service""" """Refresh an expired token using the appropriate OAuth service"""
try: try:
@ -268,9 +206,6 @@ class TokenManager:
elif oldToken.authority == AuthAuthority.GOOGLE: elif oldToken.authority == AuthAuthority.GOOGLE:
logger.debug(f"refreshToken: Refreshing Google token") logger.debug(f"refreshToken: Refreshing Google token")
return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken) return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
elif oldToken.authority == AuthAuthority.INFOMANIAK:
logger.debug(f"refreshToken: Refreshing Infomaniak token")
return self.refreshInfomaniakToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
else: else:
logger.warning(f"Unknown authority for token refresh: {oldToken.authority}") logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
return None return None

View file

@ -144,45 +144,6 @@ class TokenRefreshService:
logger.error(f"Error refreshing Microsoft token for connection {connection.id}: {str(e)}") logger.error(f"Error refreshing Microsoft token for connection {connection.id}: {str(e)}")
return False return False
async def _refresh_infomaniak_token(self, interface, connection: UserConnection) -> bool:
"""Refresh Infomaniak OAuth token"""
try:
logger.debug(f"Refreshing Infomaniak token for connection {connection.id}")
current_token = interface.getConnectionToken(connection.id)
if not current_token:
logger.warning(f"No Infomaniak token found for connection {connection.id}")
return False
from modules.auth.tokenManager import TokenManager
token_manager = TokenManager()
refreshedToken = token_manager.refreshToken(current_token)
if refreshedToken:
interface.saveConnectionToken(refreshedToken)
interface.db.recordModify(UserConnection, connection.id, {
"lastChecked": getUtcTimestamp(),
"expiresAt": refreshedToken.expiresAt,
})
logger.info(f"Successfully refreshed Infomaniak token for connection {connection.id}")
try:
audit_logger.logSecurityEvent(
userId=str(connection.userId),
mandateId="system",
action="token_refresh",
details=f"Infomaniak token refreshed for connection {connection.id}",
)
except Exception:
pass
return True
logger.warning(f"Failed to refresh Infomaniak token for connection {connection.id}")
return False
except Exception as e:
logger.error(f"Error refreshing Infomaniak token for connection {connection.id}: {str(e)}")
return False
async def refresh_expired_tokens(self, user_id: str) -> Dict[str, Any]: async def refresh_expired_tokens(self, user_id: str) -> Dict[str, Any]:
""" """
Refresh expired OAuth tokens for a user Refresh expired OAuth tokens for a user
@ -216,7 +177,7 @@ class TokenRefreshService:
for connection in connections: for connection in connections:
# Only refresh expired OAuth connections # Only refresh expired OAuth connections
if (connection.tokenStatus == 'expired' and if (connection.tokenStatus == 'expired' and
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT, AuthAuthority.INFOMANIAK]): connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT]):
# Check rate limiting # Check rate limiting
if self._is_rate_limited(connection.id): if self._is_rate_limited(connection.id):
@ -233,8 +194,6 @@ class TokenRefreshService:
success = await self._refresh_google_token(root_interface, connection) success = await self._refresh_google_token(root_interface, connection)
elif connection.authority == AuthAuthority.MSFT: elif connection.authority == AuthAuthority.MSFT:
success = await self._refresh_microsoft_token(root_interface, connection) success = await self._refresh_microsoft_token(root_interface, connection)
elif connection.authority == AuthAuthority.INFOMANIAK:
success = await self._refresh_infomaniak_token(root_interface, connection)
if success: if success:
refreshed_count += 1 refreshed_count += 1
@ -289,7 +248,7 @@ class TokenRefreshService:
# Only refresh active tokens that expire soon # Only refresh active tokens that expire soon
if (connection.tokenStatus == 'active' and if (connection.tokenStatus == 'active' and
connection.tokenExpiresAt and connection.tokenExpiresAt and
connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT, AuthAuthority.INFOMANIAK]): connection.authority in [AuthAuthority.GOOGLE, AuthAuthority.MSFT]):
# Check if token expires within 5 minutes # Check if token expires within 5 minutes
time_until_expiry = connection.tokenExpiresAt - current_time time_until_expiry = connection.tokenExpiresAt - current_time
@ -310,8 +269,6 @@ class TokenRefreshService:
success = await self._refresh_google_token(root_interface, connection) success = await self._refresh_google_token(root_interface, connection)
elif connection.authority == AuthAuthority.MSFT: elif connection.authority == AuthAuthority.MSFT:
success = await self._refresh_microsoft_token(root_interface, connection) success = await self._refresh_microsoft_token(root_interface, connection)
elif connection.authority == AuthAuthority.INFOMANIAK:
success = await self._refresh_infomaniak_token(root_interface, connection)
if success: if success:
refreshed_count += 1 refreshed_count += 1

View file

@ -21,6 +21,47 @@ logger = logging.getLogger(__name__)
# No mapping needed - table name = Pydantic model name exactly # No mapping needed - table name = Pydantic model name exactly
class DatabaseQueryError(RuntimeError):
"""Raised by DB read methods when the underlying SQL query failed.
Empty result sets do NOT raise this; they return ``[]`` / ``None`` /
``{"items": [], "totalItems": 0, "totalPages": 0}`` as before. This
exception is reserved for **real** failures: psycopg2 ProgrammingError,
DataError, OperationalError, IntegrityError, plus any unexpected
Python error raised inside a query path.
Read methods used to silently swallow such errors and return empty
collections, which made every caller incapable of distinguishing
"no rows" from "broken query / type adapter / dropped column / lost
connection". That hid concrete bugs (e.g. dict passed where Postgres
expected a UUID string) behind misleading downstream "no record found"
errors.
"""
def __init__(self, table: str, message: str, original: BaseException = None):
super().__init__(f"{table}: {message}")
self.table = table
self.original = original
def _rollbackQuietly(connection) -> None:
"""Restore the connection state after a failed query.
Postgres puts the connection in an error state after any failed
statement; subsequent queries on the same connection raise
``InFailedSqlTransaction`` until we rollback. We swallow rollback
errors because the original query error is what the caller should
see; a secondary rollback failure typically means the connection
is gone and will be reopened on the next ``_ensure_connection``.
"""
if connection is None:
return
try:
connection.rollback()
except Exception:
pass
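With reads raising instead of silently returning empty collections, callers can now tell "no rows" apart from a broken query. A minimal caller sketch (the surrounding function and its arguments are illustrative, not from this commit):

# Illustrative caller pattern -- only DatabaseQueryError, getRecord and
# UserConnection are taken from this diff; the rest is a sketch.
def loadConnectionOrFail(db, connectionId: str):
    try:
        row = db.getRecord(UserConnection, connectionId)
    except DatabaseQueryError as exc:
        # Real failure: broken SQL, type adapter, dropped column, lost connection.
        logger.error(f"Lookup on {exc.table} failed: {exc}")
        raise
    if row is None:
        return None   # genuinely no such record -- distinct from a failed query
    return row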
class SystemTable(PowerOnModel): class SystemTable(PowerOnModel):
"""Data model for system table entries""" """Data model for system table entries"""
@ -762,7 +803,8 @@ class DatabaseConnector:
return record return record
except Exception as e: except Exception as e:
logger.error(f"Error loading record {recordId} from table {table}: {e}") logger.error(f"Error loading record {recordId} from table {table}: {e}")
return None _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def getRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]: def getRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]:
"""Load one row by primary key (routes / services; wraps _loadRecord).""" """Load one row by primary key (routes / services; wraps _loadRecord)."""
@ -848,7 +890,8 @@ class DatabaseConnector:
return records return records
except Exception as e: except Exception as e:
logger.error(f"Error loading table {table}: {e}") logger.error(f"Error loading table {table}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def _registerInitialId(self, table: str, initialId: str) -> bool: def _registerInitialId(self, table: str, initialId: str) -> bool:
"""Registers the initial ID for a table.""" """Registers the initial ID for a table."""
@ -1047,7 +1090,8 @@ class DatabaseConnector:
return records return records
except Exception as e: except Exception as e:
logger.error(f"Error loading records from table {table}: {e}") logger.error(f"Error loading records from table {table}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def _buildPaginationClauses( def _buildPaginationClauses(
self, self,
@ -1270,7 +1314,8 @@ class DatabaseConnector:
return {"items": records, "totalItems": totalItems, "totalPages": totalPages} return {"items": records, "totalItems": totalItems, "totalPages": totalPages}
except Exception as e: except Exception as e:
logger.error(f"Error in getRecordsetPaginated for table {table}: {e}") logger.error(f"Error in getRecordsetPaginated for table {table}: {e}")
return {"items": [], "totalItems": 0, "totalPages": 0} _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def getDistinctColumnValues( def getDistinctColumnValues(
self, self,
@ -1332,7 +1377,8 @@ class DatabaseConnector:
return result return result
except Exception as e: except Exception as e:
logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}") logger.error(f"Error in getDistinctColumnValues for {table}.{column}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def recordCreate( def recordCreate(
self, model_class: type, record: Union[Dict[str, Any], BaseModel] self, model_class: type, record: Union[Dict[str, Any], BaseModel]
@ -1710,7 +1756,8 @@ class DatabaseConnector:
return records return records
except Exception as e: except Exception as e:
logger.error(f"Error in semantic search on {table}: {e}") logger.error(f"Error in semantic search on {table}: {e}")
return [] _rollbackQuietly(getattr(self, "connection", None))
raise DatabaseQueryError(table, str(e), original=e) from e
def close(self, forceClose: bool = False): def close(self, forceClose: bool = False):
"""Close the database connection. """Close the database connection.

View file

@ -14,6 +14,8 @@ logger = logging.getLogger(__name__)
_DRIVE_BASE = "https://www.googleapis.com/drive/v3" _DRIVE_BASE = "https://www.googleapis.com/drive/v3"
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1" _GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
_CALENDAR_BASE = "https://www.googleapis.com/calendar/v3"
_PEOPLE_BASE = "https://people.googleapis.com/v1"
async def _googleGet(token: str, url: str) -> Dict[str, Any]: async def _googleGet(token: str, url: str) -> Dict[str, Any]:
@ -274,12 +276,480 @@ class GmailAdapter(ServiceAdapter):
] ]
class CalendarAdapter(ServiceAdapter):
"""Google Calendar ServiceAdapter -- browse calendars, list events, .ics download.
Path conventions:
``""`` / ``"/"`` -> list calendars from ``calendarList``
``"/<calendarId>"`` -> list upcoming events in that calendar
``"/<calendarId>/<eventId>"`` -> reserved for future event detail browse
"""
_DEFAULT_EVENT_LIMIT = 100
_MAX_EVENT_LIMIT = 2500
def __init__(self, accessToken: str):
self._token = accessToken
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250"
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google Calendar list failed: {result['error']}")
return []
calendars = result.get("items", [])
if filter:
f = filter.lower()
calendars = [c for c in calendars if f in (c.get("summary") or "").lower()]
return [
ExternalEntry(
name=c.get("summaryOverride") or c.get("summary", ""),
path=f"/{c.get('id', '')}",
isFolder=True,
metadata={
"id": c.get("id"),
"primary": c.get("primary", False),
"accessRole": c.get("accessRole"),
"backgroundColor": c.get("backgroundColor"),
"timeZone": c.get("timeZone"),
},
)
for c in calendars
]
from urllib.parse import quote
calendarId = cleanPath.split("/", 1)[0]
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
url = (
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
)
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google Calendar events failed: {result['error']}")
return []
events = result.get("items", [])
return [
ExternalEntry(
name=ev.get("summary", "(no title)"),
path=f"/{calendarId}/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime") or (ev.get("start") or {}).get("date"),
"end": (ev.get("end") or {}).get("dateTime") or (ev.get("end") or {}).get("date"),
"location": ev.get("location"),
"organizer": (ev.get("organizer") or {}).get("email"),
"htmlLink": ev.get("htmlLink"),
"status": ev.get("status"),
},
)
for ev in events
]
async def download(self, path: str) -> DownloadResult:
from urllib.parse import quote
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
calendarId, eventId = cleanPath.split("/", 1)
url = f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events/{quote(eventId, safe='')}"
ev = await _googleGet(self._token, url)
if "error" in ev:
logger.warning(f"Google Calendar event fetch failed: {ev['error']}")
return DownloadResult()
icsBytes = _googleEventToIcs(ev)
summary = ev.get("summary") or eventId
safeName = _googleSafeFileName(summary) or "event"
return DownloadResult(
data=icsBytes,
fileName=f"{safeName}.ics",
mimeType="text/calendar",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Google Calendar upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
from urllib.parse import quote
calendarId = (path or "").strip("/").split("/", 1)[0] or "primary"
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
url = (
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true"
)
result = await _googleGet(self._token, url)
if "error" in result:
return []
return [
ExternalEntry(
name=ev.get("summary", "(no title)"),
path=f"/{calendarId}/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime") or (ev.get("start") or {}).get("date"),
"end": (ev.get("end") or {}).get("dateTime") or (ev.get("end") or {}).get("date"),
},
)
for ev in result.get("items", [])
]
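A short usage sketch for the adapter above (token and paths are placeholders; error handling omitted):

# Illustrative usage of the Google CalendarAdapter -- token and paths are placeholders.
import asyncio

async def _demoCalendar(accessToken: str) -> None:
    adapter = CalendarAdapter(accessToken)
    calendars = await adapter.browse("")                         # list calendars from calendarList
    if not calendars:
        return
    events = await adapter.browse(calendars[0].path, limit=10)   # upcoming events
    if events:
        result = await adapter.download(events[0].path)          # synthesised .ics
        print(result.fileName, result.mimeType)

# asyncio.run(_demoCalendar("<oauth-access-token>"))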
class ContactsAdapter(ServiceAdapter):
"""Google Contacts ServiceAdapter -- People API (read-only).
Path conventions:
``""`` / ``"/"`` -> list contact groups (incl. virtual ``all`` for the user's connections)
``"/all"`` -> list all ``people/me/connections``
``"/<groupResourceName>"`` -> list members of that contact group (e.g. ``contactGroups/myFriends``)
``"/<group>/<personId>"`` -> reserved for future detail browse;
``personId`` is the suffix after ``people/``
"""
_DEFAULT_CONTACT_LIMIT = 200
_MAX_CONTACT_LIMIT = 1000
_PERSON_FIELDS = (
"names,emailAddresses,phoneNumbers,organizations,addresses,biographies,memberships"
)
def __init__(self, accessToken: str):
self._token = accessToken
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
entries: List[ExternalEntry] = [
ExternalEntry(
name="Alle Kontakte",
path="/all",
isFolder=True,
metadata={"id": "all", "isVirtual": True},
),
]
url = f"{_PEOPLE_BASE}/contactGroups?pageSize=200"
result = await _googleGet(self._token, url)
if "error" not in result:
for grp in result.get("contactGroups", []):
name = grp.get("formattedName") or grp.get("name") or ""
if not name:
continue
entries.append(
ExternalEntry(
name=name,
path=f"/{grp.get('resourceName', '')}",
isFolder=True,
metadata={
"id": grp.get("resourceName"),
"memberCount": grp.get("memberCount", 0),
"groupType": grp.get("groupType"),
},
)
)
else:
logger.warning(f"Google contactGroups list failed: {result['error']}")
return entries
from urllib.parse import quote
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
groupRef = cleanPath.split("/", 1)[0]
if groupRef == "all":
url = (
f"{_PEOPLE_BASE}/people/me/connections"
f"?pageSize={min(effectiveLimit, 1000)}&personFields={self._PERSON_FIELDS}"
)
result = await _googleGet(self._token, url)
if "error" in result:
logger.warning(f"Google People connections failed: {result['error']}")
return []
people = result.get("connections", [])
else:
groupResource = groupRef
grpUrl = (
f"{_PEOPLE_BASE}/{quote(groupResource, safe='/')}"
f"?maxMembers={min(effectiveLimit, 1000)}"
)
grpResult = await _googleGet(self._token, grpUrl)
if "error" in grpResult:
logger.warning(f"Google contactGroup detail failed: {grpResult['error']}")
return []
memberResourceNames = grpResult.get("memberResourceNames") or []
if not memberResourceNames:
return []
chunkSize = 200
people: List[Dict[str, Any]] = []
for i in range(0, min(len(memberResourceNames), effectiveLimit), chunkSize):
chunk = memberResourceNames[i : i + chunkSize]
params = "&".join(f"resourceNames={quote(rn, safe='/')}" for rn in chunk)
batchUrl = f"{_PEOPLE_BASE}/people:batchGet?{params}&personFields={self._PERSON_FIELDS}"
batchResult = await _googleGet(self._token, batchUrl)
if "error" in batchResult:
logger.warning(f"Google People batchGet failed: {batchResult['error']}")
continue
for resp in batchResult.get("responses", []):
person = resp.get("person")
if person:
people.append(person)
if len(people) >= effectiveLimit:
break
return [
ExternalEntry(
name=_googlePersonLabel(p) or "(no name)",
path=f"/{groupRef}/{(p.get('resourceName', '') or '').split('/')[-1]}",
isFolder=False,
mimeType="text/vcard",
metadata={
"id": p.get("resourceName"),
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
"phones": [pn.get("value") for pn in (p.get("phoneNumbers") or []) if pn.get("value")],
"organization": (p.get("organizations") or [{}])[0].get("name") if p.get("organizations") else None,
},
)
for p in people[:effectiveLimit]
]
async def download(self, path: str) -> DownloadResult:
from urllib.parse import quote
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
personSuffix = cleanPath.split("/")[-1]
if not personSuffix:
return DownloadResult()
url = f"{_PEOPLE_BASE}/people/{quote(personSuffix, safe='')}?personFields={self._PERSON_FIELDS}"
person = await _googleGet(self._token, url)
if "error" in person:
logger.warning(f"Google People fetch failed: {person['error']}")
return DownloadResult()
vcfBytes = _googlePersonToVcard(person)
label = _googlePersonLabel(person) or personSuffix
safeName = _googleSafeFileName(label) or "contact"
return DownloadResult(
data=vcfBytes,
fileName=f"{safeName}.vcf",
mimeType="text/vcard",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Google Contacts upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
from urllib.parse import quote
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
url = (
f"{_PEOPLE_BASE}/people:searchContacts"
f"?query={quote(query, safe='')}&pageSize={min(effectiveLimit, 30)}"
f"&readMask={self._PERSON_FIELDS}"
)
result = await _googleGet(self._token, url)
if "error" in result:
return []
entries: List[ExternalEntry] = []
for r in result.get("results", []):
p = r.get("person") or {}
entries.append(
ExternalEntry(
name=_googlePersonLabel(p) or "(no name)",
path=f"/search/{(p.get('resourceName', '') or '').split('/')[-1]}",
isFolder=False,
mimeType="text/vcard",
metadata={
"id": p.get("resourceName"),
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
},
)
)
return entries
def _googleSafeFileName(name: str) -> str:
import re
return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name or "")[:80].strip(". ")
def _googleIcsEscape(value: str) -> str:
if value is None:
return ""
return (
value.replace("\\", "\\\\")
.replace(";", "\\;")
.replace(",", "\\,")
.replace("\r\n", "\\n")
.replace("\n", "\\n")
)
def _googleIcsDateTime(value: Optional[str]) -> Optional[str]:
"""Convert a Google Calendar dateTime/date string to RFC 5545 format (UTC)."""
if not value:
return None
from datetime import datetime, timezone
try:
if "T" not in value:
dt = datetime.strptime(value, "%Y-%m-%d")
return dt.strftime("%Y%m%d")
normalized = value.replace("Z", "+00:00") if value.endswith("Z") else value
dt = datetime.fromisoformat(normalized)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
except (TypeError, ValueError):
return None
def _googleEventToIcs(event: Dict[str, Any]) -> bytes:
"""Build a minimal RFC 5545 VCALENDAR/VEVENT for a Google Calendar event."""
from datetime import datetime, timezone
uid = event.get("iCalUID") or event.get("id") or "unknown@poweron"
summary = _googleIcsEscape(event.get("summary") or "")
location = _googleIcsEscape(event.get("location") or "")
description = _googleIcsEscape(event.get("description") or "")
rawStart = (event.get("start") or {}).get("dateTime") or (event.get("start") or {}).get("date")
rawEnd = (event.get("end") or {}).get("dateTime") or (event.get("end") or {}).get("date")
isAllDay = bool((event.get("start") or {}).get("date") and not (event.get("start") or {}).get("dateTime"))
dtstart = _googleIcsDateTime(rawStart)
dtend = _googleIcsDateTime(rawEnd)
dtstamp = _googleIcsDateTime(event.get("updated")) or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
lines = [
"BEGIN:VCALENDAR",
"VERSION:2.0",
"PRODID:-//PowerOn//Google-Calendar-Adapter//EN",
"CALSCALE:GREGORIAN",
"BEGIN:VEVENT",
f"UID:{uid}",
f"DTSTAMP:{dtstamp}",
]
if dtstart:
lines.append(f"DTSTART;VALUE=DATE:{dtstart}" if isAllDay else f"DTSTART:{dtstart}")
if dtend:
lines.append(f"DTEND;VALUE=DATE:{dtend}" if isAllDay else f"DTEND:{dtend}")
if summary:
lines.append(f"SUMMARY:{summary}")
if location:
lines.append(f"LOCATION:{location}")
if description:
lines.append(f"DESCRIPTION:{description}")
organizer = (event.get("organizer") or {}).get("email")
if organizer:
lines.append(f"ORGANIZER:mailto:{organizer}")
for att in (event.get("attendees") or []):
addr = att.get("email")
if addr:
lines.append(f"ATTENDEE:mailto:{addr}")
lines.append("END:VEVENT")
lines.append("END:VCALENDAR")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")
def _googlePersonLabel(person: Dict[str, Any]) -> str:
names = person.get("names") or []
if names:
primary = names[0]
display = primary.get("displayName") or ""
if display:
return display
given = primary.get("givenName") or ""
family = primary.get("familyName") or ""
full = f"{given} {family}".strip()
if full:
return full
orgs = person.get("organizations") or []
if orgs and orgs[0].get("name"):
return orgs[0]["name"]
emails = person.get("emailAddresses") or []
if emails and emails[0].get("value"):
return emails[0]["value"]
return ""
def _googlePersonToVcard(person: Dict[str, Any]) -> bytes:
"""Build a vCard 3.0 from a Google People API person payload."""
names = person.get("names") or []
primaryName = names[0] if names else {}
given = primaryName.get("givenName") or ""
family = primaryName.get("familyName") or ""
middle = primaryName.get("middleName") or ""
fn = primaryName.get("displayName") or _googlePersonLabel(person) or ""
lines = [
"BEGIN:VCARD",
"VERSION:3.0",
f"N:{family};{given};{middle};;",
f"FN:{fn}",
]
orgs = person.get("organizations") or []
if orgs:
org = orgs[0]
orgVal = org.get("name") or ""
if org.get("department"):
orgVal = f"{orgVal};{org['department']}"
if orgVal:
lines.append(f"ORG:{orgVal}")
if org.get("title"):
lines.append(f"TITLE:{org['title']}")
for em in (person.get("emailAddresses") or []):
addr = em.get("value")
if not addr:
continue
emailType = (em.get("type") or "INTERNET").upper()
lines.append(f"EMAIL;TYPE={emailType}:{addr}")
for ph in (person.get("phoneNumbers") or []):
val = ph.get("value")
if not val:
continue
phType = (ph.get("type") or "VOICE").upper()
lines.append(f"TEL;TYPE={phType}:{val}")
for addr in (person.get("addresses") or []):
street = addr.get("streetAddress") or ""
city = addr.get("city") or ""
region = addr.get("region") or ""
postal = addr.get("postalCode") or ""
country = addr.get("country") or ""
if any([street, city, region, postal, country]):
adrType = (addr.get("type") or "OTHER").upper()
lines.append(f"ADR;TYPE={adrType}:;;{street};{city};{region};{postal};{country}")
bios = person.get("biographies") or []
if bios and bios[0].get("value"):
lines.append(f"NOTE:{_googleIcsEscape(bios[0]['value'])}")
lines.append(f"UID:{person.get('resourceName', '')}")
lines.append("END:VCARD")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")
class GoogleConnector(ProviderConnector): class GoogleConnector(ProviderConnector):
"""Google ProviderConnector -- 1 connection -> Drive + Gmail.""" """Google ProviderConnector -- 1 connection -> Drive + Gmail + Calendar + Contacts."""
_SERVICE_MAP = { _SERVICE_MAP = {
"drive": DriveAdapter, "drive": DriveAdapter,
"gmail": GmailAdapter, "gmail": GmailAdapter,
"calendar": CalendarAdapter,
"contact": ContactsAdapter,
} }
def getAvailableServices(self) -> List[str]: def getAvailableServices(self) -> List[str]:

File diff suppressed because it is too large

View file

@ -126,6 +126,11 @@ def _stripGraphBase(url: str) -> str:
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry: def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
isFolder = "folder" in item isFolder = "folder" in item
# Graph exposes the driveItem content hash as ``eTag`` (quoted) or
# ``cTag``; we normalise to a "revision" string so callers can use it as a
# stable ``contentVersion`` for idempotent ingestion without re-downloading
# file bytes.
revision = item.get("eTag") or item.get("cTag")
return ExternalEntry( return ExternalEntry(
name=item.get("name", ""), name=item.get("name", ""),
path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""), path=f"{basePath}/{item.get('name', '')}" if basePath else item.get("name", ""),
@ -137,6 +142,9 @@ def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> Exter
"id": item.get("id"), "id": item.get("id"),
"webUrl": item.get("webUrl"), "webUrl": item.get("webUrl"),
"childCount": item.get("folder", {}).get("childCount") if isFolder else None, "childCount": item.get("folder", {}).get("childCount") if isFolder else None,
"revision": revision,
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
"parentReference": item.get("parentReference", {}),
}, },
) )
@ -167,21 +175,36 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
return await self._discoverSites() return await self._discoverSites()
if not folderPath or folderPath == "/": if not folderPath or folderPath == "/":
endpoint = f"sites/{siteId}/drive/root/children" endpoint: Optional[str] = f"sites/{siteId}/drive/root/children?$top=200"
else: else:
cleanPath = folderPath.lstrip("/") cleanPath = folderPath.lstrip("/")
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children" endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children?$top=200"
result = await self._graphGet(endpoint) # Follow @odata.nextLink until a hard cap is reached so large libraries
if "error" in result: # are fully enumerated (required for bootstrap). Per-page size uses
logger.warning(f"SharePoint browse failed: {result['error']}") # Graph's max supported value to minimise round-trips.
return [] effectiveLimit = int(limit) if limit is not None else None
items: List[Dict[str, Any]] = []
hardCap = 5000
while endpoint and len(items) < hardCap:
result = await self._graphGet(endpoint)
if "error" in result:
logger.warning(f"SharePoint browse failed: {result['error']}")
break
for raw in result.get("value", []) or []:
items.append(raw)
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])] entries = [_graphItemToExternalEntry(item, path) for item in items]
if filter: if filter:
entries = [e for e in entries if _matchFilter(e, filter)] entries = [e for e in entries if _matchFilter(e, filter)]
if limit is not None: if effectiveLimit is not None:
entries = entries[: max(1, int(limit))] entries = entries[: max(1, effectiveLimit)]
return entries return entries
async def _discoverSites(self) -> List[ExternalEntry]: async def _discoverSites(self) -> List[ExternalEntry]:
@ -841,6 +864,285 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
return entries return entries
# ---------------------------------------------------------------------------
# Calendar Adapter
# ---------------------------------------------------------------------------
class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
"""ServiceAdapter for Outlook Calendar via Microsoft Graph.
Path conventions:
``""`` / ``"/"`` -> list user calendars
``"/<calendarId>"`` -> list events in that calendar
``"/<calendarId>/<eventId>"`` -> reserved for future event detail browse
Downloads return a synthesised ``.ics`` (VCALENDAR/VEVENT) since Microsoft
Graph does not expose a ``/$value`` endpoint for events.
"""
_DEFAULT_EVENT_LIMIT = 100
_MAX_EVENT_LIMIT = 1000
_PAGE_SIZE = 100
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
result = await self._graphGet("me/calendars?$top=100")
if "error" in result:
logger.warning(f"MSFT Calendar list failed: {result['error']}")
return []
calendars = result.get("value", [])
if filter:
calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()]
return [
ExternalEntry(
name=c.get("name", ""),
path=f"/{c.get('id', '')}",
isFolder=True,
metadata={
"id": c.get("id"),
"color": c.get("color"),
"owner": (c.get("owner") or {}).get("address"),
"isDefaultCalendar": c.get("isDefaultCalendar", False),
"canEdit": c.get("canEdit", False),
},
)
for c in calendars
]
calendarId = cleanPath.split("/", 1)[0]
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit)
endpoint: Optional[str] = (
f"me/calendars/{calendarId}/events"
f"?$top={pageSize}&$orderby=start/dateTime desc"
)
events: List[Dict[str, Any]] = []
while endpoint and len(events) < effectiveLimit:
result = await self._graphGet(endpoint)
if "error" in result:
logger.warning(f"MSFT Calendar events failed: {result['error']}")
break
for ev in result.get("value", []):
events.append(ev)
if len(events) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
return [
ExternalEntry(
name=ev.get("subject", "(no subject)"),
path=f"/{calendarId}/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime"),
"end": (ev.get("end") or {}).get("dateTime"),
"location": (ev.get("location") or {}).get("displayName"),
"organizer": (ev.get("organizer") or {}).get("emailAddress", {}).get("address"),
"isAllDay": ev.get("isAllDay", False),
"webLink": ev.get("webLink"),
},
)
for ev in events
]
async def download(self, path: str) -> DownloadResult:
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
eventId = cleanPath.split("/")[-1]
ev = await self._graphGet(f"me/events/{eventId}")
if "error" in ev:
logger.warning(f"MSFT Calendar event fetch failed: {ev['error']}")
return DownloadResult()
icsBytes = _eventToIcs(ev)
subject = ev.get("subject") or eventId
safeName = _safeFileName(subject) or "event"
return DownloadResult(
data=icsBytes,
fileName=f"{safeName}.ics",
mimeType="text/calendar",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Calendar upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''")
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
endpoint = f"me/events?$search=\"{safeQuery}\"&$top={effectiveLimit}"
result = await self._graphGet(endpoint)
if "error" in result:
return []
return [
ExternalEntry(
name=ev.get("subject", "(no subject)"),
path=f"/search/{ev.get('id', '')}",
isFolder=False,
mimeType="text/calendar",
metadata={
"id": ev.get("id"),
"start": (ev.get("start") or {}).get("dateTime"),
"end": (ev.get("end") or {}).get("dateTime"),
},
)
for ev in result.get("value", [])
]
# ---------------------------------------------------------------------------
# Contacts Adapter
# ---------------------------------------------------------------------------
class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
"""ServiceAdapter for Outlook Contacts via Microsoft Graph.
Path conventions:
``""`` -> list contact folders (default + custom)
``"/<folderId>"`` -> list contacts in that folder; the
virtual id ``default`` maps to
``/me/contacts`` (the user's primary
contact list)
``"/<folderId>/<contactId>"`` -> reserved for future detail browse
Downloads return a synthesised vCard 3.0 (.vcf) since Microsoft Graph
does not expose a ``/$value`` endpoint for contacts.
"""
_DEFAULT_CONTACT_LIMIT = 200
_MAX_CONTACT_LIMIT = 1000
_PAGE_SIZE = 100
_DEFAULT_FOLDER_ID = "default"
async def browse(
self,
path: str,
filter: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/")
if not cleanPath:
folders: List[ExternalEntry] = [
ExternalEntry(
name="Kontakte",
path=f"/{self._DEFAULT_FOLDER_ID}",
isFolder=True,
metadata={"id": self._DEFAULT_FOLDER_ID, "isDefault": True},
),
]
result = await self._graphGet("me/contactFolders?$top=100")
if "error" not in result:
for f in result.get("value", []):
folders.append(
ExternalEntry(
name=f.get("displayName", ""),
path=f"/{f.get('id', '')}",
isFolder=True,
metadata={"id": f.get("id"), "parentFolderId": f.get("parentFolderId")},
)
)
else:
logger.warning(f"MSFT contactFolders list failed: {result['error']}")
return folders
folderId = cleanPath.split("/", 1)[0]
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit)
if folderId == self._DEFAULT_FOLDER_ID:
endpoint: Optional[str] = f"me/contacts?$top={pageSize}&$orderby=displayName"
else:
endpoint = f"me/contactFolders/{folderId}/contacts?$top={pageSize}&$orderby=displayName"
contacts: List[Dict[str, Any]] = []
while endpoint and len(contacts) < effectiveLimit:
result = await self._graphGet(endpoint)
if "error" in result:
logger.warning(f"MSFT contacts list failed: {result['error']}")
break
for c in result.get("value", []):
contacts.append(c)
if len(contacts) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
return [
ExternalEntry(
name=c.get("displayName") or _personLabel(c) or "(no name)",
path=f"/{folderId}/{c.get('id', '')}",
isFolder=False,
mimeType="text/vcard",
metadata={
"id": c.get("id"),
"givenName": c.get("givenName"),
"surname": c.get("surname"),
"companyName": c.get("companyName"),
"emailAddresses": [e.get("address") for e in (c.get("emailAddresses") or []) if e.get("address")],
"businessPhones": c.get("businessPhones") or [],
"mobilePhone": c.get("mobilePhone"),
},
)
for c in contacts
]
async def download(self, path: str) -> DownloadResult:
cleanPath = (path or "").strip("/")
if "/" not in cleanPath:
return DownloadResult()
contactId = cleanPath.split("/")[-1]
c = await self._graphGet(f"me/contacts/{contactId}")
if "error" in c:
logger.warning(f"MSFT contact fetch failed: {c['error']}")
return DownloadResult()
vcfBytes = _contactToVcard(c)
label = c.get("displayName") or _personLabel(c) or contactId
safeName = _safeFileName(label) or "contact"
return DownloadResult(
data=vcfBytes,
fileName=f"{safeName}.vcf",
mimeType="text/vcard",
)
async def upload(self, path: str, data: bytes, fileName: str) -> dict:
return {"error": "Contacts upload not supported"}
async def search(
self,
query: str,
path: Optional[str] = None,
limit: Optional[int] = None,
) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''")
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}"
result = await self._graphGet(endpoint)
if "error" in result:
return []
return [
ExternalEntry(
name=c.get("displayName") or _personLabel(c) or "(no name)",
path=f"/search/{c.get('id', '')}",
isFolder=False,
mimeType="text/vcard",
metadata={"id": c.get("id")},
)
for c in result.get("value", [])
]
# ---------------------------------------------------------------------------
# MsftConnector (1:n)
# ---------------------------------------------------------------------------
@ -853,6 +1155,8 @@ class MsftConnector(ProviderConnector):
"outlook": OutlookAdapter, "outlook": OutlookAdapter,
"teams": TeamsAdapter, "teams": TeamsAdapter,
"onedrive": OneDriveAdapter, "onedrive": OneDriveAdapter,
"calendar": CalendarAdapter,
"contact": ContactsAdapter,
}
def getAvailableServices(self) -> List[str]:
@ -891,3 +1195,143 @@ def _matchFilter(entry: ExternalEntry, pattern: str) -> bool:
"""Simple glob-like filter (supports * wildcard).""" """Simple glob-like filter (supports * wildcard)."""
import fnmatch import fnmatch
return fnmatch.fnmatch(entry.name.lower(), pattern.lower()) return fnmatch.fnmatch(entry.name.lower(), pattern.lower())
def _safeFileName(name: str) -> str:
"""Strip path-unsafe characters and trim length so the result is a usable file name."""
import re
return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", name or "")[:80].strip(". ")
def _personLabel(contact: Dict[str, Any]) -> str:
given = (contact.get("givenName") or "").strip()
surname = (contact.get("surname") or "").strip()
if given or surname:
return f"{given} {surname}".strip()
company = (contact.get("companyName") or "").strip()
return company
def _icsEscape(value: str) -> str:
"""Escape RFC 5545 reserved characters in TEXT properties."""
if value is None:
return ""
return (
value.replace("\\", "\\\\")
.replace(";", "\\;")
.replace(",", "\\,")
.replace("\r\n", "\\n")
.replace("\n", "\\n")
)
def _icsDateTime(value: Optional[str]) -> Optional[str]:
"""Convert an ISO datetime string to an RFC 5545 DATE-TIME value (UTC)."""
if not value:
return None
from datetime import datetime, timezone
try:
normalized = value.replace("Z", "+00:00") if value.endswith("Z") else value
dt = datetime.fromisoformat(normalized)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
except (TypeError, ValueError):
return None
def _eventToIcs(event: Dict[str, Any]) -> bytes:
"""Build a minimal RFC 5545 VCALENDAR/VEVENT for a Graph event payload."""
from datetime import datetime, timezone
uid = event.get("iCalUId") or event.get("id") or "unknown@poweron"
summary = _icsEscape(event.get("subject") or "")
location = _icsEscape((event.get("location") or {}).get("displayName") or "")
body = (event.get("body") or {}).get("content") or ""
description = _icsEscape(body)
dtstart = _icsDateTime((event.get("start") or {}).get("dateTime"))
dtend = _icsDateTime((event.get("end") or {}).get("dateTime"))
dtstamp = _icsDateTime(event.get("lastModifiedDateTime")) or datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
lines = [
"BEGIN:VCALENDAR",
"VERSION:2.0",
"PRODID:-//PowerOn//MSFT-Calendar-Adapter//EN",
"CALSCALE:GREGORIAN",
"BEGIN:VEVENT",
f"UID:{uid}",
f"DTSTAMP:{dtstamp}",
]
if dtstart:
lines.append(f"DTSTART:{dtstart}")
if dtend:
lines.append(f"DTEND:{dtend}")
if summary:
lines.append(f"SUMMARY:{summary}")
if location:
lines.append(f"LOCATION:{location}")
if description:
lines.append(f"DESCRIPTION:{description}")
organizer = (event.get("organizer") or {}).get("emailAddress", {}).get("address")
if organizer:
lines.append(f"ORGANIZER:mailto:{organizer}")
for att in (event.get("attendees") or []):
addr = (att.get("emailAddress") or {}).get("address")
if addr:
lines.append(f"ATTENDEE:mailto:{addr}")
lines.append("END:VEVENT")
lines.append("END:VCALENDAR")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")
def _contactToVcard(contact: Dict[str, Any]) -> bytes:
"""Build a vCard 3.0 from a Graph /me/contacts payload."""
given = contact.get("givenName") or ""
surname = contact.get("surname") or ""
middle = contact.get("middleName") or ""
fn = contact.get("displayName") or _personLabel(contact) or contact.get("companyName") or ""
lines = [
"BEGIN:VCARD",
"VERSION:3.0",
f"N:{surname};{given};{middle};;",
f"FN:{fn}",
]
if contact.get("companyName"):
org = contact["companyName"]
if contact.get("department"):
org = f"{org};{contact['department']}"
lines.append(f"ORG:{org}")
if contact.get("jobTitle"):
lines.append(f"TITLE:{contact['jobTitle']}")
for em in (contact.get("emailAddresses") or []):
addr = em.get("address")
if addr:
lines.append(f"EMAIL;TYPE=INTERNET:{addr}")
for phone in (contact.get("businessPhones") or []):
if phone:
lines.append(f"TEL;TYPE=WORK,VOICE:{phone}")
if contact.get("mobilePhone"):
lines.append(f"TEL;TYPE=CELL,VOICE:{contact['mobilePhone']}")
for phone in (contact.get("homePhones") or []):
if phone:
lines.append(f"TEL;TYPE=HOME,VOICE:{phone}")
def _appendAddress(addr: Dict[str, Any], typ: str) -> None:
if not addr:
return
street = addr.get("street") or ""
city = addr.get("city") or ""
state = addr.get("state") or ""
postal = addr.get("postalCode") or ""
country = addr.get("countryOrRegion") or ""
if any([street, city, state, postal, country]):
lines.append(f"ADR;TYPE={typ}:;;{street};{city};{state};{postal};{country}")
_appendAddress(contact.get("businessAddress") or {}, "WORK")
_appendAddress(contact.get("homeAddress") or {}, "HOME")
_appendAddress(contact.get("otherAddress") or {}, "OTHER")
if contact.get("personalNotes"):
lines.append(f"NOTE:{_icsEscape(contact['personalNotes'])}")
lines.append(f"UID:{contact.get('id', '')}")
lines.append("END:VCARD")
return ("\r\n".join(lines) + "\r\n").encode("utf-8")

View file

@ -26,7 +26,12 @@ class DataSource(PowerOnModel):
json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection", "labelField": "externalUsername"}}, json_schema_extra={"label": "Verbindungs-ID", "fk_target": {"db": "poweron_app", "table": "UserConnection", "labelField": "externalUsername"}},
) )
sourceType: str = Field( sourceType: str = Field(
description="sharepointFolder, googleDriveFolder, outlookFolder, ftpFolder, clickupList (path under /team/...)", description=(
"sharepointFolder, onedriveFolder, googleDriveFolder, "
"outlookFolder, gmailFolder, ftpFolder, clickupList "
"(path under /team/...), kdriveFolder, calendarFolder, "
"contactFolder"
),
json_schema_extra={"label": "Quellentyp"}, json_schema_extra={"label": "Quellentyp"},
) )
path: str = Field( path: str = Field(

View file

@ -4,10 +4,13 @@
Document reference models for typed document references in workflows.
"""
import logging
from typing import Any, List, Optional
from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel
logger = logging.getLogger(__name__)
class DocumentReference(BaseModel):
"""Base class for document references"""
@ -115,3 +118,86 @@ class DocumentReferenceList(BaseModel):
references.append(DocumentListReference(label=refStr))
return cls(references=references)
def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
"""Tolerant coercion of any agent/UI-supplied document list to
:class:`DocumentReferenceList`.
Accepts the canonical formats plus the dict-wrapper shapes that
LLM tool-callers tend to generate when they see a
``type=DocumentList`` parameter:
* ``None`` / ``""`` -> empty list
* :class:`DocumentReferenceList` -> as-is
* ``str`` -> single-element string list
* ``list[str]`` -> :meth:`from_string_list`
* ``list[dict]`` with ``id`` or ``documentId`` -> item references
* ``{"documents": [...]}`` / ``{"references": [...]}`` ->
recurse into the inner list (this is the shape LLMs love)
* ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single
item reference
* any unrecognised input -> empty list with a WARN log; never
raises (the caller decides whether an empty list is fatal).
"""
if value is None or value == "":
return DocumentReferenceList(references=[])
if isinstance(value, DocumentReferenceList):
return value
if isinstance(value, str):
return DocumentReferenceList.from_string_list([value])
if isinstance(value, dict):
for innerKey in ("documents", "references", "items", "files"):
if innerKey in value and isinstance(value[innerKey], list):
return coerceDocumentReferenceList(value[innerKey])
docId = value.get("documentId") or value.get("id")
if docId:
return DocumentReferenceList(references=[
DocumentItemReference(
documentId=str(docId),
fileName=value.get("fileName") or value.get("name"),
)
])
logger.warning(
f"coerceDocumentReferenceList: unsupported dict shape "
f"(keys={list(value.keys())}); returning empty list."
)
return DocumentReferenceList(references=[])
if isinstance(value, list):
if not value:
return DocumentReferenceList(references=[])
first = value[0]
if isinstance(first, str):
return DocumentReferenceList.from_string_list(value)
if isinstance(first, dict):
references: List[DocumentReference] = []
for item in value:
if not isinstance(item, dict):
continue
docId = item.get("documentId") or item.get("id")
if docId:
references.append(DocumentItemReference(
documentId=str(docId),
fileName=item.get("fileName") or item.get("name"),
))
elif item.get("label"):
references.append(DocumentListReference(
label=str(item["label"]),
messageId=item.get("messageId"),
))
return DocumentReferenceList(references=references)
# Mixed/object list (e.g. inline ActionDocument-like): caller
# must pre-handle that case before calling this coercer.
logger.warning(
f"coerceDocumentReferenceList: list element type "
f"{type(first).__name__} not recognised; returning empty list."
)
return DocumentReferenceList(references=[])
logger.warning(
f"coerceDocumentReferenceList: unsupported value type "
f"{type(value).__name__}; returning empty list."
)
return DocumentReferenceList(references=[])
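A short, self-contained illustration of the coercion rules above; the ids and names are made up:
# Dict-wrapper shape produced by tool callers -> one DocumentItemReference.
_docs = coerceDocumentReferenceList({"documents": [{"id": "doc-123", "name": "offer.pdf"}]})
assert len(_docs.references) == 1
assert _docs.references[0].documentId == "doc-123"
assert _docs.references[0].fileName == "offer.pdf"
# Degenerate inputs degrade to an empty or single-label list instead of raising.
assert coerceDocumentReferenceList(None).references == []
assert len(coerceDocumentReferenceList("quarterly report").references) == 1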

View file

@ -95,7 +95,14 @@ class ExtractionOptions(BaseModel):
imageQuality: int = Field(default=85, ge=1, le=100, description="Image quality (1-100)")
# Merging strategy
mergeStrategy: Optional[MergeStrategy] = Field(
default_factory=MergeStrategy,
description=(
"Strategy for merging extraction results. Pass None to skip merging entirely "
"(required for per-chunk ingestion pipelines like RAG, where per-page/per-section "
"granularity must be preserved for embedding)."
),
)
# Optional chunking parameters (for backward compatibility)
chunkAllowed: Optional[bool] = Field(default=None, description="Whether chunking is allowed")
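A minimal sketch of the two call patterns this optional field now supports; the runExtraction call is elided to its shape, and the registries and file arguments are whatever the caller already has in scope:
# RAG / ingestion lane: keep one ContentPart per page or section.
ragOptions = ExtractionOptions(mergeStrategy=None)
# Legacy behaviour: the default factory still merges everything into a single part.
legacyOptions = ExtractionOptions()
# extracted = runExtraction(extractorRegistry, chunkerRegistry, ...)  # pass ragOptions as the ExtractionOptions argument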

View file

@ -90,6 +90,16 @@ class FileContentIndex(PowerOnModel):
description="Data visibility scope: personal, featureInstance, mandate, global", description="Data visibility scope: personal, featureInstance, mandate, global",
json_schema_extra={"label": "Sichtbarkeit"}, json_schema_extra={"label": "Sichtbarkeit"},
) )
sourceKind: str = Field(
default="file",
description="Origin of the indexed content: file, sharepoint_item, outlook_message, outlook_attachment, ...",
json_schema_extra={"label": "Quellenart"},
)
connectionId: Optional[str] = Field(
default=None,
description="UserConnection ID if this index entry originates from an external connector",
json_schema_extra={"label": "Connection-ID"},
)
neutralizationStatus: Optional[str] = Field(
default=None,
description="Neutralization status: completed, failed, skipped, None = not required",

View file

@ -475,7 +475,23 @@ class UserConnection(PowerOnModel):
description="OAuth scopes granted for this connection", description="OAuth scopes granted for this connection",
json_schema_extra={"frontend_type": "list", "frontend_readonly": True, "frontend_required": False, "label": "Gewährte Berechtigungen"}, json_schema_extra={"frontend_type": "list", "frontend_readonly": True, "frontend_required": False, "label": "Gewährte Berechtigungen"},
) )
knowledgeIngestionEnabled: bool = Field(
default=False,
description="Whether the user has consented to knowledge ingestion for this connection",
json_schema_extra={"frontend_type": "boolean", "frontend_readonly": False, "frontend_required": False, "label": "Wissensdatenbank aktiv"},
)
knowledgePreferences: Optional[Dict[str, Any]] = Field(
default=None,
description=(
"Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
"neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
"mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
"clickupScope (titles|title_description|with_comments), "
"surfaceToggles (dict per authority), maxAgeDays (int)."
),
json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
)
@computed_field
@property
def connectionReference(self) -> str:
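A hypothetical schemaVersion=1 preference blob matching the keys listed in the field description; the concrete values and the authority keys in surfaceToggles are illustrative, not defaults:
examplePreferences = {
    "schemaVersion": 1,
    "neutralizeBeforeEmbed": True,
    "mailContentDepth": "snippet",            # metadata | snippet | full
    "mailIndexAttachments": False,
    "filesIndexBinaries": False,
    "mimeAllowlist": ["application/pdf", "text/plain"],
    "clickupScope": "title_description",      # titles | title_description | with_comments
    "surfaceToggles": {"msft": True, "google": False},   # per-authority on/off; keys are assumed
    "maxAgeDays": 365,
}
# connection is an already-loaded UserConnection instance:
# connection.knowledgePreferences = examplePreferences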

View file

@ -174,14 +174,26 @@ async def indexSessionData(
for c in chunks
]
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="coaching_session",
sourceId=syntheticFileId,
fileName=f"coaching-session-{sessionId[:8]}",
mimeType="application/x-coaching-session",
userId=userId,
featureInstanceId=featureInstanceId,
mandateId=mandateId,
contentObjects=contentObjects,
provenance={
"lane": "feature",
"feature": "commcoach",
"sessionId": sessionId,
"contextId": contextId,
"messageCount": len(messages or []),
},
)
)
logger.info(f"Successfully indexed coaching session {sessionId} ({len(chunks)} chunks)")
except Exception as e:

View file

@ -1160,6 +1160,9 @@ async def list_connection_services(
"drive": "Google Drive", "drive": "Google Drive",
"gmail": "Gmail", "gmail": "Gmail",
"files": "Files (FTP)", "files": "Files (FTP)",
"kdrive": "kDrive",
"calendar": "Calendar",
"contact": "Contacts",
}
_serviceIcons = {
"sharepoint": "sharepoint",
@ -1170,6 +1173,9 @@ async def list_connection_services(
"drive": "cloud", "drive": "cloud",
"gmail": "mail", "gmail": "mail",
"files": "folder", "files": "folder",
"kdrive": "cloud",
"calendar": "calendar",
"contact": "contact",
}
items = [
{"service": s, "label": _serviceLabels.get(s, s), "icon": _serviceIcons.get(s, "folder")}

View file

@ -188,6 +188,9 @@ _SOURCE_TYPE_TO_SERVICE = {
"gmailFolder": "gmail", "gmailFolder": "gmail",
"ftpFolder": "files", "ftpFolder": "files",
"clickupList": "clickup", "clickupList": "clickup",
"kdriveFolder": "kdrive",
"calendarFolder": "calendar",
"contactFolder": "contact",
}
@ -1818,6 +1821,9 @@ async def listConnectionServices(
"drive": "Google Drive", "drive": "Google Drive",
"gmail": "Gmail", "gmail": "Gmail",
"files": "Files (FTP)", "files": "Files (FTP)",
"kdrive": "kDrive",
"calendar": "Calendar",
"contact": "Contacts",
}
_serviceIcons = {
"sharepoint": "sharepoint",
@ -1827,6 +1833,9 @@ async def listConnectionServices(
"drive": "cloud", "drive": "cloud",
"gmail": "mail", "gmail": "mail",
"files": "folder", "files": "folder",
"kdrive": "cloud",
"calendar": "calendar",
"contact": "contact",
}
items = [
{

View file

@ -1268,19 +1268,7 @@ class AppObjects:
result = []
for conn_dict in connections:
try:
connection = UserConnection.model_validate(conn_dict)
result.append(connection)
except Exception as e:
logger.error(
@ -1293,6 +1281,28 @@ class AppObjects:
logger.error(f"Error getting user connections: {str(e)}") logger.error(f"Error getting user connections: {str(e)}")
return [] return []
def getActiveKnowledgeConnections(self) -> List[UserConnection]:
"""Return all UserConnections with knowledgeIngestionEnabled=True and status=active.
Used by the daily re-sync scheduler to determine which connections to re-index.
"""
try:
rows = self.db.getRecordset(
UserConnection,
recordFilter={"knowledgeIngestionEnabled": True, "status": ConnectionStatus.ACTIVE.value},
)
result = []
for row in rows or []:
try:
conn = UserConnection.model_validate(row) if isinstance(row, dict) else row
result.append(conn)
except Exception as _e:
logger.warning(f"getActiveKnowledgeConnections: could not parse row: {_e}")
return result
except Exception as e:
logger.error(f"getActiveKnowledgeConnections failed: {e}")
return []
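A hedged sketch of how the daily re-sync described in the docstring might iterate this helper; the dispatch callable and its parameters are placeholders for whatever job the scheduler actually enqueues:
def dailyKnowledgeResync(appObjects, dispatchBootstrap) -> int:
    """Dispatch one re-index/bootstrap job per consenting, active connection."""
    dispatched = 0
    for conn in appObjects.getActiveKnowledgeConnections():
        try:
            dispatchBootstrap(connectionId=conn.id, authority=str(conn.authority), userId=conn.userId)
            dispatched += 1
        except Exception as e:
            logger.warning(f"daily knowledge re-sync dispatch failed for {conn.id}: {e}")
    return dispatched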
def getUserConnectionById(self, connectionId: str) -> Optional[UserConnection]:
"""Get a single UserConnection by ID or by reference string (connection:authority:username)."""
try:
@ -1317,18 +1327,21 @@ class AppObjects:
if connections:
conn_dict = connections[0]
try:
return UserConnection.model_validate(conn_dict)
except Exception:
return UserConnection(
id=conn_dict["id"],
userId=conn_dict["userId"],
authority=conn_dict.get("authority"),
externalId=conn_dict.get("externalId", ""),
externalUsername=conn_dict.get("externalUsername", ""),
externalEmail=conn_dict.get("externalEmail"),
status=conn_dict.get("status", "pending"),
connectedAt=conn_dict.get("connectedAt"),
lastChecked=conn_dict.get("lastChecked"),
expiresAt=conn_dict.get("expiresAt"),
)
return None
except Exception as e:
logger.error(f"Error getting user connection by ID: {str(e)}")
@ -3331,7 +3344,10 @@ class AppObjects:
)
if not tokens:
# Pending connections legitimately have no token yet (PAT not
# submitted, OAuth callback not completed). Keep at DEBUG to
# avoid noisy warnings on every connection-list refresh.
logger.debug(
f"No connection token found for connectionId: {connectionId}"
)
return None

View file

@ -93,6 +93,46 @@ class KnowledgeObjects:
self.db.recordModify(FileContentIndex, fileId, {"status": status})
return True
def deleteFileContentIndexByConnectionId(self, connectionId: str) -> Dict[str, int]:
"""Delete all FileContentIndex rows (and their ContentChunks) for a connection.
Used when a UserConnection is revoked / disconnected so the knowledge corpus
no longer references data the user no longer grants access to. Returns a dict
with counts to support observability logs.
"""
if not connectionId:
return {"indexRows": 0, "chunks": 0}
rows = self.db.getRecordset(
FileContentIndex, recordFilter={"connectionId": connectionId}
)
mandateIds: set = set()
chunkCount = 0
indexCount = 0
for row in rows:
fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
if not fid:
continue
chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid})
for chunk in chunks:
if self.db.recordDelete(ContentChunk, chunk["id"]):
chunkCount += 1
if self.db.recordDelete(FileContentIndex, fid):
indexCount += 1
if mid:
mandateIds.add(str(mid))
for mid in mandateIds:
try:
from modules.interfaces.interfaceDbBilling import _getRootInterface
_getRootInterface().reconcileMandateStorageBilling(mid)
except Exception as ex:
logger.warning("reconcileMandateStorageBilling after connection purge failed: %s", ex)
return {"indexRows": indexCount, "chunks": chunkCount}
def deleteFileContentIndex(self, fileId: str) -> bool:
"""Delete a FileContentIndex and all associated ContentChunks."""
existing = self.getFileContentIndex(fileId)

View file

@ -836,13 +836,25 @@ class ComponentObjects:
def checkForDuplicateFile(self, fileHash: str, fileName: str) -> Optional[FileItem]:
"""Checks if a file with the same hash AND fileName already exists for the current user
**within the same scope** (mandateId + featureInstanceId).
Duplicate = same user + same fileHash + same fileName + same scope + RBAC-visible.
Same hash with different name is allowed (intentional copy by user).
RBAC parity contract: this method must NEVER return a FileItem that
``getFile()`` would not return for the current user. Otherwise callers
(``saveUploadedFile`` / ``createFile``) hand back an id that the very
next ``updateFile`` / ``getFile`` then rejects with
``File with ID ... not found`` -- the well-known "ghost duplicate"
symptom seen when ``interfaceDbComponent`` is initialised without a
``featureInstanceId`` (e.g. via ``serviceHub``) but a same-hash+name
file exists in another featureInstance under the same mandate.
We therefore cross-check the candidate through the RBAC-aware ``getFile``
before returning it; if RBAC blocks it, we treat it as "no duplicate
for this scope" and the caller will create a fresh per-scope copy.
"""
if not self.userId:
return None
recordFilter: dict = {
"sysCreatedBy": self.userId,
"fileHash": fileHash,
@ -857,10 +869,10 @@ class ComponentObjects:
FileItem,
recordFilter=recordFilter,
)
if not matchingFiles:
return None
file = matchingFiles[0]
fileId = file["id"]
@ -869,16 +881,17 @@ class ComponentObjects:
logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file") logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file")
return None return None
return FileItem( rbacVisible = self.getFile(fileId)
id=fileId, if rbacVisible is None:
mandateId=file.get("mandateId", ""), logger.info(
featureInstanceId=file.get("featureInstanceId", ""), f"Duplicate FileItem {fileId} ('{fileName}', hash {fileHash[:12]}...) found via "
fileName=file["fileName"], f"sysCreatedBy+hash+name match but is not RBAC-visible in current scope "
mimeType=file["mimeType"], f"(mandateId={self.mandateId or '-'}, featureInstanceId={self.featureInstanceId or '-'}). "
fileHash=file["fileHash"], f"Treating as no-duplicate so a fresh per-scope copy gets created."
fileSize=file["fileSize"], )
sysCreatedAt=file.get("sysCreatedAt"), return None
)
return rbacVisible
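The contract in the docstring can be stated as a short invariant for callers; componentObjects, fileHash and fileName stand in for the caller's context:
duplicate = componentObjects.checkForDuplicateFile(fileHash, fileName)
if duplicate is not None:
    # RBAC parity: a returned duplicate is always readable in this scope,
    # so the follow-up getFile can no longer produce the "ghost duplicate" 404.
    assert componentObjects.getFile(duplicate.id) is not None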
# Class-level cache — built once from the ExtractorRegistry
_extensionToMime: Optional[Dict[str, str]] = None

View file

@ -351,11 +351,18 @@ def create_connection(
externalUsername="", # Will be set after OAuth externalUsername="", # Will be set after OAuth
status=ConnectionStatus.PENDING # Start with PENDING status status=ConnectionStatus.PENDING # Start with PENDING status
) )
# Apply knowledge consent + preferences from request body before persisting
knowledge_enabled = connection_data.get("knowledgeIngestionEnabled")
if isinstance(knowledge_enabled, bool):
connection.knowledgeIngestionEnabled = knowledge_enabled
knowledge_prefs = connection_data.get("knowledgePreferences")
if isinstance(knowledge_prefs, dict):
connection.knowledgePreferences = knowledge_prefs
# Save connection record - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connection.id, connection.model_dump())
return connection
except HTTPException:
@ -484,16 +491,23 @@ def update_connection(
def connect_service(
request: Request,
connectionId: str = Path(..., description="The ID of the connection to connect"),
body: Optional[Dict[str, Any]] = Body(default=None),
currentUser: User = Depends(getCurrentUser)
) -> Dict[str, Any]:
"""Connect a service for the current user.
Optional body: ``{"reauth": true}`` -- forces the OAuth provider to re-show
the consent screen, which is required when new scopes have been added (e.g.
Calendar + Contacts after the connection was first created). Without this
flag the provider silently re-uses the previous consent and never grants
the new scopes, leaving the connection in a degraded state.
SECURITY: This endpoint is secure - users can only connect their own connections.
"""
try:
interface = getInterface(currentUser)
# Find the connection
connection = None
# SECURITY FIX: All users (including admins) can only connect their own connections
@ -503,29 +517,40 @@ def connect_service(
if conn.id == connectionId:
connection = conn
break
if not connection:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=routeApiMsg("Connection not found")
)
reauth = bool((body or {}).get("reauth")) if isinstance(body, dict) else False
reauthSuffix = "&reauth=1" if reauth else ""
# Data-app OAuth (JWT state issued server-side in /auth/connect)
auth_url = None
if connection.authority == AuthAuthority.MSFT:
auth_url = f"/api/msft/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
elif connection.authority == AuthAuthority.GOOGLE:
auth_url = f"/api/google/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
elif connection.authority == AuthAuthority.CLICKUP:
auth_url = f"/api/clickup/auth/connect?connectionId={quote(connectionId, safe='')}{reauthSuffix}"
elif connection.authority == AuthAuthority.INFOMANIAK:
# Infomaniak does not use OAuth for data access; the frontend posts a
# Personal Access Token directly to /api/infomaniak/connections/{id}/token.
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg(
"Infomaniak uses a Personal Access Token instead of OAuth. "
"Submit the token via POST /api/infomaniak/connections/{connectionId}/token."
),
)
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Unsupported authority: {connection.authority}"
)
return {"authUrl": auth_url}
except HTTPException:
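A client-side sketch of the optional reauth body described in the connect_service docstring; the base URL, route prefix and cookie handling are placeholders, only the body shape and the authUrl response key come from the route above:
import httpx

connectionId = "00000000-0000-0000-0000-000000000000"  # placeholder
with httpx.Client(base_url="https://poweron.example") as client:
    resp = client.post(
        f"/api/connections/{connectionId}/connect",  # path prefix is an assumption
        json={"reauth": True},                       # force the provider consent screen
    )
    authUrl = resp.json()["authUrl"]                 # ends in "&reauth=1" for OAuth authorities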
@ -568,8 +593,25 @@ def disconnect_service(
detail=routeApiMsg("Connection not found") detail=routeApiMsg("Connection not found")
) )
# Update connection status # Fire revoked event BEFORE DB status change so knowledge purge and
connection.status = ConnectionStatus.INACTIVE # status mutation form one logical step; subscribers see the
# connection as it was. INACTIVE does not exist on the enum — REVOKED
# is the correct terminal-but-retained state (deleted rows are
# handled in DELETE /{id}).
try:
from modules.shared.callbackRegistry import callbackRegistry
callbackRegistry.trigger(
"connection.revoked",
connectionId=connectionId,
authority=str(getattr(connection.authority, "value", connection.authority) or ""),
userId=str(currentUser.id),
reason="disconnected",
)
except Exception as _cbErr:
logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)
connection.status = ConnectionStatus.REVOKED
connection.lastChecked = getUtcTimestamp()
# Update connection record - models now handle timestamp serialization automatically
@ -618,6 +660,23 @@ def delete_connection(
detail=routeApiMsg("Connection not found") detail=routeApiMsg("Connection not found")
) )
# Fire revoked event BEFORE the row disappears so consumers still
# have authority/connection context for observability; purge itself
# targets FileContentIndex rows by connectionId which are unaffected
# by the UserConnection delete.
try:
from modules.shared.callbackRegistry import callbackRegistry
callbackRegistry.trigger(
"connection.revoked",
connectionId=connectionId,
authority=str(getattr(connection.authority, "value", connection.authority) or ""),
userId=str(currentUser.id),
reason="deleted",
)
except Exception as _cbErr:
logger.warning("connection.revoked callback failed for %s: %s", connectionId, _cbErr)
# Remove the connection - only need connectionId since permissions are verified
interface.removeUserConnection(connectionId)

View file

@ -77,7 +77,7 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
"""Background task: pre-scan + extraction + knowledge indexing. """Background task: pre-scan + extraction + knowledge indexing.
Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted) Step 1: Structure Pre-Scan (AI-free) -> FileContentIndex (persisted)
Step 2: Content extraction via runExtraction -> ContentParts Step 2: Content extraction via runExtraction -> ContentParts
Step 3: KnowledgeService.indexFile -> chunking + embedding -> Knowledge Store""" Step 3: KnowledgeService.requestIngestion -> idempotent chunking + embedding -> Knowledge Store"""
userId = user.id if hasattr(user, "id") else str(user) userId = user.id if hasattr(user, "id") else str(user)
try: try:
mgmtInterface = interfaceDbManagement.getInterface(user) mgmtInterface = interfaceDbManagement.getInterface(user)
@ -122,9 +122,30 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
f"{contentIndex.totalObjects} objects" f"{contentIndex.totalObjects} objects"
) )
# Persist FileContentIndex immediately # Persist FileContentIndex immediately.
# IMPORTANT: preserve `_ingestion` metadata and `status="indexed"` from any
# prior successful run — otherwise this upsert wipes the idempotency cache
# and requestIngestion cannot detect duplicates (AC4 breaks).
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
knowledgeDb = getKnowledgeInterface()
try:
_existing = knowledgeDb.getFileContentIndex(fileId)
except Exception:
_existing = None
if _existing:
_existingStruct = (
_existing.get("structure") if isinstance(_existing, dict)
else getattr(_existing, "structure", {})
) or {}
_existingStatus = (
_existing.get("status") if isinstance(_existing, dict)
else getattr(_existing, "status", "")
) or ""
if "_ingestion" in _existingStruct:
contentIndex.structure = dict(contentIndex.structure or {})
contentIndex.structure["_ingestion"] = _existingStruct["_ingestion"]
if _existingStatus == "indexed":
contentIndex.status = "indexed"
knowledgeDb.upsertFileContentIndex(contentIndex)
# Step 2: Content extraction (AI-free, produces ContentParts)
@ -134,7 +155,10 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
# mergeStrategy=None: keep per-page / per-section granularity for RAG ingestion.
# The default MergeStrategy concatenates all text parts into a single blob, which
# collapses a 500-page PDF into one ContentChunk and destroys semantic retrieval.
options = ExtractionOptions(mergeStrategy=None)
extracted = runExtraction(
extractorRegistry, chunkerRegistry,
@ -181,15 +205,21 @@ async def _autoIndexFile(fileId: str, fileName: str, mimeType: str, user):
)
knowledgeService = getService("knowledge", ctx)
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="file",
sourceId=fileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
featureInstanceId=str(feature_instance_id) if feature_instance_id else "",
mandateId=str(mandate_id) if mandate_id else "",
contentObjects=contentObjects,
structure=contentIndex.structure,
provenance={"lane": "upload", "route": "routeDataFiles._autoIndexFile"},
)
)
# Re-acquire interface after await to avoid stale user context from the singleton

View file

@ -241,6 +241,29 @@ async def auth_connect_callback(
)
interface.saveConnectionToken(token)
try:
from modules.shared.callbackRegistry import callbackRegistry
if connection.knowledgeIngestionEnabled:
callbackRegistry.trigger(
"connection.established",
connectionId=connection.id,
authority=str(getattr(connection.authority, "value", connection.authority) or "clickup"),
userId=str(user.id),
)
else:
logger.info(
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connection.id,
"authority": "clickup",
"reason": "consent_disabled",
},
)
except Exception as _cbErr:
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
return HTMLResponse(
content=f"""
<html>

View file

@ -281,9 +281,17 @@ async def auth_login_callback(
def auth_connect(
request: Request,
connectionId: str = Query(..., description="UserConnection id"),
reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"),
currentUser: User = Depends(getCurrentUser),
) -> RedirectResponse:
"""Start Google Data OAuth for an existing connection (requires gateway session).
Google already defaults to ``prompt=consent`` here, but ``include_granted_scopes=true``
can cause newly added scopes (e.g. calendar.readonly, contacts.readonly) to be
silently dropped on subsequent re-authorisations. With ``reauth=1`` we drop
``include_granted_scopes`` so Google re-issues a token strictly for the
current scope list.
"""
try:
_require_google_data_config()
interface = getInterface(currentUser)
@ -310,9 +318,10 @@ def auth_connect(
)
extra_params: Dict[str, Any] = {
"access_type": "offline",
"state": state_jwt,
}
if not reauth:
extra_params["include_granted_scopes"] = "true"
login_hint = connection.externalEmail or connection.externalUsername
if login_hint:
extra_params["login_hint"] = login_hint
@ -470,6 +479,29 @@ async def auth_connect_callback(
)
interface.saveConnectionToken(token)
try:
from modules.shared.callbackRegistry import callbackRegistry
if connection.knowledgeIngestionEnabled:
callbackRegistry.trigger(
"connection.established",
connectionId=connection.id,
authority=str(getattr(connection.authority, "value", connection.authority) or "google"),
userId=str(user.id),
)
else:
logger.info(
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connection.id,
"authority": "google",
"reason": "consent_disabled",
},
)
except Exception as _cbErr:
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
return HTMLResponse(
content=f"""
<html>

View file

@ -1,69 +1,66 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Infomaniak OAuth for data connections (UserConnection + Token). """Infomaniak Personal-Access-Token onboarding for data connections.
Pure DATA_CONNECTION flow -- Infomaniak is NOT a login authority for PowerOn. Infomaniak does NOT support OAuth scopes for kDrive/kSuite data access.
The user must create a Personal Access Token (PAT) at
https://manager.infomaniak.com/v3/ng/accounts/token/list with the API
scopes:
- ``drive`` -> kDrive (active adapter)
- ``workspace:calendar`` -> Calendar (active adapter)
- ``workspace:contact`` -> Contacts (active adapter)
- ``workspace:mail`` -> Mail (adapter pending; scope reserved)
Validation strategy
-------------------
The submit endpoint validates the PAT in two deterministic steps,
each addressing one scope:
1. ``listAccessibleDrives(pat)`` -> ``GET /2/drive/init?with=drives``
proves the ``drive`` scope is on the PAT and -- as a side effect --
confirms the user has at least one accessible kDrive. This is the
*only* listing endpoint that returns drives where the user has
``role: 'user'`` (the documented ``/2/drive?account_id=...`` listing
is filtered to admin-only drives and would silently return ``[]``
for a standard kSuite member).
2. ``resolveOwnerIdentity(pat)`` -> PIM Calendar (preferred) or PIM
Contacts (fallback) yields the user's display name + their kSuite
account_id, used purely for connection labelling. This also proves
that at least one of ``workspace:calendar`` / ``workspace:contact``
is on the PAT (the connection would otherwise be blank in the UI).
Mail has no separate probe: its scope is recorded in ``grantedScopes``
so a future adapter can pick it up without re-issuing the token.
""" """
from fastapi import APIRouter, HTTPException, Request, status, Depends, Query from fastapi import APIRouter, HTTPException, Request, status, Depends, Path, Body
from fastapi.responses import HTMLResponse, RedirectResponse
import logging import logging
import json
import time
from typing import Dict, Any from typing import Dict, Any
from urllib.parse import urlencode import hashlib
import httpx
from jose import jwt as jose_jwt
from jose import JWTError
from modules.shared.configuration import APP_CONFIG from modules.interfaces.interfaceDbApp import getInterface
from modules.interfaces.interfaceDbApp import getInterface, getRootInterface
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
from modules.datamodels.datamodelSecurity import Token, TokenPurpose from modules.datamodels.datamodelSecurity import Token, TokenPurpose
from modules.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM from modules.auth import getCurrentUser, limiter
from modules.auth.oauthProviderConfig import infomaniakDataScopes from modules.shared.timeUtils import getUtcTimestamp, createExpirationTimestamp
from modules.shared.timeUtils import createExpirationTimestamp, getUtcTimestamp, parseTimestamp
from modules.shared.i18nRegistry import apiRouteContext from modules.shared.i18nRegistry import apiRouteContext
from modules.connectors.providerInfomaniak.connectorInfomaniak import (
resolveOwnerIdentity,
listAccessibleDrives,
InfomaniakIdentityError,
)
routeApiMsg = apiRouteContext("routeSecurityInfomaniak") routeApiMsg = apiRouteContext("routeSecurityInfomaniak")
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_FLOW_CONNECT = "infomaniak_connect" # Infomaniak PATs do not expire unless the user sets an explicit lifetime in
# the Manager (up to 30 years). We persist a 10-year horizon so the central
INFOMANIAK_AUTHORIZE_URL = "https://login.infomaniak.com/authorize" # tokenStatus helper does not flag the connection as "no token". Mirrors
INFOMANIAK_TOKEN_URL = "https://login.infomaniak.com/token" # ClickUp.
INFOMANIAK_API_BASE = "https://api.infomaniak.com" _INFOMANIAK_TOKEN_EXPIRES_IN_SEC = 10 * 365 * 24 * 3600
router = APIRouter(
@ -78,251 +75,143 @@ router = APIRouter(
)
@router.get("/auth/connect") @router.post("/connections/{connectionId}/token")
@limiter.limit("5/minute") @limiter.limit("10/minute")
def auth_connect( async def submit_infomaniak_token(
request: Request, request: Request,
connectionId: str = Query(..., description="UserConnection id"), connectionId: str = Path(..., description="UserConnection id"),
body: Dict[str, Any] = Body(..., description="{ 'token': '<PAT>' }"),
currentUser: User = Depends(getCurrentUser), currentUser: User = Depends(getCurrentUser),
) -> RedirectResponse: ) -> Dict[str, Any]:
"""Start Infomaniak OAuth for an existing connection (requires gateway session).""" """Validate and persist an Infomaniak Personal Access Token (PAT).
Body:
{ "token": "<personal-access-token from Infomaniak Manager>" }
"flow": _FLOW_CONNECT,
"connectionId": connectionId, Validation order (both must succeed before persisting):
"userId": str(currentUser.id), 1. ``listAccessibleDrives(pat)`` -> proves the ``drive`` scope
} is on the PAT and confirms the user can see at least one
) kDrive (uses ``/2/drive/init?with=drives`` so users with
query = urlencode( ``role: 'user'`` are also covered).
{ 2. ``resolveOwnerIdentity(pat)`` -> display name + kSuite
"client_id": CLIENT_ID, account_id for the connection UI label (proves at least one
"response_type": "code", of ``workspace:calendar`` / ``workspace:contact`` is present).
"access_type": "offline",
"redirect_uri": REDIRECT_URI, No PAT-derived data is stored as adapter state -- both the drive
"scope": " ".join(infomaniakDataScopes), list and the owner identity are re-resolved lazily by the adapters
"state": state_jwt, at request time.
} """
) pat = (body or {}).get("token")
auth_url = f"{INFOMANIAK_AUTHORIZE_URL}?{query}" if not isinstance(pat, str) or not pat.strip():
return RedirectResponse(auth_url)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg("Missing 'token' in request body"),
)
pat = pat.strip()
interface = getInterface(currentUser)
@router.get("/auth/connect/callback")
async def auth_connect_callback(
code: str = Query(...),
state: str = Query(...),
) -> HTMLResponse:
"""OAuth callback for Infomaniak data connection."""
state_data = _parse_oauth_state(state)
if state_data.get("flow") != _FLOW_CONNECT:
raise HTTPException(
status_code=400, detail=routeApiMsg("Invalid OAuth flow for this callback")
)
connection_id = state_data.get("connectionId")
user_id = state_data.get("userId")
if not connection_id or not user_id:
raise HTTPException(
status_code=400, detail=routeApiMsg("Missing connection or user in OAuth state")
)
_require_infomaniak_config()
async with httpx.AsyncClient() as client:
token_resp = await client.post(
INFOMANIAK_TOKEN_URL,
data={
"grant_type": "authorization_code",
"client_id": CLIENT_ID,
"client_secret": CLIENT_SECRET,
"code": code,
"redirect_uri": REDIRECT_URI,
},
headers={"Content-Type": "application/x-www-form-urlencoded"},
timeout=30.0,
)
if token_resp.status_code != 200:
logger.error(
f"Infomaniak token exchange failed: {token_resp.status_code} {token_resp.text}"
)
return HTMLResponse(
content=f"<html><body><h1>Connection Failed</h1><p>{token_resp.text}</p></body></html>",
status_code=400,
)
token_json = token_resp.json()
access_token = token_json.get("access_token")
refresh_token = token_json.get("refresh_token", "")
expires_in = int(token_json.get("expires_in", 0))
granted_scopes = token_json.get("scope", "")
if not access_token:
return HTMLResponse(
content="<html><body><h1>Connection Failed</h1><p>No access token.</p></body></html>",
status_code=400,
)
rootInterface = getRootInterface()
if not refresh_token:
try:
existing_tokens = rootInterface.getTokensByConnectionIdAndAuthority(
connection_id, AuthAuthority.INFOMANIAK
)
if existing_tokens:
existing_tokens.sort(
key=lambda x: parseTimestamp(x.createdAt, default=0), reverse=True
)
refresh_token = existing_tokens[0].tokenRefresh or ""
except Exception:
pass
async with httpx.AsyncClient() as client:
profile_resp = await client.get(
f"{INFOMANIAK_API_BASE}/1/profile",
headers={
"Authorization": f"Bearer {access_token}",
"Accept": "application/json",
},
timeout=30.0,
)
if profile_resp.status_code != 200:
logger.error(
f"Infomaniak profile lookup failed: {profile_resp.status_code} {profile_resp.text}"
)
return HTMLResponse(
content="<html><body><h1>Connection Failed</h1><p>Could not load Infomaniak profile.</p></body></html>",
status_code=400,
)
profile_payload = profile_resp.json()
profile = profile_payload.get("data") if isinstance(profile_payload, dict) else None
profile = profile or {}
user = rootInterface.getUser(user_id)
if not user:
return HTMLResponse(
content="""
<html><body><script>
if (window.opener) {
window.opener.postMessage({ type: 'infomaniak_connection_error', error: 'User not found' }, '*');
setTimeout(() => window.close(), 1000);
} else window.close();
</script></body></html>
""",
status_code=404,
)
interface = getInterface(user)
connections = interface.getUserConnections(user_id)
connection = None connection = None
for conn in connections: for conn in interface.getUserConnections(currentUser.id):
if conn.id == connection_id: if conn.id == connectionId and conn.authority == AuthAuthority.INFOMANIAK:
connection = conn connection = conn
break break
if not connection: if not connection:
return HTMLResponse( raise HTTPException(
content=""" status_code=status.HTTP_404_NOT_FOUND,
<html><body><script> detail=routeApiMsg("Infomaniak connection not found"),
if (window.opener) {
window.opener.postMessage({ type: 'infomaniak_connection_error', error: 'Connection not found' }, '*');
setTimeout(() => window.close(), 1000);
} else window.close();
</script></body></html>
""",
status_code=404,
) )
try:
drives = await listAccessibleDrives(pat)
except InfomaniakIdentityError as e:
logger.warning(
f"Infomaniak token submit for connection {connectionId} could not "
f"list drives: {e}"
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg(
"Token rejected by Infomaniak (missing scope 'drive'). "
"Required scopes: 'drive' (kDrive) and "
"'workspace:calendar' (or 'workspace:contact'). Mail "
"scope 'workspace:mail' is reserved."
),
)
try:
identity = await resolveOwnerIdentity(pat)
except InfomaniakIdentityError as e:
logger.warning(
f"Infomaniak token submit for connection {connectionId} could not "
f"resolve owner identity: {e}"
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=routeApiMsg(
"Could not derive your Infomaniak account from the token. "
"Please ensure the PAT carries 'workspace:calendar' or "
"'workspace:contact' so we can identify your account."
),
)
tokenFingerprint = "pat-" + hashlib.sha256(pat.encode("utf-8")).hexdigest()[:8]
username = identity["displayName"] or f"infomaniak-{tokenFingerprint}"
expiresAt = createExpirationTimestamp(_INFOMANIAK_TOKEN_EXPIRES_IN_SEC)
try:
connection.status = ConnectionStatus.ACTIVE
connection.lastChecked = getUtcTimestamp()
connection.expiresAt = expiresAt
connection.externalId = str(identity["accountId"])
connection.externalUsername = username
connection.grantedScopes = [
"drive",
"workspace:mail",
"workspace:calendar",
"workspace:contact",
]
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
token = Token( token = Token(
userId=user.id, userId=currentUser.id,
authority=AuthAuthority.INFOMANIAK, authority=AuthAuthority.INFOMANIAK,
connectionId=connection_id, connectionId=connectionId,
tokenPurpose=TokenPurpose.DATA_CONNECTION, tokenPurpose=TokenPurpose.DATA_CONNECTION,
tokenAccess=access_token, tokenAccess=pat,
tokenRefresh=refresh_token, tokenRefresh=None,
tokenType=token_json.get("token_type", "bearer"), tokenType="bearer",
expiresAt=expires_at, expiresAt=expiresAt,
createdAt=getUtcTimestamp(), createdAt=getUtcTimestamp(),
) )
interface.saveConnectionToken(token) interface.saveConnectionToken(token)
return HTMLResponse( driveSummary = [
content=f""" {"id": d.get("id"), "name": d.get("name"), "role": d.get("role")}
<html> for d in drives
<head><title>Connection Successful</title></head> ]
<body> logger.info(
<script> f"Infomaniak PAT stored for connection {connectionId} "
if (window.opener) {{ f"(user {currentUser.id}, externalUsername={username}, "
window.opener.postMessage({{ f"kSuiteAccountId={identity['accountId']}, "
type: 'infomaniak_connection_success', f"accessibleDrives={driveSummary})"
connection: {{
id: '{connection.id}',
status: 'connected',
type: 'infomaniak',
lastChecked: {getUtcTimestamp()},
expiresAt: {expires_at}
}}
}}, '*');
setTimeout(() => window.close(), 1000);
}} else {{
window.close();
}}
</script>
</body>
</html>
"""
) )
return {
"id": connection.id,
"status": "connected",
"type": "infomaniak",
"externalUsername": username,
"externalEmail": None,
"lastChecked": connection.lastChecked,
}
except HTTPException:
raise
except Exception as e: except Exception as e:
logger.error(f"Error updating Infomaniak connection: {str(e)}", exc_info=True) logger.error(
return HTMLResponse( f"Error persisting Infomaniak token for connection {connectionId}: {e}",
content=f""" exc_info=True,
<html><body><script> )
if (window.opener) {{ raise HTTPException(
window.opener.postMessage({{ type: 'infomaniak_connection_error', error: {json.dumps(str(e))} }}, '*'); status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
setTimeout(() => window.close(), 1000); detail=routeApiMsg("Failed to store Infomaniak token"),
}} else window.close();
</script></body></html>
""",
status_code=500,
) )

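A small sketch of the fingerprint fallback used for `externalUsername` above when the resolved identity carries no display name; the PAT value here is made up.

import hashlib

pat = "example-personal-access-token"
tokenFingerprint = "pat-" + hashlib.sha256(pat.encode("utf-8")).hexdigest()[:8]
displayName = ""  # e.g. no display name on the resolved Infomaniak identity
username = displayName or f"infomaniak-{tokenFingerprint}"
assert username.startswith("infomaniak-pat-")
assert len(tokenFingerprint) == 12  # "pat-" plus 8 hex chars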
View file

@ -244,9 +244,15 @@ async def auth_login_callback(
def auth_connect( def auth_connect(
request: Request, request: Request,
connectionId: str = Query(..., description="UserConnection id"), connectionId: str = Query(..., description="UserConnection id"),
reauth: Optional[int] = Query(0, description="If 1, force the consent screen so newly added scopes are granted"),
currentUser: User = Depends(getCurrentUser), currentUser: User = Depends(getCurrentUser),
) -> RedirectResponse: ) -> RedirectResponse:
"""Start Microsoft Data OAuth for an existing connection.""" """Start Microsoft Data OAuth for an existing connection.
With ``reauth=1`` the consent screen is forced (``prompt=consent``) so the
user re-grants permissions and any newly added scopes (e.g. Calendars.Read,
Contacts.Read) actually land on the access token.
"""
try: try:
_require_msft_data_config() _require_msft_data_config()
interface = getInterface(currentUser) interface = getInterface(currentUser)
@ -280,6 +286,8 @@ def auth_connect(
if "@" in login_hint: if "@" in login_hint:
login_kwargs["domain_hint"] = login_hint.split("@", 1)[1] login_kwargs["domain_hint"] = login_hint.split("@", 1)[1]
login_kwargs["prompt"] = "login" login_kwargs["prompt"] = "login"
if reauth:
login_kwargs["prompt"] = "consent"
auth_url = msal_app.get_authorization_request_url( auth_url = msal_app.get_authorization_request_url(
scopes=msftDataScopes, scopes=msftDataScopes,
@ -412,6 +420,29 @@ async def auth_connect_callback(
) )
interface.saveConnectionToken(token) interface.saveConnectionToken(token)
try:
from modules.shared.callbackRegistry import callbackRegistry
if connection.knowledgeIngestionEnabled:
callbackRegistry.trigger(
"connection.established",
connectionId=connection.id,
authority=str(getattr(connection.authority, "value", connection.authority) or "msft"),
userId=str(user.id),
)
else:
logger.info(
"ingestion.connection.bootstrap.skipped — knowledge ingestion disabled by user",
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connection.id,
"authority": "msft",
"reason": "consent_disabled",
},
)
except Exception as _cbErr:
logger.warning("connection.established callback failed for %s: %s", connection.id, _cbErr)
return HTMLResponse( return HTMLResponse(
content=f""" content=f"""
<html> <html>
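Restated as a minimal standalone sketch (the helper name is mine, not the repo's): the only behavioural change of `reauth=1` is which MSAL prompt value is requested.

def effectivePrompt(reauth: int) -> str:
    # reauth=1 forces the consent screen so newly added scopes
    # (e.g. Calendars.Read, Contacts.Read) land on the access token;
    # otherwise the existing forced interactive login is kept.
    return "consent" if reauth else "login"

assert effectivePrompt(1) == "consent"
assert effectivePrompt(0) == "login"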

View file

@ -187,7 +187,15 @@ def _catalogTypeToJsonSchema(typeStr: str, _depth: int = 0) -> Dict[str, Any]:
def _createDispatchHandler(actionExecutor, methodName: str, actionName: str): def _createDispatchHandler(actionExecutor, methodName: str, actionName: str):
"""Create an async handler that dispatches to the ActionExecutor.""" """Create an async handler that dispatches to the ActionExecutor.
Parameter validation and Ref-payload normalization (collapsing
``{id: ..., featureCode: ...}`` from the agent's typed tool schema to the
bare UUID expected by action implementations) happen centrally inside
``ActionExecutor.executeAction`` via ``parameterValidation``. This keeps
a single source of truth for the action parameter contract regardless
of caller (agent, workflow graph, REST route).
"""
async def _handler(args: Dict[str, Any], context: Dict[str, Any]) -> ToolResult: async def _handler(args: Dict[str, Any], context: Dict[str, Any]) -> ToolResult:
try: try:
if context: if context:
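An illustrative sketch of the Ref-payload collapse the docstring above describes; the helper name and exact key handling are assumptions, only the idea of reducing `{id, featureCode}` to the bare UUID comes from the text.

def collapseRefPayload(value):
    # Typed tool schema sends {"id": ..., "featureCode": ...};
    # action implementations expect just the id string.
    if isinstance(value, dict) and "id" in value:
        return value["id"]
    return value

assert collapseRefPayload({"id": "9b2c-uuid", "featureCode": "CRM"}) == "9b2c-uuid"
assert collapseRefPayload("9b2c-uuid") == "9b2c-uuid"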

View file

@ -392,6 +392,18 @@ def buildSystemPrompt(
"- Prefer modular file structures over monolithic files.\n" "- Prefer modular file structures over monolithic files.\n"
"- When generating applications, create separate files for logical components.\n" "- When generating applications, create separate files for logical components.\n"
"- Always plan the structure before writing code.\n\n" "- Always plan the structure before writing code.\n\n"
"### Document references for AI tools (CRITICAL)\n"
"Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
"`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
"- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
" `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
" `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
" in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
" resolver only matches `docItem:` references.\n"
"- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
" and image embeds (`![alt](file:<fileId>)`).\n"
"Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
"`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
) )
if toolsFormatted: if toolsFormatted:
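To make the two-id rule concrete, a sketch of the follow-up tool arguments an agent would emit after a download; the ids are invented.

# downloadFromDataSource reported: "documentList ref: docItem:abc123" and "file id: f42"
summarize_args = {
    "documentList": ["docItem:abc123"],  # the literal docItem string, never the bare file id
    "summaryLength": "medium",
}
read_args = {"fileId": "f42"}  # direct reads, appends, and image embeds use the file id instead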

View file

@ -9,7 +9,9 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_buildResolverDbFromServices, _buildResolverDbFromServices,
_formatToolFileResult,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope, _resolveFileScope,
@ -37,6 +39,11 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
return getattr(chatService, "interfaceDbComponent", None) return getattr(chatService, "interfaceDbComponent", None)
# ---- DataSource convenience tools ---- # ---- DataSource convenience tools ----
# Maps the FE-side `sourceType` literal (see SourcesTab.tsx
# `_SERVICE_TO_SOURCE_TYPE`) to the Connector's `service` key in
# `_SERVICE_MAP`. Keep this table in sync with both the FE and the
# Connector `_SERVICE_MAP` entries -- a missing row produces
# "Service '<sourceType>' not available" in the agent tools.
_SOURCE_TYPE_TO_SERVICE = { _SOURCE_TYPE_TO_SERVICE = {
"sharepointFolder": "sharepoint", "sharepointFolder": "sharepoint",
"onedriveFolder": "onedrive", "onedriveFolder": "onedrive",
@ -45,6 +52,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
"gmailFolder": "gmail", "gmailFolder": "gmail",
"ftpFolder": "files", "ftpFolder": "files",
"clickupList": "clickup", "clickupList": "clickup",
"kdriveFolder": "kdrive",
"calendarFolder": "calendar",
"contactFolder": "contact",
} }
async def _resolveDataSource(dsId: str): async def _resolveDataSource(dsId: str):
@ -223,11 +233,27 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"datasource:{dsId or directService or 'download'}",
userMessage=f"Downloaded {fileName} from external data source",
)
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else "" ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
hint = "Use readFile to read the text content." if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg") else "Use readFile to access the content." hint = (
"Use readFile to read the text content."
if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg")
else "Use readFile to access the content."
)
return ToolResult( return ToolResult(
toolCallId="", toolName="downloadFromDataSource", success=True, toolCallId="", toolName="downloadFromDataSource", success=True,
data=f"Downloaded '{fileName}' ({len(fileBytes)} bytes) → local file id: {fileItem.id}. {hint}" data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Downloaded",
extraInfo=hint,
),
) )
except Exception as e: except Exception as e:
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e)) return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
@ -300,8 +326,15 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register( registry.register(
"downloadFromDataSource", _downloadFromDataSource, "downloadFromDataSource", _downloadFromDataSource,
description=( description=(
"Download a file or email from a data source into local storage. Returns a local file ID " "Download a file or email from a data source into local storage. "
"to read with readFile. Accepts either dataSourceId OR connectionId+service. " "The result line contains TWO ids you must use for different purposes:\n"
" - `documentList ref: docItem:<chatDocId>` -- pass this string verbatim "
" inside the `documentList` parameter of `ai_process`, "
" `ai_summarizeDocument`, `context_extractContent`, `context_neutralizeData`, etc. "
" Always use the `docItem:<chatDocId>` form, NOT the file id, NOT a `{\"documents\":[{\"id\":...}]}` "
" wrapper -- the documentList resolver only matches `docItem:` references against the workflow.\n"
" - `file id: <fileId>` -- pass this to `readFile`, `searchInFileContent`, image embeds (`file:<fileId>`).\n"
"Accepts either dataSourceId OR connectionId+service. "
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content." "For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
), ),
parameters={ parameters={
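A sketch of how the mapping comment above plays out when the FE sends an unknown `sourceType`; the lookup helper and the truncated table are illustrative, not the repo's actual function.

_SOURCE_TYPE_TO_SERVICE = {
    "sharepointFolder": "sharepoint",
    "kdriveFolder": "kdrive",
    "calendarFolder": "calendar",
    "contactFolder": "contact",
}

def resolveService(sourceType: str) -> str:
    service = _SOURCE_TYPE_TO_SERVICE.get(sourceType)
    if not service:
        # Missing row -> the agent-facing error mentioned in the comment above.
        raise ValueError(f"Service '{sourceType}' not available")
    return service

assert resolveService("kdriveFolder") == "kdrive"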

View file

@ -11,8 +11,6 @@ from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistr
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary,
_resolveFileScope,
_MAX_TOOL_RESULT_CHARS, _MAX_TOOL_RESULT_CHARS,
) )
@ -392,65 +390,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
if chunkMime: if chunkMime:
mimeType = chunkMime mimeType = chunkMime
# 2) File not yet indexed -> trigger extraction via ExtractionService, then retry # 2) Direct image file (not a container) - use raw file data
if not imageData and knowledgeService and not knowledgeService.isFileIndexed(fileId):
try:
chatService = services.chat
fileInfo = chatService.getFileInfo(fileId)
fileContent = chatService.getFileContent(fileId)
if fileContent and fileInfo:
rawData = fileContent.get("data", "")
if isinstance(rawData, str) and len(rawData) > 100:
rawBytes = _b64.b64decode(rawData)
elif isinstance(rawData, bytes):
rawBytes = rawData
else:
rawBytes = None
if rawBytes:
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ExtractionOptions
fileMime = fileInfo.get("mimeType", "application/octet-stream")
fileName = fileInfo.get("fileName", fileId)
extracted = runExtraction(
ExtractorRegistry(), None,
rawBytes, fileName, fileMime, ExtractionOptions(),
)
contentObjects = []
for part in extracted.parts:
tg = (part.typeGroup or "").lower()
ct = "image" if tg == "image" else "text"
if not part.data or not part.data.strip():
continue
contentObjects.append({
"contentObjectId": part.id,
"contentType": ct,
"data": part.data,
"contextRef": {"containerPath": fileName, "location": part.label, **(part.metadata or {})},
})
if contentObjects:
_diFiId, _diMId = _resolveFileScope(fileId, context)
await knowledgeService.indexFile(
fileId=fileId, fileName=fileName, mimeType=fileMime,
userId=context.get("userId", ""), contentObjects=contentObjects,
featureInstanceId=_diFiId,
mandateId=_diMId,
)
chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
if pageIndex is not None:
imageChunks = [c for c in imageChunks if c.get("contextRef", {}).get("pageIndex") == pageIndex]
if imageChunks:
imageData = imageChunks[0].get("data", "")
except Exception as extractErr:
logger.warning(f"describeImage: on-demand extraction failed: {extractErr}")
# 3) Direct image file (not a container) - use raw file data
if not imageData: if not imageData:
chatService = services.chat chatService = services.chat
fileContent = chatService.getFileContent(fileId) fileContent = chatService.getFileContent(fileId)
@ -460,7 +400,7 @@ def _registerDocumentTools(registry: ToolRegistry, services):
imageData = fileContent.get("data", "") imageData = fileContent.get("data", "")
mimeType = fileMimeType mimeType = fileMimeType
# 4) PDF page rendering: render the requested page as an image via PyMuPDF # 3) PDF page rendering: render the requested page as an image via PyMuPDF
if not imageData: if not imageData:
chatService = services.chat chatService = services.chat
fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None fileInfo = chatService.getFileInfo(fileId) if hasattr(chatService, "getFileInfo") else None

View file

@ -3,7 +3,8 @@
"""Shared helpers for core agent tools (file scope, binary detection, temp folder).""" """Shared helpers for core agent tools (file scope, binary detection, temp folder)."""
import logging import logging
from typing import Any, Optional import uuid
from typing import Any, Dict, Optional, Tuple
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -78,6 +79,138 @@ def _getOrCreateTempFolder(chatService) -> Optional[str]:
return None return None
def _attachFileAsChatDocument(
services: Any,
fileItem: Any,
*,
label: str = "agent_tool_output",
userMessage: str = "",
role: str = "assistant",
) -> Optional[str]:
"""Bind a persisted FileItem to the active workflow as a ChatDocument.
This is the **single canonical bridge** between agent-tool-produced
artefacts and the workflow's document model. Mirrors the pattern
used by workflow actions (``workflowProcessor.persistTaskResult`` /
``methodTrustee.extractFromFiles``): every artefact a workflow step
-- including agent tools -- materialises ends up addressable via
``docItem:<chatDocId>`` so downstream tools that consume
``documentList`` can resolve it against
``workflow.messages[*].documents[*].id``.
Without this bind the agent's ``downloadFromDataSource`` /
``writeFile(create)`` / ``renderDocument`` / ``generateImage`` /
``createChart`` outputs are FileItem-only and unreachable from
``getChatDocumentsFromDocumentList`` -- the symptom is
``ai_summarizeDocument`` etc. running with 0 ContentParts.
Args:
services: agent-tool services container (must expose ``.chat``).
fileItem: persisted FileItem (Pydantic obj or dict) returned
from ``saveUploadedFile`` / ``createFile`` /
``saveGeneratedFile``.
label: ``documentsLabel`` for the carrier ChatMessage --
picked up by ``docList:<label>`` references.
userMessage: optional human-readable message text.
role: ``"assistant"`` (default) or ``"tool"``; affects only
display semantics, not resolution.
Returns:
The new ``ChatDocument.id`` on success, or ``None`` when no
active workflow is bound to the chat service (e.g. standalone
agent calls outside a chat workflow). Never raises.
"""
try:
chatService = services.chat
workflow = getattr(chatService, "_workflow", None)
if not workflow or not getattr(workflow, "id", None):
return None
if isinstance(fileItem, dict):
fileId = fileItem.get("id")
fileName = fileItem.get("fileName")
fileSize = fileItem.get("fileSize") or 0
mimeType = fileItem.get("mimeType") or "application/octet-stream"
else:
fileId = getattr(fileItem, "id", None)
fileName = getattr(fileItem, "fileName", None)
fileSize = getattr(fileItem, "fileSize", None) or 0
mimeType = getattr(fileItem, "mimeType", None) or "application/octet-stream"
if not fileId:
logger.warning("_attachFileAsChatDocument: fileItem has no id, skipping bind.")
return None
chatDoc: Dict[str, Any] = {
"id": str(uuid.uuid4()),
"fileId": fileId,
"fileName": fileName or fileId,
"fileSize": fileSize,
"mimeType": mimeType,
"roundNumber": getattr(workflow, "currentRound", None),
"taskNumber": getattr(workflow, "currentTask", None),
"actionNumber": getattr(workflow, "currentAction", None),
}
messageData: Dict[str, Any] = {
"id": f"msg_tool_{uuid.uuid4().hex[:12]}",
"role": role,
"status": "step",
"message": userMessage or f"Tool result: {fileName or fileId}",
"documentsLabel": label,
}
createdMessage = chatService.storeMessageWithDocuments(
workflow, messageData, [chatDoc],
)
if not createdMessage or not getattr(createdMessage, "documents", None):
return None
return createdMessage.documents[0].id
except Exception as e:
logger.warning(f"_attachFileAsChatDocument failed (fileItem id={getattr(fileItem, 'id', None) or (fileItem.get('id') if isinstance(fileItem, dict) else '?')}): {e}")
return None
def _formatToolFileResult(
*,
fileItem: Any,
chatDocId: Optional[str],
actionLabel: str = "Created",
extraInfo: str = "",
) -> str:
"""Render the canonical agent-tool file result message.
Always presents BOTH ids the agent needs:
* ``docItem:<chatDocId>`` -- use as ``documentList`` entry for
tools like ``ai_process`` / ``ai_summarizeDocument`` /
``context_extractContent`` (resolved through ChatDocument).
* ``file id: <fileItem.id>`` -- use as ``fileId`` for direct
reads via ``readFile`` / ``downloadFile`` / image embedding
(``file:<fileItem.id>``).
When no active workflow is bound, ``chatDocId`` is ``None`` and
only the file-id line is shown -- the file is still usable for
direct reads, just not for ``documentList`` references (those
require a workflow context anyway).
"""
if isinstance(fileItem, dict):
fileId = fileItem.get("id", "?")
fileName = fileItem.get("fileName", "")
fileSize = fileItem.get("fileSize", 0)
else:
fileId = getattr(fileItem, "id", "?")
fileName = getattr(fileItem, "fileName", "")
fileSize = getattr(fileItem, "fileSize", 0)
head = f"{actionLabel} '{fileName}' ({fileSize} bytes)" if fileName else f"{actionLabel} file ({fileSize} bytes)"
parts = [head]
if chatDocId:
parts.append(f" documentList ref: docItem:{chatDocId}")
parts.append(f" file id: {fileId}")
if extraInfo:
parts.append(extraInfo)
return "\n".join(parts)
def _buildResolverDbFromServices(services: Any): def _buildResolverDbFromServices(services: Any):
"""DB adapter for ConnectorResolver: load UserConnections by id. """DB adapter for ConnectorResolver: load UserConnections by id.

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope, _resolveFileScope,
@ -316,7 +318,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})") chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"renderDocument:{docName}",
userMessage=f"Rendered document {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({ sideEvents.append({
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -340,7 +348,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). " "Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with " "For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. " "`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
"Images: ![alt text](file:fileId) in the markdown." "Images: ![alt text](file:fileId) in the markdown. "
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -588,7 +599,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})") chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"generateImage:{docName}",
userMessage=f"Generated image {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({ sideEvents.append({
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -612,7 +629,9 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Generate an image from a text description using AI (DALL-E). " "Generate an image from a text description using AI (DALL-E). "
"The generated image is saved as a file in the workspace. " "The generated image is saved as a file in the workspace. "
"Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. " "Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. "
"Provide a detailed, descriptive prompt for best results." "Provide a detailed, descriptive prompt for best results. "
"Each image's result line carries `file id: <fileId>` (for embeds / readFile) and "
"`doc ref: docItem:<chatDocId>` (use inside `documentList` for downstream AI tools)."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -743,14 +762,24 @@ def _registerMediaTools(registry: ToolRegistry, services):
if tempFolderId and fid != "?": if tempFolderId and fid != "?":
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"createChart:{fileName}",
userMessage=f"Created chart {fileName}",
)
sideEvents = [{"type": "fileCreated", "data": { sideEvents = [{"type": "fileCreated", "data": {
"fileId": fid, "fileName": fileName, "fileId": fid, "fileName": fileName,
"mimeType": "image/png", "fileSize": len(pngData), "mimeType": "image/png", "fileSize": len(pngData),
}}] }}]
return ToolResult( return ToolResult(
toolCallId="", toolName="createChart", success=True, toolCallId="", toolName="createChart", success=True,
data=f"Chart saved as '{fileName}' (id: {fid}, {len(pngData)} bytes). " data=_formatToolFileResult(
f"Embed in documents with: ![{title}](file:{fid})", fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Chart saved as",
extraInfo=f"Embed in documents with: ![{title}](file:{fid})",
),
sideEvents=sideEvents, sideEvents=sideEvents,
) )
@ -764,7 +793,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Create a data chart/graph as a PNG image using matplotlib. " "Create a data chart/graph as a PNG image using matplotlib. "
"Supported types: bar, horizontalBar, line, area, scatter, pie, donut. " "Supported types: bar, horizontalBar, line, area, scatter, pie, donut. "
"The chart is saved as a file in the workspace. " "The chart is saved as a file in the workspace. "
"Use the returned fileId to embed in documents via renderDocument: ![title](file:fileId). " "Use the returned `file id: <fileId>` to embed in documents via "
"renderDocument: ![title](file:fileId). The result line also carries "
"`doc ref: docItem:<chatDocId>` -- use it inside `documentList` for "
"downstream AI tools that need the chart as a data source. "
"Provide structured data with labels and datasets." "Provide structured data with labels and datasets."
), ),
parameters={ parameters={

View file

@ -9,10 +9,11 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateInstanceFolder, _getOrCreateInstanceFolder,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope,
_MAX_TOOL_RESULT_CHARS, _MAX_TOOL_RESULT_CHARS,
) )
@ -48,6 +49,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
return ToolResult(toolCallId="", toolName="readFile", success=False, error="fileId is required") return ToolResult(toolCallId="", toolName="readFile", success=False, error="fileId is required")
try: try:
knowledgeService = services.getService("knowledge") if hasattr(services, "getService") else None knowledgeService = services.getService("knowledge") if hasattr(services, "getService") else None
fileStatus = None
# 1) Knowledge Store: return already-extracted text chunks # 1) Knowledge Store: return already-extracted text chunks
if knowledgeService: if knowledgeService:
@ -75,7 +77,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
data=f"[File {fileId} is currently being processed (status: {fileStatus}). Try again shortly.]", data=f"[File {fileId} is currently being processed (status: {fileStatus}). Try again shortly.]",
) )
# 2) Not indexed yet: try on-demand extraction # 2) Not indexed yet: inspect file type to decide how to serve the agent
# (binary -> instruct agent to wait / re-upload; text -> decode raw bytes inline)
chatService = services.chat chatService = services.chat
fileInfo = chatService.getFileInfo(fileId) fileInfo = chatService.getFileInfo(fileId)
if not fileInfo: if not fileInfo:
@ -98,83 +101,14 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
isBinary = _looksLikeBinary(rawBytes) isBinary = _looksLikeBinary(rawBytes)
if isBinary: if isBinary:
try:
from modules.serviceCenter.services.serviceExtraction.subRegistry import ExtractorRegistry, ChunkerRegistry
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.datamodels.datamodelExtraction import ExtractionOptions
extracted = runExtraction(
ExtractorRegistry(), ChunkerRegistry(),
rawBytes, fileName, mimeType, ExtractionOptions(),
)
contentObjects = []
for part in extracted.parts:
tg = (part.typeGroup or "").lower()
ct = "image" if tg == "image" else "text"
if not part.data or not part.data.strip():
continue
contentObjects.append({
"contentObjectId": part.id,
"contentType": ct,
"data": part.data,
"contextRef": {
"containerPath": fileName,
"location": part.label or "file",
**(part.metadata or {}),
},
})
if contentObjects:
if knowledgeService:
try:
userId = context.get("userId", "")
_fiId, _mId = _resolveFileScope(fileId, context)
await knowledgeService.indexFile(
fileId=fileId, fileName=fileName, mimeType=mimeType,
userId=userId, contentObjects=contentObjects,
featureInstanceId=_fiId,
mandateId=_mId,
)
except Exception as e:
logger.warning(f"readFile: knowledge indexing failed for {fileId}: {e}")
joined = ""
if knowledgeService:
_chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
_textChunks = [
c for c in (_chunks or [])
if c.get("contentType") != "image" and c.get("data")
]
if _textChunks:
joined = "\n\n".join(c["data"] for c in _textChunks)
if not joined:
textParts = [o["data"] for o in contentObjects if o["contentType"] != "image"]
joined = "\n\n".join(textParts) if textParts else ""
if joined:
chunked = _applyOffsetLimit(joined, offset, limit)
if chunked is not None:
return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked)
if len(joined) > _MAX_TOOL_RESULT_CHARS:
joined = joined[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(joined)}. Use offset/limit to read specific sections.]"
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=joined,
)
imgCount = sum(1 for o in contentObjects if o["contentType"] == "image")
return ToolResult(
toolCallId="", toolName="readFile", success=True,
data=f"[Extracted {len(contentObjects)} content objects from '{fileName}' "
f"({imgCount} images, no readable text). "
f"Use describeImage(fileId='{fileId}') to analyze visual content.]",
)
except Exception as extractErr:
logger.warning(f"readFile extraction failed for {fileId} ({fileName}): {extractErr}")
return ToolResult( return ToolResult(
toolCallId="", toolName="readFile", success=True, toolCallId="", toolName="readFile", success=True,
data=f"[Binary file: '{fileName}', type={mimeType}, size={len(rawBytes)} bytes. " data=(
f"Text extraction not available. Use describeImage for images.]", f"[File '{fileName}' ({mimeType}) is not yet indexed "
f"(status: {fileStatus or 'unknown'}). Indexing runs automatically "
f"on upload. Please wait a few seconds and retry, or re-upload the file. "
f"For visual content use describeImage(fileId='{fileId}').]"
),
) )
# 3) Text file: decode raw bytes # 3) Text file: decode raw bytes
@ -428,9 +362,19 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId}) dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
if args.get("tags"): if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]}) dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"writeFile:{name}",
userMessage=f"Created {name} via writeFile",
)
return ToolResult( return ToolResult(
toolCallId="", toolName="writeFile", success=True, toolCallId="", toolName="writeFile", success=True,
data=f"File '{name}' created (id: {fileItem.id})", data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Created",
),
sideEvents=[{ sideEvents=[{
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -573,7 +517,11 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"- create (default): create a new file (name required).\n" "- create (default): create a new file (name required).\n"
"- append: append content to an existing file (fileId required). " "- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n" "Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required)." "- overwrite: replace entire file content (fileId required).\n"
"On `mode=create` the result line contains BOTH a `documentList ref: docItem:<chatDocId>` "
"(use this for documentList parameters of `ai_process` / `ai_summarizeDocument` / "
"`context_extractContent` etc., always as the literal string `docItem:<id>`) AND a "
"`file id: <fileId>` (use this for `readFile`, `writeFile mode=append`, image embeds)."
), ),
parameters={ parameters={
"type": "object", "type": "object",

View file

@ -178,6 +178,33 @@ class AgentService:
if workflowId is None: if workflowId is None:
workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown" workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown"
# Propagate the active workflow into every service's request
# context so agent-tool side effects (e.g. _attachFileAsChatDocument
# for downloadFromDataSource / writeFile / renderDocument) can
# bind their FileItem outputs to the workflow as ChatDocuments.
# Without this, chatService._workflow (= chatService._context.workflow)
# stays None and the documentList resolver finds zero docs --
# which is exactly the "Building structure prompt with 0 valid
# ContentParts" symptom we see when the workspace route calls
# runAgent for an attached single-file data source.
# Mirrors workflowManager._propagateWorkflowToContext.
if workflowId and workflowId != "unknown":
try:
workflow = getattr(self.services, "workflow", None)
if workflow is None or getattr(workflow, "id", None) != workflowId:
workflow = self.services.chat.getWorkflow(workflowId)
if workflow is not None:
self.services.workflow = workflow
ctx = getattr(self.services, "_service_context", None)
if ctx is not None:
ctx.workflow = workflow
for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
svc = getattr(self.services, attr, None)
if svc is not None and hasattr(svc, "_context") and svc._context is not None:
svc._context.workflow = workflow
except Exception as e:
logger.warning(f"runAgent: could not propagate workflow {workflowId} into service contexts: {e}")
resolvedLanguage = userLanguage or "" resolvedLanguage = userLanguage or ""
enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds) enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds)
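A distilled sketch of the propagation step above; the helper name is mine, the attribute names follow the block shown.

def propagateWorkflow(services, workflow) -> None:
    # Every sub-service request context gets the active workflow so
    # agent-tool side effects can bind their FileItems as ChatDocuments.
    services.workflow = workflow
    for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
        svc = getattr(services, attr, None)
        if svc is not None and getattr(svc, "_context", None) is not None:
            svc._context.workflow = workflow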

View file

@ -164,12 +164,29 @@ class AiService:
# SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection # SPEECH_TEAMS: Dedicated pipeline, bypasses standard model selection
if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS: if request.options and request.options.operationType == OperationTypeEnum.SPEECH_TEAMS:
return await self._handleSpeechTeams(request) return await self._handleSpeechTeams(request)
# FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check) _opType = request.options.operationType if request.options else None
self._preflightBillingCheck() _isNeutralizationCall = _opType in (
OperationTypeEnum.NEUTRALIZATION_TEXT,
# Balance & provider permission checks OperationTypeEnum.NEUTRALIZATION_IMAGE,
await self._checkBillingBeforeAiCall() )
if not _isNeutralizationCall:
# FAIL-SAFE: Pre-flight billing validation (like 0 CHF credit card check)
self._preflightBillingCheck()
# Balance & provider permission checks
await self._checkBillingBeforeAiCall()
else:
# Neutralization calls are system-level operations (connector anonymization).
# They run without a mandate context (e.g. personal-scope connections) and
# are billed the same way as embedding calls: best-effort, skipped when no
# billing settings exist for an empty mandate.
logger.debug(
"callAi: skipping billing preflight for neutralization call "
"(operationType=%s, user=%s)",
_opType,
getattr(getattr(self.services, 'user', None), 'id', 'unknown'),
)
# Calculate effective allowedProviders: RBAC ∩ Workflow # Calculate effective allowedProviders: RBAC ∩ Workflow
effectiveProviders = self._calculateEffectiveProviders() effectiveProviders = self._calculateEffectiveProviders()
@ -218,8 +235,15 @@ class AiService:
Rehydration happens on the final AiCallResponse (not on individual str deltas). Rehydration happens on the final AiCallResponse (not on individual str deltas).
""" """
await self.ensureAiObjectsInitialized() await self.ensureAiObjectsInitialized()
self._preflightBillingCheck()
await self._checkBillingBeforeAiCall() _streamOpType = request.options.operationType if request.options else None
_isNeutralizationStream = _streamOpType in (
OperationTypeEnum.NEUTRALIZATION_TEXT,
OperationTypeEnum.NEUTRALIZATION_IMAGE,
)
if not _isNeutralizationStream:
self._preflightBillingCheck()
await self._checkBillingBeforeAiCall()
effectiveProviders = self._calculateEffectiveProviders() effectiveProviders = self._calculateEffectiveProviders()
if effectiveProviders and request.options: if effectiveProviders and request.options:
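The gating rule restated as a tiny standalone sketch; enum members are shown as plain strings for illustration only.

_NEUTRALIZATION_OPS = {"NEUTRALIZATION_TEXT", "NEUTRALIZATION_IMAGE"}

def shouldSkipBillingPreflight(opTypeName: str) -> bool:
    # Neutralization calls are system-level (connector anonymization) and may
    # run without a mandate, so preflight billing and balance checks are skipped.
    return opTypeName in _NEUTRALIZATION_OPS

assert shouldSkipBillingPreflight("NEUTRALIZATION_TEXT")
assert not shouldSkipBillingPreflight("SPEECH_TEAMS")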

View file

@ -463,36 +463,38 @@ class ChatService:
Returns: Returns:
List of file info dicts. List of file info dicts.
""" """
# `getAllFiles` returns `List[dict]` (each entry is a
# `FileItem.model_dump()` enriched with label columns) -- not
# Pydantic objects -- so we use dict-access throughout.
allFiles = self.interfaceDbComponent.getAllFiles() allFiles = self.interfaceDbComponent.getAllFiles()
results = [] results = []
for fileItem in allFiles: for fileItem in allFiles:
if folderId is not None: if folderId is not None:
itemFolderId = getattr(fileItem, "folderId", None) if fileItem.get("folderId") != folderId:
if itemFolderId != folderId:
continue continue
if tags: if tags:
itemTags = getattr(fileItem, "tags", None) or [] itemTags = fileItem.get("tags") or []
if not any(t in itemTags for t in tags): if not any(t in itemTags for t in tags):
continue continue
if search: if search:
searchLower = search.lower() searchLower = search.lower()
nameMatch = searchLower in (fileItem.fileName or "").lower() nameMatch = searchLower in (fileItem.get("fileName") or "").lower()
descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower() descMatch = searchLower in (fileItem.get("description") or "").lower()
if not nameMatch and not descMatch: if not nameMatch and not descMatch:
continue continue
results.append({ results.append({
"id": fileItem.id, "id": fileItem.get("id"),
"fileName": fileItem.fileName, "fileName": fileItem.get("fileName"),
"mimeType": fileItem.mimeType, "mimeType": fileItem.get("mimeType"),
"fileSize": fileItem.fileSize, "fileSize": fileItem.get("fileSize"),
"creationDate": fileItem.sysCreatedAt, "creationDate": fileItem.get("sysCreatedAt"),
"tags": getattr(fileItem, "tags", None), "tags": fileItem.get("tags"),
"folderId": getattr(fileItem, "folderId", None), "folderId": fileItem.get("folderId"),
"description": getattr(fileItem, "description", None), "description": fileItem.get("description"),
"status": getattr(fileItem, "status", None), "status": fileItem.get("status"),
}) })
return results return results

View file

@ -2,9 +2,13 @@
# All rights reserved. # All rights reserved.
"""Knowledge service: 3-tier RAG with indexing, semantic search, and context building.""" """Knowledge service: 3-tier RAG with indexing, semantic search, and context building."""
import hashlib
import json
import logging import logging
import re import re
from typing import Any, Callable, Dict, List, Optional import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Union
from modules.datamodels.datamodelKnowledge import ( from modules.datamodels.datamodelKnowledge import (
FileContentIndex, ContentChunk, WorkflowMemory, FileContentIndex, ContentChunk, WorkflowMemory,
@ -20,6 +24,68 @@ DEFAULT_CHUNK_TOKENS = 400
DEFAULT_CONTEXT_BUDGET = 12000 DEFAULT_CONTEXT_BUDGET = 12000
# =============================================================================
# Ingestion façade (P0 of unified-knowledge-indexing concept)
# =============================================================================
@dataclass
class IngestionJob:
"""One request to add or refresh content in the unified knowledge store.
Callers from any lane (routes, feature hooks, agent tools, connector sync)
describe the work they want done via this object; idempotency, scope
resolution, and embedding are handled by KnowledgeService.requestIngestion.
"""
sourceKind: str
sourceId: str
fileName: str
mimeType: str
userId: str
contentObjects: List[Dict[str, Any]] = field(default_factory=list)
featureInstanceId: str = ""
mandateId: str = ""
structure: Optional[Dict[str, Any]] = None
containerPath: Optional[str] = None
contentVersion: Optional[str] = None
provenance: Optional[Dict[str, Any]] = None
# Connector-driven neutralization: True when the user opted in via §2.6 preferences.
# For sourceKind == "file", _indexFileInternal resolves this from FileItem.neutralize instead.
neutralize: bool = False
@dataclass
class IngestionHandle:
"""Result of requestIngestion. Stable across in-process and future queue impls."""
jobId: str
status: str
contentHash: str
fileId: str
index: Optional[FileContentIndex] = None
error: Optional[str] = None
def _computeIngestionHash(contentObjects: List[Dict[str, Any]]) -> str:
"""Deterministic SHA256 over (contentType, data) tuples in extractor order.
`contentObjectId` is intentionally excluded because extractors generate
fresh UUIDs per run (`uuid.uuid4()`), which would make the hash unstable
across re-extractions of the same source, defeating idempotency.
Order is preserved (no sort) because two different documents can share the
same multiset of parts but differ in arrangement (e.g. swapped pages).
Text whitespace is preserved intentionally because chunk boundaries
depend on it.
"""
normalized = [
(
str(o.get("contentType", "text") or "text"),
o.get("data", "") or "",
)
for o in (contentObjects or [])
]
payload = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
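A standalone restatement of the hash (name shortened here) to illustrate the idempotency property: per-run content-object ids do not affect it, while content and ordering do.

import hashlib, json

def computeIngestionHash(contentObjects):
    normalized = [
        (str(o.get("contentType", "text") or "text"), o.get("data", "") or "")
        for o in (contentObjects or [])
    ]
    payload = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()

a = [{"contentObjectId": "uuid-run-1", "contentType": "text", "data": "page 1"}]
b = [{"contentObjectId": "uuid-run-2", "contentType": "text", "data": "page 1"}]
assert computeIngestionHash(a) == computeIngestionHash(b)  # fresh per-run ids are ignored
c = [{"contentType": "text", "data": "page 1"}, {"contentType": "text", "data": "page 2"}]
assert computeIngestionHash(c) != computeIngestionHash(list(reversed(c)))  # order-sensitive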
class KnowledgeService: class KnowledgeService:
"""Service for Knowledge Store operations: indexing, retrieval, and context building.""" """Service for Knowledge Store operations: indexing, retrieval, and context building."""
@ -46,6 +112,224 @@ class KnowledgeService:
results = await self._embed([text]) results = await self._embed([text])
return results[0] if results else [] return results[0] if results else []
# =========================================================================
# Ingestion façade (single entry point for all lanes)
# =========================================================================
async def requestIngestion(self, job: IngestionJob) -> IngestionHandle:
"""Unified entry point for filling the knowledge corpus.
Applies idempotency based on a content hash (or caller-supplied
`contentVersion`) persisted in `FileContentIndex.structure._ingestion`.
Re-runs indexing only when the hash differs or the previous run did
not reach `indexed` state. Runs embedding synchronously for now
(callers already schedule background tasks where needed).
"""
jobId = f"{job.sourceKind}:{job.sourceId}"
startMs = time.time()
contentHash = job.contentVersion or _computeIngestionHash(job.contentObjects)
# 1. Check for duplicate via existing FileContentIndex row.
existing = None
try:
existing = self._knowledgeDb.getFileContentIndex(job.sourceId)
except Exception:
existing = None
if existing:
existingStructure = (
existing.get("structure") if isinstance(existing, dict)
else getattr(existing, "structure", {})
) or {}
existingMeta = existingStructure.get("_ingestion", {}) or {}
existingStatus = (
existing.get("status") if isinstance(existing, dict)
else getattr(existing, "status", "")
) or ""
if existingMeta.get("hash") == contentHash and existingStatus == "indexed":
logger.info(
"ingestion.skipped.duplicate sourceKind=%s sourceId=%s hash=%s",
job.sourceKind, job.sourceId, contentHash[:12],
extra={
"event": "ingestion.skipped.duplicate",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"durationMs": int((time.time() - startMs) * 1000),
},
)
return IngestionHandle(
jobId=jobId,
status="duplicate",
contentHash=contentHash,
fileId=job.sourceId,
index=None,
)
# 2. Prepare ingestion metadata; stays in structure._ingestion so
# later connector revoke/purge can filter chunks by sourceKind /
# provenance.connectionId without a schema migration.
ingestionMeta = {
"hash": contentHash,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"contentVersion": job.contentVersion,
"indexedAt": getUtcTimestamp(),
"provenance": dict(job.provenance or {}),
}
structure = dict(job.structure or {})
structure["_ingestion"] = ingestionMeta
logger.info(
"ingestion.queued sourceKind=%s sourceId=%s objects=%d hash=%s",
job.sourceKind, job.sourceId, len(job.contentObjects or []), contentHash[:12],
extra={
"event": "ingestion.queued",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"objectCount": len(job.contentObjects or []),
},
)
# 3. Run real indexing.
try:
index = await self._indexFileInternal(
fileId=job.sourceId,
fileName=job.fileName,
mimeType=job.mimeType,
userId=job.userId,
featureInstanceId=job.featureInstanceId,
mandateId=job.mandateId,
contentObjects=job.contentObjects or [],
structure=structure,
containerPath=job.containerPath,
sourceKind=job.sourceKind,
connectionId=(job.provenance or {}).get("connectionId"),
neutralize=job.neutralize,
)
except Exception as exc:
logger.error(
"ingestion.failed sourceKind=%s sourceId=%s error=%s",
job.sourceKind, job.sourceId, exc,
exc_info=True,
extra={
"event": "ingestion.failed",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"error": str(exc),
"durationMs": int((time.time() - startMs) * 1000),
},
)
try:
self._knowledgeDb.updateFileStatus(job.sourceId, "failed")
except Exception:
pass
return IngestionHandle(
jobId=jobId,
status="failed",
contentHash=contentHash,
fileId=job.sourceId,
index=None,
error=str(exc),
)
logger.info(
"ingestion.indexed sourceKind=%s sourceId=%s objects=%d durationMs=%d",
job.sourceKind, job.sourceId, len(job.contentObjects or []),
int((time.time() - startMs) * 1000),
extra={
"event": "ingestion.indexed",
"jobId": jobId,
"sourceKind": job.sourceKind,
"sourceId": job.sourceId,
"hash": contentHash,
"objectCount": len(job.contentObjects or []),
"durationMs": int((time.time() - startMs) * 1000),
},
)
return IngestionHandle(
jobId=jobId,
status="indexed",
contentHash=contentHash,
fileId=job.sourceId,
index=index,
)
def purgeConnection(self, connectionId: str) -> Dict[str, int]:
"""Delete every FileContentIndex + ContentChunk linked to a UserConnection.
Called on `connection.revoked` events so the knowledge corpus never
holds chunks the user has withdrawn access to. Returns deletion counts
for observability.
"""
if not connectionId:
return {"indexRows": 0, "chunks": 0}
startMs = time.time()
result = self._knowledgeDb.deleteFileContentIndexByConnectionId(connectionId)
logger.info(
"ingestion.connection.purged connectionId=%s rows=%d chunks=%d durationMs=%d",
connectionId, result["indexRows"], result["chunks"],
int((time.time() - startMs) * 1000),
extra={
"event": "ingestion.connection.purged",
"connectionId": connectionId,
"indexRows": result["indexRows"],
"chunks": result["chunks"],
"durationMs": int((time.time() - startMs) * 1000),
},
)
return result
def getIngestionStatus(
self, handleOrJobId: Union[IngestionHandle, str]
) -> Dict[str, Any]:
"""Map a handle or `sourceKind:sourceId` jobId to a status snapshot."""
if isinstance(handleOrJobId, IngestionHandle):
sourceId = handleOrJobId.fileId
jobId = handleOrJobId.jobId
elif isinstance(handleOrJobId, str) and ":" in handleOrJobId:
jobId = handleOrJobId
sourceId = handleOrJobId.split(":", 1)[1]
else:
jobId = str(handleOrJobId)
sourceId = str(handleOrJobId)
row = None
try:
row = self._knowledgeDb.getFileContentIndex(sourceId)
except Exception:
row = None
if not row:
return {
"jobId": jobId,
"sourceId": sourceId,
"status": "unknown",
"contentHash": None,
}
structure = (
row.get("structure") if isinstance(row, dict)
else getattr(row, "structure", {})
) or {}
meta = structure.get("_ingestion", {}) or {}
status = (
row.get("status") if isinstance(row, dict)
else getattr(row, "status", "")
) or "unknown"
return {
"jobId": jobId,
"sourceId": sourceId,
"status": status,
"contentHash": meta.get("hash"),
"sourceKind": meta.get("sourceKind"),
"indexedAt": meta.get("indexedAt"),
}
# ========================================================================= # =========================================================================
# File Indexing (called after extraction, before embedding) # File Indexing (called after extraction, before embedding)
# ========================================================================= # =========================================================================
@ -61,6 +345,57 @@ class KnowledgeService:
contentObjects: List[Dict[str, Any]] = None, contentObjects: List[Dict[str, Any]] = None,
structure: Dict[str, Any] = None, structure: Dict[str, Any] = None,
containerPath: str = None, containerPath: str = None,
) -> Optional[FileContentIndex]:
"""Backward-compatible wrapper delegating to requestIngestion.
Existing callers that still invoke `indexFile` directly automatically
participate in the idempotency/metrics layer. New callers should
prefer `requestIngestion` so they can pass `sourceKind` and
`provenance` for connector revoke/purge later.
"""
job = IngestionJob(
sourceKind="file",
sourceId=fileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
featureInstanceId=featureInstanceId,
mandateId=mandateId,
contentObjects=list(contentObjects or []),
structure=structure,
containerPath=containerPath,
)
handle = await self.requestIngestion(job)
if handle.index is not None:
return handle.index
if handle.status == "duplicate":
row = None
try:
row = self._knowledgeDb.getFileContentIndex(fileId)
except Exception:
row = None
if isinstance(row, dict):
try:
return FileContentIndex(**row)
except Exception:
return None
return row
return None
async def _indexFileInternal(
self,
fileId: str,
fileName: str,
mimeType: str,
userId: str,
featureInstanceId: str = "",
mandateId: str = "",
contentObjects: List[Dict[str, Any]] = None,
structure: Dict[str, Any] = None,
containerPath: str = None,
sourceKind: str = "file",
connectionId: Optional[str] = None,
neutralize: bool = False,
) -> FileContentIndex: ) -> FileContentIndex:
"""Index a file's content objects and create embeddings for text chunks. """Index a file's content objects and create embeddings for text chunks.
@ -83,39 +418,41 @@ class KnowledgeService:
""" """
contentObjects = contentObjects or [] contentObjects = contentObjects or []
# 1. Resolve scope fields from FileItem (Single Source of Truth) # 1. Resolve scope fields from FileItem (Single Source of Truth) for
# FileItem lives in poweron_management; its scope/mandateId/featureInstanceId # uploaded files. Connector-sourced ingestion (sharepoint_item,
# are authoritative and must be mirrored onto the FileContentIndex. # outlook_message, ...) has no FileItem row — trust the caller's
# scope + ids directly.
resolvedScope = "personal" resolvedScope = "personal"
resolvedMandateId = mandateId resolvedMandateId = mandateId
resolvedFeatureInstanceId = featureInstanceId resolvedFeatureInstanceId = featureInstanceId
resolvedUserId = userId resolvedUserId = userId
_shouldNeutralize = False _shouldNeutralize = neutralize # caller-supplied flag (connector prefs / IngestionJob)
try: if sourceKind == "file":
from modules.datamodels.datamodelFiles import FileItem as _FileItem try:
_dbComponent = getattr(self._context, "interfaceDbComponent", None) from modules.datamodels.datamodelFiles import FileItem as _FileItem
_fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else [] _dbComponent = getattr(self._context, "interfaceDbComponent", None)
if not _fileRecords: _fileRecords = _dbComponent.getRecordset(_FileItem, recordFilter={"id": fileId}) if _dbComponent else []
from modules.interfaces.interfaceDbManagement import ComponentObjects if not _fileRecords:
_row = ComponentObjects().db._loadRecord(_FileItem, fileId) from modules.interfaces.interfaceDbManagement import ComponentObjects
if _row: _row = ComponentObjects().db._loadRecord(_FileItem, fileId)
_fileRecords = [_row] if _row:
if _fileRecords: _fileRecords = [_row]
_fileRecord = _fileRecords[0] if _fileRecords:
_get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d)) _fileRecord = _fileRecords[0]
_shouldNeutralize = bool(_get("neutralize", False)) _get = (lambda k, d=None: _fileRecord.get(k, d)) if isinstance(_fileRecord, dict) else (lambda k, d=None: getattr(_fileRecord, k, d))
_fileScope = _get("scope") _shouldNeutralize = bool(_get("neutralize", False)) # FileItem is authoritative for uploads
if _fileScope: _fileScope = _get("scope")
resolvedScope = _fileScope if _fileScope:
if not resolvedMandateId: resolvedScope = _fileScope
resolvedMandateId = str(_get("mandateId", "") or "") if not resolvedMandateId:
if not resolvedFeatureInstanceId: resolvedMandateId = str(_get("mandateId", "") or "")
resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "") if not resolvedFeatureInstanceId:
_fileCreatedBy = _get("sysCreatedBy") resolvedFeatureInstanceId = str(_get("featureInstanceId", "") or "")
if _fileCreatedBy: _fileCreatedBy = _get("sysCreatedBy")
resolvedUserId = str(_fileCreatedBy) if _fileCreatedBy:
except Exception: resolvedUserId = str(_fileCreatedBy)
pass except Exception:
pass
# 2. Create FileContentIndex with correct scope from the start # 2. Create FileContentIndex with correct scope from the start
index = FileContentIndex( index = FileContentIndex(
@ -124,6 +461,8 @@ class KnowledgeService:
featureInstanceId=resolvedFeatureInstanceId, featureInstanceId=resolvedFeatureInstanceId,
mandateId=resolvedMandateId, mandateId=resolvedMandateId,
scope=resolvedScope, scope=resolvedScope,
sourceKind=sourceKind,
connectionId=connectionId,
fileName=fileName, fileName=fileName,
mimeType=mimeType, mimeType=mimeType,
containerPath=containerPath, containerPath=containerPath,
@ -300,7 +639,12 @@ class KnowledgeService:
Formatted context string for injection into the agent's system prompt.
"""
queryVector = await self._embedSingle(currentPrompt)
logger.debug(
"buildAgentContext.start userId=%s featureInstanceId=%s mandateId=%s isSysAdmin=%s prompt=%r",
userId, featureInstanceId, mandateId, isSysAdmin, (currentPrompt or "")[:120],
)
if not queryVector:
logger.debug("buildAgentContext.abort reason=no_query_vector")
return ""
builder = _ContextBuilder(budget=contextBudget)
@ -327,9 +671,14 @@ class KnowledgeService:
featureInstanceId=featureInstanceId,
mandateId=mandateId,
limit=15,
minScore=0.35,
isSysAdmin=isSysAdmin,
)
logger.debug(
"buildAgentContext.layer1 instanceChunks=%d top_scores=%s",
len(instanceChunks),
[round(float(c.get("_score", 0) or 0), 3) for c in (instanceChunks or [])[:3]],
)
if instanceChunks:
builder.add(priority=1, label="Relevant Documents", items=instanceChunks, maxChars=4000)
@ -338,7 +687,7 @@ class KnowledgeService:
queryVector=queryVector,
workflowId=workflowId,
limit=10,
minScore=0.35,
)
if roundMemories:
memItems = []
@ -376,7 +725,7 @@ class KnowledgeService:
scope="mandate", scope="mandate",
mandateId=mandateId, mandateId=mandateId,
limit=10, limit=10,
minScore=0.7, minScore=0.35,
isSysAdmin=isSysAdmin, isSysAdmin=isSysAdmin,
) )
if mandateChunks: if mandateChunks:
@ -392,7 +741,12 @@ class KnowledgeService:
maxChars=500,
)
_result = builder.build()
logger.debug(
"buildAgentContext.done totalChars=%d userId=%s",
len(_result), userId,
)
return _result
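# Illustrative numbers only (assumed, not measured here): with minScore=0.35 a
# chunk whose cosine score against the query embedding is e.g. 0.42 now reaches
# the context builder, whereas the previous 0.55/0.65/0.70 cutoffs discarded it.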
# =========================================================================
# Workflow Memory

View file

@ -0,0 +1,334 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Connection-lifecycle consumer bridging OAuth events to ingestion jobs.
Subscribes to the `connection.established` and `connection.revoked` callbacks
emitted by the OAuth callback handlers / connection management routes and dispatches:
- `connection.established` -> enqueue a `connection.bootstrap` BackgroundJob
that walks the connector and ingests all reachable items via
KnowledgeService.requestIngestion (file-like or virtual documents).
- `connection.revoked` -> run `KnowledgeService.purgeConnection` synchronously
so the knowledge corpus releases the data before the UI confirms the revoke.
The consumer is registered once at process boot (see `app.py` lifespan).
It intentionally does NOT hold a per-user service context; each callback
creates whatever context it needs from the UserConnection row itself.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any, Dict, Optional
from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
from modules.shared.callbackRegistry import callbackRegistry
from modules.serviceCenter.services.serviceBackgroundJobs import (
registerJobHandler,
startJob,
)
logger = logging.getLogger(__name__)
BOOTSTRAP_JOB_TYPE = "connection.bootstrap"
_registered = False
def _onConnectionEstablished(
*,
connectionId: str,
authority: str,
userId: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Fire-and-forget bootstrap enqueue for a freshly connected UserConnection."""
if not connectionId:
logger.warning("connection.established without connectionId; ignoring")
return
payload: Dict[str, Any] = {
"connectionId": connectionId,
"authority": (authority or "").lower(),
"userId": userId,
}
logger.info(
"ingestion.connection.bootstrap.queued connectionId=%s authority=%s",
connectionId, authority,
extra={
"event": "ingestion.connection.bootstrap.queued",
"connectionId": connectionId,
"authority": authority,
},
)
async def _enqueue() -> None:
try:
await startJob(
BOOTSTRAP_JOB_TYPE,
payload,
triggeredBy=userId,
)
except Exception as exc:
logger.error(
"ingestion.connection.bootstrap.enqueue_failed connectionId=%s error=%s",
connectionId, exc, exc_info=True,
)
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_enqueue())
else:
loop.run_until_complete(_enqueue())
except RuntimeError:
asyncio.run(_enqueue())
def _onConnectionRevoked(
*,
connectionId: str,
authority: Optional[str] = None,
userId: Optional[str] = None,
reason: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Run the knowledge purge synchronously so UI feedback is authoritative."""
if not connectionId:
logger.warning("connection.revoked without connectionId; ignoring")
return
try:
# Purge lives on the DB interface to avoid ServiceCenter/user-context
# plumbing here; the service method is a thin wrapper on top of this.
result = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
except Exception as exc:
logger.error(
"ingestion.connection.purged.failed connectionId=%s error=%s",
connectionId, exc, exc_info=True,
)
return
logger.info(
"ingestion.connection.purged connectionId=%s authority=%s reason=%s rows=%d chunks=%d",
connectionId, authority, reason,
result.get("indexRows", 0), result.get("chunks", 0),
extra={
"event": "ingestion.connection.purged",
"connectionId": connectionId,
"authority": authority,
"reason": reason,
"indexRows": result.get("indexRows", 0),
"chunks": result.get("chunks", 0),
},
)
async def _bootstrapJobHandler(
job: Dict[str, Any],
progressCb,
) -> Dict[str, Any]:
"""Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
payload = job.get("payload") or {}
connectionId = payload.get("connectionId")
authority = (payload.get("authority") or "").lower()
if not connectionId:
raise ValueError("connection.bootstrap requires payload.connectionId")
progressCb(5, f"resolving {authority} connection")
# Defensive consent check: if the connection has since disabled knowledge ingestion
# (e.g. user toggled setting after the job was enqueued), skip all walkers.
try:
from modules.interfaces.interfaceDbApp import getRootInterface
_root = getRootInterface()
_conn = _root.getUserConnectionById(connectionId)
if _conn and not getattr(_conn, "knowledgeIngestionEnabled", True):
logger.info(
"ingestion.connection.bootstrap.skipped — consent disabled connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"connectionId": connectionId,
"authority": authority,
"reason": "consent_disabled",
},
)
return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "consent_disabled"}
except Exception as _guardErr:
logger.debug("Could not load connection for consent guard: %s", _guardErr)
def _normalize(res: Any, label: str) -> Dict[str, Any]:
if isinstance(res, Exception):
logger.error(
"ingestion.connection.bootstrap.failed part=%s connectionId=%s error=%s",
label, connectionId, res, exc_info=res,
)
return {"error": str(res)}
return res or {}
if authority == "msft":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint,
)
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
bootstrapOutlook,
)
progressCb(10, "sharepoint + outlook")
spResult, olResult = await asyncio.gather(
bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
return_exceptions=True,
)
return {
"connectionId": connectionId,
"authority": authority,
"sharepoint": _normalize(spResult, "sharepoint"),
"outlook": _normalize(olResult, "outlook"),
}
if authority == "google":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
bootstrapGdrive,
)
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
bootstrapGmail,
)
progressCb(10, "drive + gmail")
gdResult, gmResult = await asyncio.gather(
bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
return_exceptions=True,
)
return {
"connectionId": connectionId,
"authority": authority,
"drive": _normalize(gdResult, "gdrive"),
"gmail": _normalize(gmResult, "gmail"),
}
if authority == "clickup":
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
bootstrapClickup,
)
progressCb(10, "clickup tasks")
cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
return {
"connectionId": connectionId,
"authority": authority,
"clickup": _normalize(cuResult, "clickup"),
}
logger.info(
"ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s",
authority, connectionId,
extra={
"event": "ingestion.connection.bootstrap.skipped",
"authority": authority,
"connectionId": connectionId,
"reason": "unsupported_authority",
},
)
return {
"connectionId": connectionId,
"authority": authority,
"skipped": True,
"reason": "unsupported_authority",
}
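# Illustrative result shape for authority="msft" (values assumed; the inner field
# names mirror the analogous bootstrap result dicts below):
#   {"connectionId": "...", "authority": "msft",
#    "sharepoint": {"indexed": 12, "skippedDuplicate": 3, ...},
#    "outlook":    {"indexed": 40, "skippedDuplicate": 0, ...}}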
async def _scheduledDailyResync() -> None:
"""Enqueue a connection.bootstrap job for every active knowledge connection.
Runs once per day (default 2 AM Europe/Zurich). Each job re-walks the
connector and hands new / changed items to KnowledgeService.requestIngestion.
Unchanged items are deduplicated by content-hash and skipped automatically.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
rootInterface = getRootInterface()
connections = rootInterface.getActiveKnowledgeConnections()
except Exception as exc:
logger.error("knowledge.daily_resync: could not load connections: %s", exc, exc_info=True)
return
if not connections:
logger.info("knowledge.daily_resync: no active knowledge connections — nothing to do")
return
logger.info(
"knowledge.daily_resync: enqueuing bootstrap for %d connection(s)",
len(connections),
extra={"event": "knowledge.daily_resync.started", "count": len(connections)},
)
enqueued = 0
skipped = 0
for conn in connections:
connectionId = str(conn.id)
authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority)
userId = str(conn.userId)
payload: Dict[str, Any] = {
"connectionId": connectionId,
"authority": authority.lower(),
"userId": userId,
}
try:
await startJob(
BOOTSTRAP_JOB_TYPE,
payload,
triggeredBy="scheduler.daily_resync",
)
enqueued += 1
logger.debug(
"knowledge.daily_resync: queued connectionId=%s authority=%s",
connectionId, authority,
)
except Exception as exc:
skipped += 1
logger.error(
"knowledge.daily_resync: failed to enqueue connectionId=%s: %s",
connectionId, exc,
)
logger.info(
"knowledge.daily_resync: done — enqueued=%d skipped=%d",
enqueued, skipped,
extra={"event": "knowledge.daily_resync.done", "enqueued": enqueued, "skipped": skipped},
)
def registerDailyResyncScheduler(*, hour: int = 2, minute: int = 0) -> None:
"""Register the daily knowledge re-sync cron job. Idempotent.
Args:
hour: Hour of day to run (0-23, default 2 = 2 AM Europe/Zurich).
minute: Minute within the hour (default 0).
"""
try:
from modules.shared.eventManagement import eventManager
eventManager.registerCron(
jobId="knowledge.daily_resync",
func=_scheduledDailyResync,
cronKwargs={"hour": str(hour), "minute": str(minute)},
)
logger.info(
"knowledge.daily_resync scheduler registered (daily %02d:%02d Europe/Zurich)",
hour, minute,
)
except Exception as exc:
logger.warning("knowledge.daily_resync scheduler registration failed (non-critical): %s", exc)
def registerKnowledgeIngestionConsumer() -> None:
"""Register callback subscribers + background job handler. Idempotent."""
global _registered
if _registered:
return
callbackRegistry.register("connection.established", _onConnectionEstablished)
callbackRegistry.register("connection.revoked", _onConnectionRevoked)
registerJobHandler(BOOTSTRAP_JOB_TYPE, _bootstrapJobHandler)
registerDailyResyncScheduler()
_registered = True
logger.info("KnowledgeIngestionConsumer registered (established/revoked + %s handler + daily resync)", BOOTSTRAP_JOB_TYPE)

View file

@ -0,0 +1,101 @@
"""Per-connection knowledge ingestion preference helpers.
Walkers call `loadConnectionPrefs(connectionId)` once at bootstrap start and
receive a `ConnectionIngestionPrefs` dataclass they can pass down into their
inner loops. All fields have safe defaults so walkers stay backward-compatible
with connections that predate the §2.6 preference schema (knowledgePreferences
is None).
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
_DEFAULT_MAX_AGE_DAYS = 90
_DEFAULT_MAIL_DEPTH = "full"
_DEFAULT_CLICKUP_SCOPE = "title_description"
@dataclass
class ConnectionIngestionPrefs:
"""Parsed per-connection preferences for knowledge ingestion walkers."""
# PII
neutralizeBeforeEmbed: bool = False
# Mail (Outlook + Gmail)
mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full"
mailIndexAttachments: bool = False
# Files (Drive / SharePoint / OneDrive)
filesIndexBinaries: bool = True
mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed
# ClickUp
clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments"
clickupIndexAttachments: bool = False
# Per-authority surface toggles (default everything on)
gmailEnabled: bool = True
driveEnabled: bool = True
sharepointEnabled: bool = True
outlookEnabled: bool = True
# Time window
maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit
def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
"""Load and parse per-connection preferences from the database.
Returns safe defaults for any missing or unparseable values so walkers
never fail due to missing preference data.
"""
try:
from modules.interfaces.interfaceDbApp import getRootInterface
root = getRootInterface()
conn = root.getUserConnectionById(connectionId)
if not conn:
logger.debug("loadConnectionPrefs: connection %s not found, using defaults", connectionId)
return ConnectionIngestionPrefs()
raw: Optional[Dict[str, Any]] = getattr(conn, "knowledgePreferences", None)
if not raw or not isinstance(raw, dict):
return ConnectionIngestionPrefs()
def _bool(key: str, default: bool) -> bool:
v = raw.get(key)
return bool(v) if isinstance(v, bool) else default
def _str(key: str, allowed: List[str], default: str) -> str:
v = raw.get(key)
return v if v in allowed else default
def _int(key: str, default: int) -> int:
v = raw.get(key)
return int(v) if isinstance(v, int) else default
surface = raw.get("surfaceToggles") or {}
google_surf = surface.get("google") or {}
msft_surf = surface.get("msft") or {}
return ConnectionIngestionPrefs(
neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
mailIndexAttachments=_bool("mailIndexAttachments", False),
filesIndexBinaries=_bool("filesIndexBinaries", True),
mimeAllowlist=list(raw.get("mimeAllowlist") or []),
clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
clickupIndexAttachments=_bool("clickupIndexAttachments", False),
gmailEnabled=bool(google_surf.get("gmail", True)),
driveEnabled=bool(google_surf.get("drive", True)),
sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
outlookEnabled=bool(msft_surf.get("outlook", True)),
maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
)
except Exception as exc:
logger.warning("loadConnectionPrefs failed for %s, using defaults: %s", connectionId, exc)
return ConnectionIngestionPrefs()
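# --- Illustrative walker usage (sketch; `shouldSkipMime` is a hypothetical helper) ---
#
# prefs = loadConnectionPrefs(connectionId)
# if not prefs.sharepointEnabled:
#     return result  # surface toggled off by the user -> nothing to walk
#
# def shouldSkipMime(mimeType: str) -> bool:
#     # an empty allowlist means "all mime types allowed"
#     return bool(prefs.mimeAllowlist) and mimeType not in prefs.mimeAllowlist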

View file

@ -0,0 +1,512 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""ClickUp bootstrap for the unified knowledge ingestion lane.
ClickUp tasks are ingested as *virtual documents* we never download file
bytes. Each task becomes a `sourceKind="clickup_task"` IngestionJob whose
`contentObjects` carry a summary header (name + status + metadata) and the
task description / text content so retrieval finds them without a live API
call.
Hierarchy traversal: workspace (team) → spaces → folders / folderless lists
→ tasks. We cap the fan-out with `maxWorkspaces` / `maxListsPerWorkspace` /
`maxTasks` and skip tasks older than `maxAgeDays` (default 180 days).
Idempotency: `date_updated` from the ClickUp task payload is a millisecond
timestamp, strictly monotonic per revision, and is used as `contentVersion`.
"""
from __future__ import annotations
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
logger = logging.getLogger(__name__)
MAX_TASKS_DEFAULT = 500
MAX_WORKSPACES_DEFAULT = 3
MAX_LISTS_PER_WORKSPACE_DEFAULT = 20
MAX_DESCRIPTION_CHARS_DEFAULT = 8000
MAX_AGE_DAYS_DEFAULT = 180
@dataclass
class ClickupBootstrapLimits:
maxTasks: int = MAX_TASKS_DEFAULT
maxWorkspaces: int = MAX_WORKSPACES_DEFAULT
maxListsPerWorkspace: int = MAX_LISTS_PER_WORKSPACE_DEFAULT
maxDescriptionChars: int = MAX_DESCRIPTION_CHARS_DEFAULT
# Only ingest tasks updated within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Include closed/archived tasks if they still meet the recency filter.
# ClickUp `closed` tasks often carry the most useful RAG context
# ("why was this shipped the way it was?").
includeClosed: bool = True
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
# Content scope: "titles" | "title_description" | "with_comments"
clickupScope: str = "title_description"
@dataclass
class ClickupBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
workspaces: int = 0
lists: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticTaskId(connectionId: str, taskId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{taskId}".encode("utf-8")).hexdigest()[:16]
return f"cu:{connectionId[:8]}:{token}"
def _truncate(value: Any, limit: int) -> str:
text = str(value or "").strip()
if not text:
return ""
if len(text) <= limit:
return text
return text[:limit].rstrip() + "\n[truncated]"
def _isRecent(dateUpdatedMs: Any, maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays:
return True
if not dateUpdatedMs:
return True
try:
ts = datetime.fromtimestamp(int(dateUpdatedMs) / 1000.0, tz=timezone.utc)
except Exception:
return True
cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
return ts >= cutoff
def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -> List[Dict[str, Any]]:
"""Header (name/status/metadata) + optional description + text_content.
`limits.clickupScope` controls how much is embedded:
- "titles": task name + status metadata only
- "title_description": header + description / text_content (default)
- "with_comments": header + description + text_content
(comments themselves are not yet fetched in v1)
"""
name = task.get("name") or f"Task {task.get('id', '')}"
status = ((task.get("status") or {}).get("status")) or ""
assignees = ", ".join(
filter(None, [
(a.get("username") or a.get("email") or "")
for a in (task.get("assignees") or [])
])
)
tags = ", ".join(filter(None, [t.get("name", "") for t in (task.get("tags") or [])]))
listInfo = task.get("list") or {}
folderInfo = task.get("folder") or {}
spaceInfo = task.get("space") or {}
dueMs = task.get("due_date")
dueIso = ""
if dueMs:
try:
dueIso = datetime.fromtimestamp(int(dueMs) / 1000.0, tz=timezone.utc).strftime("%Y-%m-%d")
except Exception:
dueIso = ""
headerLines = [
f"Task: {name}",
f"Status: {status}" if status else "",
f"List: {listInfo.get('name', '')}" if listInfo else "",
f"Folder: {folderInfo.get('name', '')}" if folderInfo else "",
f"Space: {spaceInfo.get('name', '')}" if spaceInfo else "",
f"Assignees: {assignees}" if assignees else "",
f"Tags: {tags}" if tags else "",
f"Due: {dueIso}" if dueIso else "",
f"Url: {task.get('url', '')}" if task.get("url") else "",
]
header = "\n".join(line for line in headerLines if line)
parts: List[Dict[str, Any]] = [{
"contentObjectId": "header",
"contentType": "text",
"data": header,
"contextRef": {"part": "header"},
}]
scope = getattr(limits, "clickupScope", "title_description")
if scope in ("title_description", "with_comments"):
description = _truncate(task.get("description"), limits.maxDescriptionChars)
if description:
parts.append({
"contentObjectId": "description",
"contentType": "text",
"data": description,
"contextRef": {"part": "description"},
})
# text_content is ClickUp's rendered-markdown version; include if it adds
# something beyond the plain description (common for bullet lists, checklists).
textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
if textContent and textContent != description:
parts.append({
"contentObjectId": "text_content",
"contentType": "text",
"data": textContent,
"contextRef": {"part": "text_content"},
})
return parts
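# Illustrative output (assumed minimal task payload): for
#   {"id": "86c1", "name": "Ship v2", "status": {"status": "open"},
#    "description": "Roll out the v2 API"}
# the default "title_description" scope yields a header part plus a description part:
#   [{"contentObjectId": "header", "contentType": "text",
#     "data": "Task: Ship v2\nStatus: open", "contextRef": {"part": "header"}},
#    {"contentObjectId": "description", "contentType": "text",
#     "data": "Roll out the v2 API", "contextRef": {"part": "description"}}]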
async def bootstrapClickup(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[ClickupBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = ClickupBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
clickupScope=prefs.clickupScope,
)
startMs = time.time()
result = ClickupBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=clickup connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "clickup",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
svc = getattr(adapter, "_svc", None)
if svc is None:
result.errors.append("adapter missing _svc instance")
return _finalizeResult(connectionId, result, startMs)
try:
teamsResp = await svc.getAuthorizedTeams()
except Exception as exc:
logger.error("clickup team discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"teams: {exc}")
return _finalizeResult(connectionId, result, startMs)
teams = (teamsResp or {}).get("teams") or []
for team in teams[: limits.maxWorkspaces]:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
break
teamId = str(team.get("id", "") or "")
if not teamId:
continue
result.workspaces += 1
try:
await _walkTeam(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
team=team,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
result.errors.append(f"team({teamId}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerClickup.connectorClickup import ClickupConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = ClickupConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("clickup")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _walkTeam(
*,
svc,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
team: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
teamId = str(team.get("id", "") or "")
spacesResp = await svc.getSpaces(teamId)
spaces = (spacesResp or {}).get("spaces") or []
listsCollected: List[Dict[str, Any]] = []
for space in spaces:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
spaceId = str(space.get("id", "") or "")
if not spaceId:
continue
# Folderless lists directly under the space
folderless = await svc.getFolderlessLists(spaceId)
for lst in (folderless or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
listsCollected.append({**lst, "_space": space})
# Lists inside folders
foldersResp = await svc.getFolders(spaceId)
for folder in (foldersResp or {}).get("folders") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
folderId = str(folder.get("id", "") or "")
if not folderId:
continue
folderLists = await svc.getListsInFolder(folderId)
for lst in (folderLists or {}).get("lists") or []:
if len(listsCollected) >= limits.maxListsPerWorkspace:
break
listsCollected.append({**lst, "_space": space, "_folder": folder})
for lst in listsCollected:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return
result.lists += 1
await _walkList(
svc=svc,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
teamId=teamId,
lst=lst,
limits=limits,
result=result,
progressCb=progressCb,
)
async def _walkList(
*,
svc,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
teamId: str,
lst: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
listId = str(lst.get("id", "") or "")
if not listId:
return
page = 0
while result.indexed + result.skippedDuplicate < limits.maxTasks:
resp = await svc.getTasksInList(
listId,
page=page,
include_closed=limits.includeClosed,
subtasks=True,
)
if isinstance(resp, dict) and resp.get("error"):
logger.warning("clickup tasks list=%s page=%d error: %s", listId, page, resp.get("error"))
result.errors.append(f"list({listId}): {resp.get('error')}")
return
tasks = (resp or {}).get("tasks") or []
if not tasks:
return
for task in tasks:
if result.indexed + result.skippedDuplicate >= limits.maxTasks:
return
if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
result.skippedPolicy += 1
continue
# Inject the list/folder/space metadata we already loaded.
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
task["folder"] = task.get("folder") or lst.get("_folder") or {}
task["space"] = task.get("space") or lst.get("_space") or {}
await _ingestTask(
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
teamId=teamId,
task=task,
limits=limits,
result=result,
progressCb=progressCb,
)
if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page
return
page += 1
async def _ingestTask(
*,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
teamId: str,
task: Dict[str, Any],
limits: ClickupBootstrapLimits,
result: ClickupBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
taskId = str(task.get("id", "") or "")
if not taskId:
result.skippedPolicy += 1
return
revision = str(task.get("date_updated") or task.get("date_created") or "")
name = task.get("name") or f"Task {taskId}"
syntheticId = _syntheticTaskId(connectionId, taskId)
fileName = f"{name[:80].strip() or taskId}.task.json"
contentObjects = _buildContentObjects(task, limits)
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="clickup_task",
sourceId=syntheticId,
fileName=fileName,
mimeType="application/vnd.clickup.task+json",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision or None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "clickup",
"service": "clickup",
"externalItemId": taskId,
"teamId": teamId,
"listId": ((task.get("list") or {}).get("id")),
"spaceId": ((task.get("space") or {}).get("id")),
"url": task.get("url"),
"status": ((task.get("status") or {}).get("status")),
"tier": limits.clickupScope,
},
)
)
except Exception as exc:
logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({taskId}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
f"clickup processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "clickup",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
def _finalizeResult(connectionId: str, result: ClickupBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=clickup connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d workspaces=%d lists=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.failed, result.workspaces, result.lists, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "clickup",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"workspaces": result.workspaces,
"lists": result.lists,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"workspaces": result.workspaces,
"lists": result.lists,
"durationMs": durationMs,
"errors": result.errors[:20],
}

View file

@ -0,0 +1,443 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Google Drive bootstrap for the unified knowledge ingestion lane.
Mirrors the SharePoint pilot (see subConnectorSyncSharepoint.py). Walks the
user's *My Drive* tree from the virtual `root` folder, downloads each
file-like item via `DriveAdapter.download` (which handles native Google docs
via export), runs the standard extraction pipeline and routes results through
`KnowledgeService.requestIngestion` with `sourceKind="gdrive_item"` and
`contentVersion = modifiedTime` (monotonic per-revision).
"""
from __future__ import annotations
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
logger = logging.getLogger(__name__)
MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_AGE_DAYS_DEFAULT = 365
# Google Drive uses virtual mime-types for folders and non-downloadable assets.
FOLDER_MIME = "application/vnd.google-apps.folder"
@dataclass
class GdriveBootstrapLimits:
maxItems: int = MAX_ITEMS_DEFAULT
maxBytes: int = MAX_BYTES_DEFAULT
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT
# Only ingest files modified within the last N days. None disables filter.
maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
# Whether to skip binary/non-text files
filesIndexBinaries: bool = True
@dataclass
class GdriveBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
bytesProcessed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
return f"gd:{connectionId[:8]}:{token}"
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
parts = getattr(extracted, "parts", None) or []
out: List[Dict[str, Any]] = []
for part in parts:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
out.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "file",
**(getattr(part, "metadata", None) or {}),
},
})
return out
def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
if not maxAgeDays:
return True
if not modifiedIso:
# No timestamp -> be permissive (Drive native docs sometimes omit it on export).
return True
try:
# Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
except Exception:
return True
cutoff = datetime.now(timezone.utc) - timedelta(days=maxAgeDays)
if ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc)
return ts >= cutoff
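# e.g. (illustrative): "2024-06-01T12:00:00Z".replace("Z", "+00:00") parses via
# datetime.fromisoformat on Python 3.7+ as an aware UTC datetime, so the
# comparison against the timezone-aware cutoff is well-defined.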
async def bootstrapGdrive(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GdriveBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Walk My Drive starting from the virtual root folder."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = GdriveBootstrapLimits(
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
neutralize=prefs.neutralizeBeforeEmbed,
filesIndexBinaries=prefs.filesIndexBinaries,
)
startMs = time.time()
result = GdriveBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gdrive",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
if runExtractionFn is None:
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
def runExtractionFn(bytesData, name, mime, options): # type: ignore[no-redef]
return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root"
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"walk: {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = GoogleConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("drive")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _walkFolder(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
folderPath: str,
depth: int,
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
if depth > limits.maxDepth:
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
logger.warning("gdrive browse %s failed: %s", folderPath, exc)
result.errors.append(f"browse({folderPath}): {exc}")
return
for entry in entries:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
return
if result.bytesProcessed >= limits.maxBytes:
return
entryPath = getattr(entry, "path", "") or ""
metadata = getattr(entry, "metadata", {}) or {}
mimeType = getattr(entry, "mimeType", None) or metadata.get("mimeType")
if getattr(entry, "isFolder", False) or mimeType == FOLDER_MIME:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=entryPath,
depth=depth + 1,
limits=limits,
result=result,
progressCb=progressCb,
)
continue
effectiveMime = mimeType or "application/octet-stream"
if any(effectiveMime.startswith(prefix) for prefix in limits.skipMimePrefixes):
result.skippedPolicy += 1
continue
size = int(getattr(entry, "size", 0) or 0)
if size and size > limits.maxFileSize:
result.skippedPolicy += 1
continue
modifiedTime = metadata.get("modifiedTime")
if not _isRecent(modifiedTime, limits.maxAgeDays):
result.skippedPolicy += 1
continue
externalItemId = metadata.get("id") or entryPath
revision = modifiedTime
await _ingestOne(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
entry=entry,
entryPath=entryPath,
mimeType=effectiveMime,
externalItemId=externalItemId,
revision=revision,
limits=limits,
result=result,
progressCb=progressCb,
)
async def _ingestOne(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
entry,
entryPath: str,
mimeType: str,
externalItemId: str,
revision: Optional[str],
limits: GdriveBootstrapLimits,
result: GdriveBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
fileName = getattr(entry, "name", "") or externalItemId
try:
downloaded = await adapter.download(entryPath)
except Exception as exc:
logger.warning("gdrive download %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"download({entryPath}): {exc}")
return
# Adapter.download returns raw bytes today; guard DownloadResult shape too.
fileBytes: bytes
if isinstance(downloaded, (bytes, bytearray)):
fileBytes = bytes(downloaded)
else:
fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
if getattr(downloaded, "mimeType", None):
mimeType = downloaded.mimeType # export may have changed the type
if not fileBytes:
result.failed += 1
return
if len(fileBytes) > limits.maxFileSize:
result.skippedPolicy += 1
return
result.bytesProcessed += len(fileBytes)
try:
extracted = runExtractionFn(
fileBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("gdrive extraction %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"extract({entryPath}): {exc}")
return
contentObjects = _toContentObjects(extracted, fileName)
if not contentObjects:
result.skippedPolicy += 1
return
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gdrive_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "drive",
"externalItemId": externalItemId,
"entryPath": entryPath,
"tier": "body",
},
)
)
except Exception as exc:
logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({entryPath}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
f"gdrive processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=gdrive processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "gdrive",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=gdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d bytes=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.failed, result.bytesProcessed, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "gdrive",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"bytes": result.bytesProcessed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"bytesProcessed": result.bytesProcessed,
"durationMs": durationMs,
"errors": result.errors[:20],
}

View file

@ -0,0 +1,606 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Gmail bootstrap for the unified knowledge ingestion lane.
Mirrors the Outlook pilot (see subConnectorSyncOutlook.py) but talks to Google
Mail's REST API. Messages become `sourceKind="gmail_message"` virtual documents
with header / snippet / cleaned body content-objects; attachments are optional
child jobs with `sourceKind="gmail_attachment"`.
Idempotency: Gmail's stable `historyId` (or `internalDate` as fallback) is
passed as `contentVersion`, so rerunning the bootstrap yields
`ingestion.skipped.duplicate` for unchanged messages.
"""
from __future__ import annotations
import asyncio
import base64
import hashlib
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
logger = logging.getLogger(__name__)
MAX_MESSAGES_DEFAULT = 500
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
DEFAULT_LABELS = ("INBOX", "SENT")
@dataclass
class GmailBootstrapLimits:
maxMessages: int = MAX_MESSAGES_DEFAULT
labels: tuple = DEFAULT_LABELS
maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
includeAttachments: bool = False
maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
# Only fetch messages newer than N days. None disables filter.
maxAgeDays: Optional[int] = 90
# Content depth: "metadata" | "snippet" | "full"
mailContentDepth: str = "full"
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
@dataclass
class GmailBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
attachmentsIndexed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticMessageId(connectionId: str, messageId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
return f"gm:{connectionId[:8]}:{token}"
def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
token = hashlib.sha256(
f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
).hexdigest()[:16]
return f"ga:{connectionId[:8]}:{token}"
def _decodeBase64Url(data: str) -> bytes:
if not data:
return b""
# Gmail uses URL-safe base64 without padding.
padding = 4 - (len(data) % 4)
if padding != 4:
data = data + ("=" * padding)
try:
return base64.urlsafe_b64decode(data)
except Exception:
return b""
def _walkPayloadForBody(payload: Dict[str, Any]) -> Dict[str, str]:
"""Return {"text": ..., "html": ...} by walking MIME parts.
Gmail `payload` is a tree of parts. We prefer `text/plain` for the cleaned
body, but capture `text/html` as a fallback so `cleanEmailBody` can strip
markup if plain is missing.
"""
found: Dict[str, str] = {"text": "", "html": ""}
def _walk(part: Dict[str, Any]) -> None:
mime = (part.get("mimeType") or "").lower()
body = part.get("body") or {}
raw = body.get("data") or ""
if raw and mime.startswith("text/"):
decoded = _decodeBase64Url(raw).decode("utf-8", errors="replace")
key = "text" if mime == "text/plain" else ("html" if mime == "text/html" else "")
if key and not found[key]:
found[key] = decoded
for sub in part.get("parts") or []:
_walk(sub)
_walk(payload or {})
return found
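# Illustrative example (assumed minimal payload):
#   _walkPayloadForBody({"mimeType": "multipart/alternative",
#                        "parts": [{"mimeType": "text/plain",
#                                   "body": {"data": "SGVsbG8"}}]})
#   -> {"text": "Hello", "html": ""}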
def _headerMap(payload: Dict[str, Any]) -> Dict[str, str]:
return {
(h.get("name") or "").lower(): (h.get("value") or "")
for h in (payload.get("headers") or [])
}
def _buildContentObjects(
message: Dict[str, Any],
maxBodyChars: int,
mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
"""Build content objects for a Gmail message.
`mailContentDepth` controls how much is embedded:
- "metadata": header only (subject, from, to, date)
- "snippet": header + Gmail snippet (~155 chars, no full body)
- "full": header + snippet + cleaned full body (default)
"""
payload = message.get("payload") or {}
headers = _headerMap(payload)
subject = headers.get("subject") or "(no subject)"
fromAddr = headers.get("from") or ""
toAddr = headers.get("to") or ""
ccAddr = headers.get("cc") or ""
date = headers.get("date") or ""
snippet = message.get("snippet") or ""
parts: List[Dict[str, Any]] = []
header = (
f"Subject: {subject}\n"
f"From: {fromAddr}\n"
f"To: {toAddr}\n"
+ (f"Cc: {ccAddr}\n" if ccAddr else "")
+ f"Date: {date}"
)
parts.append({
"contentObjectId": "header",
"contentType": "text",
"data": header,
"contextRef": {"part": "header"},
})
if mailContentDepth in ("snippet", "full") and snippet:
parts.append({
"contentObjectId": "snippet",
"contentType": "text",
"data": snippet,
"contextRef": {"part": "snippet"},
})
if mailContentDepth == "full":
bodies = _walkPayloadForBody(payload)
rawBody = bodies["text"] or bodies["html"]
cleanedBody = cleanEmailBody(rawBody, maxChars=maxBodyChars) if rawBody else ""
if cleanedBody:
parts.append({
"contentObjectId": "body",
"contentType": "text",
"data": cleanedBody,
"contextRef": {"part": "body"},
})
return parts
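# Illustrative part counts (assuming a message with a subject, a snippet and a
# plain-text body):
#   "metadata" -> 1 part  (header only)
#   "snippet"  -> 2 parts (header + snippet)
#   "full"     -> 3 parts (header + snippet + cleaned body)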
async def bootstrapGmail(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[GmailBootstrapLimits] = None,
googleGetFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = GmailBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
startMs = time.time()
result = GmailBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=gmail connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "gmail",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
if googleGetFn is None:
from modules.connectors.providerGoogle.connectorGoogle import _googleGet as _defaultGet
token = getattr(adapter, "_token", "")
async def googleGetFn(url: str) -> Dict[str, Any]: # type: ignore[no-redef]
return await _defaultGet(token, url)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
for labelId in limits.labels:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestLabel(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
result.errors.append(f"label({labelId}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerGoogle.connectorGoogle import GoogleConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = GoogleConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("gmail")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _ingestLabel(
*,
googleGetFn,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
labelId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
return
pageSize = min(100, remaining)
query = ""
if limits.maxAgeDays:
cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
# Gmail uses YYYY/MM/DD.
query = f"after:{cutoff.strftime('%Y/%m/%d')}"
baseUrl = (
"https://gmail.googleapis.com/gmail/v1/users/me/messages"
f"?labelIds={labelId}&maxResults={pageSize}"
)
if query:
baseUrl = f"{baseUrl}&q={query}"
nextPageToken: Optional[str] = None
while (result.indexed + result.skippedDuplicate) < limits.maxMessages:
url = baseUrl if not nextPageToken else f"{baseUrl}&pageToken={nextPageToken}"
page = await googleGetFn(url)
if not isinstance(page, dict) or "error" in page:
err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
logger.warning("gmail list page error for label %s: %s", labelId, err)
result.errors.append(f"list({labelId}): {err}")
return
messageStubs = page.get("messages") or []
for stub in messageStubs:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
msgId = stub.get("id")
if not msgId:
continue
detailUrl = (
f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{msgId}?format=full"
)
detail = await googleGetFn(detailUrl)
if not isinstance(detail, dict) or "error" in detail:
result.failed += 1
continue
await _ingestMessage(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
labelId=labelId,
message=detail,
limits=limits,
result=result,
progressCb=progressCb,
)
nextPageToken = page.get("nextPageToken")
if not nextPageToken:
break
async def _ingestMessage(
*,
googleGetFn,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
labelId: str,
message: Dict[str, Any],
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
messageId = message.get("id")
if not messageId:
result.skippedPolicy += 1
return
revision = message.get("historyId") or message.get("internalDate")
headers = _headerMap(message.get("payload") or {})
subject = headers.get("subject") or "(no subject)"
syntheticId = _syntheticMessageId(connectionId, messageId)
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
contentObjects = _buildContentObjects(
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
)
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=str(revision) if revision else None,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "gmail",
"externalItemId": messageId,
"label": labelId,
"threadId": message.get("threadId"),
"tier": limits.mailContentDepth,
},
)
)
except Exception as exc:
logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({messageId}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if limits.includeAttachments:
try:
await _ingestAttachments(
googleGetFn=googleGetFn,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
message=message,
parentSyntheticId=syntheticId,
limits=limits,
result=result,
)
except Exception as exc:
logger.warning("gmail attachments %s failed: %s", messageId, exc)
result.errors.append(f"attachments({messageId}): {exc}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
f"gmail processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "gmail",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
await asyncio.sleep(0)
async def _ingestAttachments(
*,
googleGetFn,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
message: Dict[str, Any],
parentSyntheticId: str,
limits: GmailBootstrapLimits,
result: GmailBootstrapResult,
) -> None:
"""Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
messageId = message.get("id") or ""
def _collectAttachmentStubs(part: Dict[str, Any], acc: List[Dict[str, Any]]) -> None:
filename = part.get("filename") or ""
body = part.get("body") or {}
attId = body.get("attachmentId")
if filename and attId:
acc.append({
"filename": filename,
"mimeType": part.get("mimeType") or "application/octet-stream",
"attachmentId": attId,
"size": int(body.get("size") or 0),
})
for sub in part.get("parts") or []:
_collectAttachmentStubs(sub, acc)
stubs: List[Dict[str, Any]] = []
_collectAttachmentStubs(message.get("payload") or {}, stubs)
if not stubs:
return
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
for stub in stubs:
if stub["size"] and stub["size"] > limits.maxAttachmentBytes:
result.skippedPolicy += 1
continue
attUrl = (
f"https://gmail.googleapis.com/gmail/v1/users/me/messages/{messageId}"
f"/attachments/{stub['attachmentId']}"
)
detail = await googleGetFn(attUrl)
if not isinstance(detail, dict) or "error" in detail:
result.failed += 1
continue
rawBytes = _decodeBase64Url(detail.get("data") or "")
if not rawBytes:
continue
fileName = stub["filename"]
mimeType = stub["mimeType"]
syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"])
try:
extracted = runExtraction(
extractorRegistry, chunkerRegistry,
rawBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc)
result.failed += 1
continue
contentObjects: List[Dict[str, Any]] = []
for part in getattr(extracted, "parts", None) or []:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
contentObjects.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "attachment",
**(getattr(part, "metadata", None) or {}),
},
})
if not contentObjects:
result.skippedPolicy += 1
continue
try:
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="gmail_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
provenance={
"connectionId": connectionId,
"authority": "google",
"service": "gmail",
"parentId": parentSyntheticId,
"externalItemId": stub["attachmentId"],
"parentMessageId": messageId,
},
)
)
result.attachmentsIndexed += 1
except Exception as exc:
logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc)
result.failed += 1
def _finalizeResult(connectionId: str, result: GmailBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=gmail connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.attachmentsIndexed, result.failed, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "gmail",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
"errors": result.errors[:20],
}
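
The walker above leans on helpers defined earlier in this module (`_headerMap`, `_decodeBase64Url`, `_buildContentObjects`). For orientation, a minimal sketch of the attachment decoding step, assuming the Gmail API's URL-safe base64 without padding, could look like the following; the name is illustrative, not the module's actual helper.

import base64

def decodeBase64UrlSketch(data: str) -> bytes:
    # Gmail returns attachment bytes as URL-safe base64 and omits padding,
    # so pad to a multiple of four before decoding; empty/bad input -> b"".
    if not data:
        return b""
    padded = data + "=" * (-len(data) % 4)
    try:
        return base64.urlsafe_b64decode(padded)
    except Exception:
        return b""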

View file

@ -0,0 +1,576 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Outlook bootstrap for the unified knowledge ingestion lane.
Unlike SharePoint, Outlook messages are "virtual documents": we never persist
file bytes in the store. Each message becomes a `sourceKind="outlook_message"`
IngestionJob whose `contentObjects` carry the header, snippet and cleaned body
so retrieval can show a compact answer without fetching Graph again.
Attachments are optional (`includeAttachments` limit flag) and enqueued as
child jobs with `sourceKind="outlook_attachment"` + `provenance.parentId`.
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
logger = logging.getLogger(__name__)
MAX_MESSAGES_DEFAULT = 500
MAX_FOLDERS_DEFAULT = 5
MAX_BODY_CHARS_DEFAULT = 8000
MAX_ATTACHMENT_BYTES_DEFAULT = 10 * 1024 * 1024
WELL_KNOWN_FOLDERS = ("inbox", "sentitems")
@dataclass
class OutlookBootstrapLimits:
maxMessages: int = MAX_MESSAGES_DEFAULT
maxFolders: int = MAX_FOLDERS_DEFAULT
maxBodyChars: int = MAX_BODY_CHARS_DEFAULT
includeAttachments: bool = False
maxAttachmentBytes: int = MAX_ATTACHMENT_BYTES_DEFAULT
# Only fetch messages newer than N days. None disables filter.
maxAgeDays: Optional[int] = 90
# Content depth: "metadata" | "snippet" | "full"
mailContentDepth: str = "full"
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
@dataclass
class OutlookBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
attachmentsIndexed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticMessageId(connectionId: str, messageId: str) -> str:
token = hashlib.sha256(f"{connectionId}:{messageId}".encode("utf-8")).hexdigest()[:16]
return f"om:{connectionId[:8]}:{token}"
def _syntheticAttachmentId(connectionId: str, messageId: str, attachmentId: str) -> str:
token = hashlib.sha256(
f"{connectionId}:{messageId}:{attachmentId}".encode("utf-8")
).hexdigest()[:16]
return f"oa:{connectionId[:8]}:{token}"
def _extractRecipient(recipient: Dict[str, Any]) -> str:
email = (recipient or {}).get("emailAddress") or {}
name = email.get("name") or ""
addr = email.get("address") or ""
if name and addr:
return f"{name} <{addr}>"
return addr or name
def _joinRecipients(recipients: List[Dict[str, Any]]) -> str:
return ", ".join(filter(None, [_extractRecipient(r) for r in recipients or []]))
def _buildContentObjects(
message: Dict[str, Any],
maxBodyChars: int,
mailContentDepth: str = "full",
) -> List[Dict[str, Any]]:
"""Build content objects for an Outlook message.
`mailContentDepth` mirrors the Gmail walker:
- "metadata": header only
- "snippet": header + bodyPreview (~255 chars)
- "full": header + snippet + cleaned body (default)
"""
subject = message.get("subject") or "(no subject)"
fromAddr = _extractRecipient(message.get("from") or {})
toAddr = _joinRecipients(message.get("toRecipients") or [])
ccAddr = _joinRecipients(message.get("ccRecipients") or [])
received = message.get("receivedDateTime") or ""
snippet = message.get("bodyPreview") or ""
parts: List[Dict[str, Any]] = []
header = (
f"Subject: {subject}\n"
f"From: {fromAddr}\n"
f"To: {toAddr}\n"
+ (f"Cc: {ccAddr}\n" if ccAddr else "")
+ f"Date: {received}"
)
parts.append({
"contentObjectId": "header",
"contentType": "text",
"data": header,
"contextRef": {"part": "header"},
})
if mailContentDepth in ("snippet", "full") and snippet:
parts.append({
"contentObjectId": "snippet",
"contentType": "text",
"data": snippet,
"contextRef": {"part": "snippet"},
})
if mailContentDepth == "full":
body = message.get("body") or {}
bodyContent = body.get("content") or ""
cleanedBody = cleanEmailBody(bodyContent, maxChars=maxBodyChars) if bodyContent else ""
if cleanedBody:
parts.append({
"contentObjectId": "body",
"contentType": "text",
"data": cleanedBody,
"contextRef": {"part": "body"},
})
return parts
async def bootstrapOutlook(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[OutlookBootstrapLimits] = None,
) -> Dict[str, Any]:
"""Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = OutlookBootstrapLimits(
includeAttachments=prefs.mailIndexAttachments,
maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
mailContentDepth=prefs.mailContentDepth,
neutralize=prefs.neutralizeBeforeEmbed,
)
startMs = time.time()
result = OutlookBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=outlook connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "outlook",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
folderIds = await _selectFolderIds(adapter, limits)
for folderId in folderIds:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
try:
await _ingestFolder(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderId=folderId,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
result.errors.append(f"folder({folderId}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerMsft.connectorMsft import MsftConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = MsftConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("outlook")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _selectFolderIds(adapter, limits: OutlookBootstrapLimits) -> List[str]:
"""Prefer well-known folders (inbox, sentitems); fall back to browse()."""
folderIds: List[str] = []
for wellKnown in WELL_KNOWN_FOLDERS:
if len(folderIds) >= limits.maxFolders:
break
try:
row = await adapter._graphGet(f"me/mailFolders/{wellKnown}")
except Exception:
row = None
if isinstance(row, dict) and "error" not in row and row.get("id"):
folderIds.append(row["id"])
if len(folderIds) < limits.maxFolders:
try:
entries = await adapter.browse("/")
except Exception:
entries = []
for entry in entries:
metadata = getattr(entry, "metadata", {}) or {}
fid = metadata.get("id")
if fid and fid not in folderIds:
folderIds.append(fid)
if len(folderIds) >= limits.maxFolders:
break
return folderIds
async def _ingestFolder(
*,
adapter,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
folderId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
if remaining <= 0:
return
pageSize = min(100, remaining)
select = (
"id,subject,from,toRecipients,ccRecipients,receivedDateTime,"
"bodyPreview,body,internetMessageId,hasAttachments,changeKey"
)
endpoint: Optional[str] = (
f"me/mailFolders/{folderId}/messages"
f"?$top={pageSize}&$orderby=receivedDateTime desc&$select={select}"
)
# Apply the age filter ($filter on receivedDateTime) in Graph itself to avoid
# shipping ancient messages we'd only discard client-side.
if limits.maxAgeDays:
from datetime import datetime, timezone, timedelta
cutoff = datetime.now(timezone.utc) - timedelta(days=limits.maxAgeDays)
cutoffIso = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
endpoint = f"{endpoint}&$filter=receivedDateTime ge {cutoffIso}"
while endpoint and (result.indexed + result.skippedDuplicate) < limits.maxMessages:
try:
page = await adapter._graphGet(endpoint)
except Exception as exc:
logger.warning("outlook graph page failed for folder %s: %s", folderId, exc)
result.errors.append(f"graph({folderId}): {exc}")
return
if not isinstance(page, dict) or "error" in page:
err = (page or {}).get("error") if isinstance(page, dict) else "unknown"
logger.warning("outlook graph page error for folder %s: %s", folderId, err)
result.errors.append(f"graph({folderId}): {err}")
return
for message in page.get("value", []) or []:
if result.indexed + result.skippedDuplicate >= limits.maxMessages:
break
await _ingestMessage(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
message=message,
limits=limits,
result=result,
progressCb=progressCb,
)
nextLink = page.get("@odata.nextLink")
if not nextLink:
break
# Strip Graph base so adapter._graphGet accepts the relative path.
from modules.connectors.providerMsft.connectorMsft import _stripGraphBase
endpoint = _stripGraphBase(nextLink)
async def _ingestMessage(
*,
adapter,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
message: Dict[str, Any],
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
messageId = message.get("id")
if not messageId:
result.skippedPolicy += 1
return
revision = message.get("changeKey") or message.get("internetMessageId")
subject = message.get("subject") or "(no subject)"
syntheticId = _syntheticMessageId(connectionId, messageId)
fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml"
contentObjects = _buildContentObjects(
message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth
)
# At least the header is always emitted, so `contentObjects` is non-empty.
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_message",
sourceId=syntheticId,
fileName=fileName,
mimeType="message/rfc822",
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "msft",
"service": "outlook",
"externalItemId": messageId,
"internetMessageId": message.get("internetMessageId"),
"tier": limits.mailContentDepth,
},
)
)
except Exception as exc:
logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({messageId}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if limits.includeAttachments and message.get("hasAttachments"):
try:
await _ingestAttachments(
adapter=adapter,
knowledgeService=knowledgeService,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
messageId=messageId,
parentSyntheticId=syntheticId,
limits=limits,
result=result,
)
except Exception as exc:
logger.warning("outlook attachments %s failed: %s", messageId, exc)
result.errors.append(f"attachments({messageId}): {exc}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxMessages))),
f"outlook processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "outlook",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
await asyncio.sleep(0)
async def _ingestAttachments(
*,
adapter,
knowledgeService,
connectionId: str,
mandateId: str,
userId: str,
messageId: str,
parentSyntheticId: str,
limits: OutlookBootstrapLimits,
result: OutlookBootstrapResult,
) -> None:
"""Child ingestion jobs for file attachments (skip inline & oversized)."""
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
from modules.datamodels.datamodelExtraction import ExtractionOptions
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
import base64
page = await adapter._graphGet(f"me/messages/{messageId}/attachments")
if not isinstance(page, dict) or "error" in page:
return
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
for attachment in page.get("value", []) or []:
if attachment.get("@odata.type") != "#microsoft.graph.fileAttachment":
continue
if attachment.get("isInline"):
continue
size = int(attachment.get("size") or 0)
if size and size > limits.maxAttachmentBytes:
result.skippedPolicy += 1
continue
contentBytesB64 = attachment.get("contentBytes")
if not contentBytesB64:
continue
try:
rawBytes = base64.b64decode(contentBytesB64)
except Exception:
result.skippedPolicy += 1
continue
fileName = attachment.get("name") or "attachment"
mimeType = attachment.get("contentType") or "application/octet-stream"
attachmentId = attachment.get("id") or fileName
syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId)
try:
extracted = runExtraction(
extractorRegistry, chunkerRegistry,
rawBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc)
result.failed += 1
continue
contentObjects: List[Dict[str, Any]] = []
for part in getattr(extracted, "parts", None) or []:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
contentObjects.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "attachment",
**(getattr(part, "metadata", None) or {}),
},
})
if not contentObjects:
result.skippedPolicy += 1
continue
try:
await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="outlook_attachment",
sourceId=syntheticId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
neutralize=limits.neutralize,
provenance={
"connectionId": connectionId,
"authority": "msft",
"service": "outlook",
"parentId": parentSyntheticId,
"externalItemId": attachmentId,
"parentMessageId": messageId,
},
)
)
result.attachmentsIndexed += 1
except Exception as exc:
logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc)
result.failed += 1
def _finalizeResult(connectionId: str, result: OutlookBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=outlook connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d attachments=%d failed=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy,
result.attachmentsIndexed, result.failed, durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "outlook",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"attachmentsIndexed": result.attachmentsIndexed,
"failed": result.failed,
"durationMs": durationMs,
"errors": result.errors[:20],
}
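
To make the `mailContentDepth` tiers concrete, here is a small illustrative call to `_buildContentObjects` with a trimmed Graph message payload; the field names follow the Microsoft Graph message resource, the values are invented.

sampleMessage = {
    "subject": "Q3 forecast",
    "from": {"emailAddress": {"name": "Ada", "address": "ada@example.com"}},
    "toRecipients": [{"emailAddress": {"address": "bob@example.com"}}],
    "receivedDateTime": "2026-04-28T09:12:00Z",
    "bodyPreview": "Draft numbers attached.",
    "body": {"contentType": "html", "content": "<p>Draft numbers attached.</p>"},
}

parts = _buildContentObjects(sampleMessage, maxBodyChars=8000, mailContentDepth="snippet")
# -> two content objects: the "header" block (Subject/From/To/Date) and the
#    "snippet" block; the cleaned body is only appended when depth == "full".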

View file

@ -0,0 +1,433 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""SharePoint bootstrap for the unified knowledge ingestion lane.
Walks the SharePoint drive(s) reachable via a UserConnection, downloads each
file-like item, runs the standard content extraction pipeline and hands the
result to `KnowledgeService.requestIngestion`. Idempotency is provided by the
ingestion façade itself; repeat bootstraps therefore produce
`ingestion.skipped.duplicate` for every unchanged item because we pass the
Graph `eTag` as `contentVersion`.
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from modules.datamodels.datamodelExtraction import ExtractionOptions
logger = logging.getLogger(__name__)
MAX_ITEMS_DEFAULT = 500
MAX_BYTES_DEFAULT = 200 * 1024 * 1024
MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024
SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
MAX_DEPTH_DEFAULT = 4
MAX_SITES_DEFAULT = 3
@dataclass
class SharepointBootstrapLimits:
maxItems: int = MAX_ITEMS_DEFAULT
maxBytes: int = MAX_BYTES_DEFAULT
maxFileSize: int = MAX_FILE_SIZE_DEFAULT
skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
maxDepth: int = MAX_DEPTH_DEFAULT
maxSites: int = MAX_SITES_DEFAULT
# Pass-through to IngestionJob.neutralize
neutralize: bool = False
@dataclass
class SharepointBootstrapResult:
connectionId: str
indexed: int = 0
skippedDuplicate: int = 0
skippedPolicy: int = 0
failed: int = 0
bytesProcessed: int = 0
errors: List[str] = field(default_factory=list)
def _syntheticFileId(connectionId: str, externalItemId: str) -> str:
"""Deterministic synthetic FileContentIndex id for a SharePoint item.
Stable across bootstraps so idempotency works; independent of file name so
moves/renames don't duplicate chunks.
"""
token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16]
return f"sp:{connectionId[:8]}:{token}"
def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
"""Translate ExtractionResult → content objects accepted by requestIngestion."""
parts = getattr(extracted, "parts", None) or []
out: List[Dict[str, Any]] = []
for part in parts:
data = getattr(part, "data", None) or ""
if not data or not str(data).strip():
continue
typeGroup = getattr(part, "typeGroup", "text") or "text"
contentType = "text"
if typeGroup == "image":
contentType = "image"
elif typeGroup in ("binary", "container"):
contentType = "other"
out.append({
"contentObjectId": getattr(part, "id", ""),
"contentType": contentType,
"data": data,
"contextRef": {
"containerPath": fileName,
"location": getattr(part, "label", None) or "file",
**(getattr(part, "metadata", None) or {}),
},
})
return out
async def bootstrapSharepoint(
connectionId: str,
*,
progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
adapter: Any = None,
connection: Any = None,
knowledgeService: Any = None,
limits: Optional[SharepointBootstrapLimits] = None,
runExtractionFn: Optional[Callable[..., Any]] = None,
) -> Dict[str, Any]:
"""Enumerate SharePoint drives and ingest every reachable file via the façade.
Parameters allow injection for tests; production callers pass only
`connectionId` (and optionally a progressCb) and everything else is
resolved against the registered services.
"""
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
prefs = loadConnectionPrefs(connectionId)
if not limits:
limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
startMs = time.time()
result = SharepointBootstrapResult(connectionId=connectionId)
logger.info(
"ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
connectionId,
extra={
"event": "ingestion.connection.bootstrap.started",
"part": "sharepoint",
"connectionId": connectionId,
},
)
if adapter is None or knowledgeService is None or connection is None:
adapter, connection, knowledgeService = await _resolveDependencies(connectionId)
if runExtractionFn is None:
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ExtractorRegistry, ChunkerRegistry,
)
extractorRegistry = ExtractorRegistry()
chunkerRegistry = ChunkerRegistry()
def runExtractionFn(bytesData, name, mime, options): # type: ignore[no-redef]
return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options)
mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
try:
sites = await adapter.browse("/", limit=limits.maxSites)
except Exception as exc:
logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
result.errors.append(f"site_discovery: {exc}")
return _finalizeResult(connectionId, result, startMs)
for site in sites[: limits.maxSites]:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
break
sitePath = getattr(site, "path", "") or ""
try:
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=sitePath,
depth=0,
limits=limits,
result=result,
progressCb=progressCb,
)
except Exception as exc:
logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
result.errors.append(f"walk({sitePath}): {exc}")
return _finalizeResult(connectionId, result, startMs)
async def _resolveDependencies(connectionId: str):
"""Load connection, instantiate SharepointAdapter, and build a KnowledgeService.
Runs with root privileges: bootstrap is a system operation triggered by an
authenticated user via callback; it must not be gated by a per-user
service-center context.
"""
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.auth import TokenManager
from modules.connectors.providerMsft.connectorMsft import MsftConnector
from modules.serviceCenter import getService
from modules.serviceCenter.context import ServiceCenterContext
from modules.security.rootAccess import getRootUser
rootInterface = getRootInterface()
connection = rootInterface.getUserConnectionById(connectionId)
if connection is None:
raise ValueError(f"UserConnection not found: {connectionId}")
token = TokenManager().getFreshToken(connectionId)
if not token or not token.tokenAccess:
raise ValueError(f"No valid token for connection {connectionId}")
provider = MsftConnector(connection, token.tokenAccess)
adapter = provider.getServiceAdapter("sharepoint")
rootUser = getRootUser()
ctx = ServiceCenterContext(
user=rootUser,
mandate_id=str(getattr(connection, "mandateId", "") or ""),
)
knowledgeService = getService("knowledge", ctx)
return adapter, connection, knowledgeService
async def _walkFolder(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
folderPath: str,
depth: int,
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
if depth > limits.maxDepth:
return
try:
entries = await adapter.browse(folderPath)
except Exception as exc:
logger.warning("sharepoint browse %s failed: %s", folderPath, exc)
result.errors.append(f"browse({folderPath}): {exc}")
return
for entry in entries:
if result.indexed + result.skippedDuplicate >= limits.maxItems:
return
if result.bytesProcessed >= limits.maxBytes:
return
entryPath = getattr(entry, "path", "") or ""
if getattr(entry, "isFolder", False):
await _walkFolder(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
folderPath=entryPath,
depth=depth + 1,
limits=limits,
result=result,
progressCb=progressCb,
)
continue
mimeType = getattr(entry, "mimeType", None) or "application/octet-stream"
if any(mimeType.startswith(prefix) for prefix in limits.skipMimePrefixes):
result.skippedPolicy += 1
continue
size = int(getattr(entry, "size", 0) or 0)
if size and size > limits.maxFileSize:
result.skippedPolicy += 1
continue
metadata = getattr(entry, "metadata", {}) or {}
externalItemId = metadata.get("id") or entryPath
revision = metadata.get("revision") or metadata.get("lastModifiedDateTime")
await _ingestOne(
adapter=adapter,
knowledgeService=knowledgeService,
runExtractionFn=runExtractionFn,
connectionId=connectionId,
mandateId=mandateId,
userId=userId,
entry=entry,
entryPath=entryPath,
mimeType=mimeType,
externalItemId=externalItemId,
revision=revision,
limits=limits,
result=result,
progressCb=progressCb,
)
async def _ingestOne(
*,
adapter,
knowledgeService,
runExtractionFn,
connectionId: str,
mandateId: str,
userId: str,
entry,
entryPath: str,
mimeType: str,
externalItemId: str,
revision: Optional[str],
limits: SharepointBootstrapLimits,
result: SharepointBootstrapResult,
progressCb: Optional[Callable[[int, Optional[str]], None]],
) -> None:
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
syntheticFileId = _syntheticFileId(connectionId, externalItemId)
fileName = getattr(entry, "name", "") or externalItemId
try:
fileBytes = await adapter.download(entryPath)
except Exception as exc:
logger.warning("sharepoint download %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"download({entryPath}): {exc}")
return
if not fileBytes:
result.failed += 1
return
result.bytesProcessed += len(fileBytes)
try:
extracted = runExtractionFn(
fileBytes, fileName, mimeType,
ExtractionOptions(mergeStrategy=None),
)
except Exception as exc:
logger.warning("sharepoint extraction %s failed: %s", entryPath, exc)
result.failed += 1
result.errors.append(f"extract({entryPath}): {exc}")
return
contentObjects = _toContentObjects(extracted, fileName)
if not contentObjects:
result.skippedPolicy += 1
return
provenance: Dict[str, Any] = {
"connectionId": connectionId,
"authority": "msft",
"service": "sharepoint",
"externalItemId": externalItemId,
"externalPath": entryPath,
"revision": revision,
}
try:
handle = await knowledgeService.requestIngestion(
IngestionJob(
sourceKind="sharepoint_item",
sourceId=syntheticFileId,
fileName=fileName,
mimeType=mimeType,
userId=userId,
mandateId=mandateId,
contentObjects=contentObjects,
contentVersion=revision,
neutralize=limits.neutralize,
provenance=provenance,
)
)
except Exception as exc:
logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True)
result.failed += 1
result.errors.append(f"ingest({entryPath}): {exc}")
return
if handle.status == "duplicate":
result.skippedDuplicate += 1
elif handle.status == "indexed":
result.indexed += 1
else:
result.failed += 1
if handle.error:
result.errors.append(f"ingest({entryPath}): {handle.error}")
if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
processed = result.indexed + result.skippedDuplicate
try:
progressCb(
min(90, 10 + int(80 * processed / max(1, limits.maxItems))),
f"sharepoint processed={processed}",
)
except Exception:
pass
logger.info(
"ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d",
processed, result.skippedDuplicate, result.failed,
extra={
"event": "ingestion.connection.bootstrap.progress",
"part": "sharepoint",
"connectionId": connectionId,
"processed": processed,
"skippedDup": result.skippedDuplicate,
"failed": result.failed,
},
)
# Yield so the event loop can interleave other tasks (download/extract are
# CPU-ish and extraction uses sync libs; cooperative scheduling prevents
# starving other workers).
await asyncio.sleep(0)
def _finalizeResult(connectionId: str, result: SharepointBootstrapResult, startMs: float) -> Dict[str, Any]:
durationMs = int((time.time() - startMs) * 1000)
logger.info(
"ingestion.connection.bootstrap.done part=sharepoint connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d",
connectionId,
result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed,
durationMs,
extra={
"event": "ingestion.connection.bootstrap.done",
"part": "sharepoint",
"connectionId": connectionId,
"indexed": result.indexed,
"skippedDup": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"durationMs": durationMs,
},
)
return {
"connectionId": result.connectionId,
"indexed": result.indexed,
"skippedDuplicate": result.skippedDuplicate,
"skippedPolicy": result.skippedPolicy,
"failed": result.failed,
"bytesProcessed": result.bytesProcessed,
"durationMs": durationMs,
"errors": result.errors[:20],
}
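
The injection parameters make the walk testable without Graph or a live knowledge store. A minimal sketch of such a setup, assuming fakes that expose only the attributes the walker reads (and noting that `loadConnectionPrefs` would still need patching in a real test):

from types import SimpleNamespace

class FakeAdapter:
    # browse("/") yields one site folder; browsing the site yields one file.
    async def browse(self, path, limit=None):
        if path == "/":
            return [SimpleNamespace(path="/siteA", name="siteA", isFolder=True,
                                    size=0, mimeType=None, metadata={"id": "siteA"})]
        return [SimpleNamespace(path="/siteA/report.txt", name="report.txt",
                                isFolder=False, size=12, mimeType="text/plain",
                                metadata={"id": "item-1", "revision": "v1"})]

    async def download(self, path):
        return b"hello world."

class FakeKnowledge:
    async def requestIngestion(self, job):
        return SimpleNamespace(status="indexed", error=None)

async def exampleBootstrapRun():
    summary = await bootstrapSharepoint(
        "conn-1",
        adapter=FakeAdapter(),
        connection=SimpleNamespace(mandateId="m-1", userId="u-1"),
        knowledgeService=FakeKnowledge(),
        limits=SharepointBootstrapLimits(maxItems=10),
        runExtractionFn=lambda raw, name, mime, opts: SimpleNamespace(
            parts=[SimpleNamespace(id="p1", data=raw.decode(), typeGroup="text",
                                   label="file", metadata={})]
        ),
    )
    assert summary["indexed"] == 1 and summary["failed"] == 0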

View file

@ -0,0 +1,107 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Text normalisation utilities used by knowledge ingestion.
The email body cleaning logic is intentionally regex-based and works on plain
text after an HTML → text pass so we never store unsanitised HTML/JS in the
knowledge store and retrieval stays robust (no extraneous markup tokens
eating embedding budget).
"""
from __future__ import annotations
import re
from typing import Optional
DEFAULT_MAX_CHARS = 8000
_QUOTE_MARKER_PATTERNS = [
re.compile(r"^\s*(?:On\s.+?\swrote:)\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*(?:Am\s.+?\sschrieb.+?:)\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*-{2,}\s*Original\s*Message\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*-{2,}\s*Urspr.+Nachricht\s*-{2,}\s*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*From:\s+.+$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Von:\s+.+$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Sent:\s+.+$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Gesendet:\s+.+$", re.MULTILINE | re.IGNORECASE),
]
_SIGNATURE_MARKERS = [
re.compile(r"^\s*-{2,}\s*$", re.MULTILINE),
re.compile(r"^\s*—\s*$", re.MULTILINE),
re.compile(r"^\s*Best regards\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Kind regards\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Mit freundlichen Gr[üu]ßen\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Viele Gr[üu]ße\b.*$", re.MULTILINE | re.IGNORECASE),
re.compile(r"^\s*Best,\s*$", re.MULTILINE | re.IGNORECASE),
]
def _htmlToText(html: str) -> str:
"""Prefer BeautifulSoup when available, fall back to regex."""
try:
from bs4 import BeautifulSoup # type: ignore
soup = BeautifulSoup(html, "html.parser")
for tag in soup(["script", "style", "head"]):
tag.decompose()
for br in soup.find_all(["br"]):
br.replace_with("\n")
for p in soup.find_all(["p", "div", "li", "tr"]):
p.append("\n")
text = soup.get_text()
except Exception:
# Minimal fallback: strip tags crudely.
text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
text = re.sub(r"</(?:p|div|li|tr)>", "\n", text, flags=re.IGNORECASE)
text = re.sub(r"<[^>]+>", "", text)
# Collapse non-breaking + zero-width whitespace.
text = text.replace("\u00a0", " ").replace("\u200b", "")
return text
def _stripQuotedThread(text: str) -> str:
"""Remove reply-chain content so only the author's own contribution remains."""
earliest = len(text)
for pattern in _QUOTE_MARKER_PATTERNS:
match = pattern.search(text)
if match and match.start() < earliest:
earliest = match.start()
# Drop any block starting with "> " quoted lines (often Gmail/Thunderbird).
quotedBlock = re.search(r"^(?:\s*>.*\n?)+", text, re.MULTILINE)
if quotedBlock and quotedBlock.start() < earliest:
earliest = quotedBlock.start()
return text[:earliest].rstrip()
def _stripSignature(text: str) -> str:
earliest = len(text)
for pattern in _SIGNATURE_MARKERS:
match = pattern.search(text)
if match and match.start() < earliest:
earliest = match.start()
return text[:earliest].rstrip()
def _collapseWhitespace(text: str) -> str:
text = re.sub(r"[ \t]+", " ", text)
text = re.sub(r"\n{3,}", "\n\n", text)
return text.strip()
def cleanEmailBody(html: str, maxChars: Optional[int] = DEFAULT_MAX_CHARS) -> str:
"""Return a compact plain-text view of an email body suitable for embedding.
Steps: HTML → text, remove quoted reply chain, remove signature, collapse
whitespace, truncate to maxChars. Always returns a string (possibly empty).
"""
if not html:
return ""
text = _htmlToText(html) if "<" in html and ">" in html else html
text = _stripQuotedThread(text)
text = _stripSignature(text)
text = _collapseWhitespace(text)
if maxChars and len(text) > maxChars:
text = text[:maxChars].rstrip() + "…"
return text
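
A quick, invented example of what the pipeline does to a typical HTML reply; the exact whitespace depends on whether BeautifulSoup is installed, but the quoted thread and the signature are stripped on either path.

raw = (
    "<div><p>Hi team,</p><p>the updated draft is attached.</p>"
    "<p>Best regards<br>Ida</p>"
    "<div>On Mon, Apr 27, 2026, Bob wrote:</div>"
    "<blockquote><p>Can you resend the draft?</p></blockquote></div>"
)
print(cleanEmailBody(raw, maxChars=200))
# Expected shape: only the author's own lines survive, e.g.
# Hi team,
# the updated draft is attached.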

View file

@ -100,12 +100,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
  # Update progress - preparing parameters
  self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
- from modules.datamodels.datamodelDocref import DocumentReferenceList
+ from modules.datamodels.datamodelDocref import (
+     DocumentReferenceList,
+     coerceDocumentReferenceList,
+ )
  documentListParam = parameters.get("documentList")
  inline_content_parts: Optional[List[ContentPart]] = None
- # Handle inline ActionDocuments (e.g. from SharePoint/email in automation2 no persistence)
+ # Inline ActionDocuments (SharePoint/email in automation2, no
+ # persistence) are list[ActionDocument-like dict] -- handled
+ # separately because they carry pre-extracted content. Everything
+ # else is normalised through the tolerant coercer.
  is_inline = (
      isinstance(documentListParam, list)
      and len(documentListParam) > 0
@ -117,28 +123,12 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
  logger.info(
      f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)"
  )
- elif documentListParam is None:
-     documentList = DocumentReferenceList(references=[])
-     logger.debug(f"ai.process: documentList is None, using empty DocumentReferenceList")
- elif isinstance(documentListParam, DocumentReferenceList):
-     documentList = documentListParam
-     logger.info(f"ai.process: Received DocumentReferenceList with {len(documentList.references)} references")
-     for idx, ref in enumerate(documentList.references):
-         logger.info(f" Reference {idx + 1}: documentId={ref.documentId}, type={type(ref).__name__}")
- elif isinstance(documentListParam, str):
-     documentList = DocumentReferenceList.from_string_list([documentListParam])
-     logger.info(f"ai.process: Converted string to DocumentReferenceList with {len(documentList.references)} references")
- elif isinstance(documentListParam, list):
-     first = documentListParam[0] if documentListParam else None
-     logger.info(
-         f"ai.process: documentList is list of {len(documentListParam)} items, "
-         f"first type={type(first).__name__}, has_documentData={_is_action_document_like(first) if first else False}"
-     )
-     documentList = DocumentReferenceList.from_string_list(documentListParam)
-     logger.info(f"ai.process: Converted list to DocumentReferenceList with {len(documentList.references)} references")
  else:
-     logger.error(f"Invalid documentList type: {type(documentListParam)}")
-     documentList = DocumentReferenceList(references=[])
+     documentList = coerceDocumentReferenceList(documentListParam)
+     logger.info(
+         f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
+         f"to DocumentReferenceList with {len(documentList.references)} references"
+     )
  # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
  resultType = parameters.get("resultType")

View file

@ -5,7 +5,10 @@ import logging
  import time
  from typing import Dict, Any
  from modules.datamodels.datamodelChat import ActionResult, ActionDocument
- from modules.datamodels.datamodelDocref import DocumentReferenceList
+ from modules.datamodels.datamodelDocref import (
+     DocumentReferenceList,
+     coerceDocumentReferenceList,
+ )
  from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
  logger = logging.getLogger(__name__)
@ -16,20 +19,17 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
  workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
  operationId = f"context_extract_{workflowId}_{int(time.time())}"
- # Extract documentList from parameters dict
  documentListParam = parameters.get("documentList")
  if not documentListParam:
      return ActionResult.isFailure(error="documentList is required")
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
-     documentList = documentListParam
- elif isinstance(documentListParam, str):
-     documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
-     documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
-     return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+ documentList = coerceDocumentReferenceList(documentListParam)
+ if not documentList.references:
+     return ActionResult.isFailure(
+         error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
+         f"expected DocumentReferenceList, list of strings/dicts, or "
+         f"a wrapper dict like {{'documents': [...]}}"
+     )
  # Start progress tracking
  parentOperationId = parameters.get('parentOperationId')

View file

@ -5,7 +5,10 @@ import logging
  import time
  from typing import Dict, Any
  from modules.datamodels.datamodelChat import ActionResult, ActionDocument
- from modules.datamodels.datamodelDocref import DocumentReferenceList
+ from modules.datamodels.datamodelDocref import (
+     DocumentReferenceList,
+     coerceDocumentReferenceList,
+ )
  from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
  logger = logging.getLogger(__name__)
@ -26,20 +29,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
  if not neutralizationEnabled:
      logger.info("Neutralization is not enabled, returning documents unchanged")
      # Return original documents if neutralization is disabled
-     # Get documents from documentList
      documentListParam = parameters.get("documentList")
      if not documentListParam:
          return ActionResult.isFailure(error="documentList is required")
-     # Convert to DocumentReferenceList if needed
-     if isinstance(documentListParam, DocumentReferenceList):
-         documentList = documentListParam
-     elif isinstance(documentListParam, str):
-         documentList = DocumentReferenceList.from_string_list([documentListParam])
-     elif isinstance(documentListParam, list):
-         documentList = DocumentReferenceList.from_string_list(documentListParam)
-     else:
-         return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+     documentList = coerceDocumentReferenceList(documentListParam)
+     if not documentList.references:
+         return ActionResult.isFailure(
+             error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
+         )
      # Get ChatDocuments from documentList
      chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
@ -65,20 +63,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
      return ActionResult.isSuccess(documents=actionDocuments)
- # Extract documentList from parameters dict
  documentListParam = parameters.get("documentList")
  if not documentListParam:
      return ActionResult.isFailure(error="documentList is required")
- # Convert to DocumentReferenceList if needed
- if isinstance(documentListParam, DocumentReferenceList):
-     documentList = documentListParam
- elif isinstance(documentListParam, str):
-     documentList = DocumentReferenceList.from_string_list([documentListParam])
- elif isinstance(documentListParam, list):
-     documentList = DocumentReferenceList.from_string_list(documentListParam)
- else:
-     return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
+ documentList = coerceDocumentReferenceList(documentListParam)
+ if not documentList.references:
+     return ActionResult.isFailure(
+         error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
+     )
  # Start progress tracking
  parentOperationId = parameters.get('parentOperationId')

View file

@ -9,6 +9,9 @@ from modules.datamodels.datamodelChat import ActionResult, ActionItem, TaskStep
  from modules.datamodels.datamodelChat import ChatWorkflow
  from modules.workflows.processing.shared.methodDiscovery import methods
  from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
+ from modules.workflows.processing.shared.parameterValidation import (
+     InvalidActionParameterError, validateAndCoerceParameters,
+ )
  logger = logging.getLogger(__name__)
@ -20,20 +23,32 @@ class ActionExecutor:
  async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
-     """Execute a method action"""
+     """Execute a method action with validated/coerced parameters.
+     Parameter validation is centralised here so the contract holds for
+     every execution path (agent tool calls, workflow graph nodes,
+     REST routes); actions can rely on declared types without
+     defensive isinstance branches.
+     """
      try:
          if methodName not in methods:
              raise ValueError(f"Unknown method: {methodName}")
          method = methods[methodName]
          if actionName not in method['actions']:
              raise ValueError(f"Unknown action: {actionName} for method {methodName}")
          action = method['actions'][actionName]
-         # Execute the action
+         actionDef = method['instance']._actions.get(actionName)
+         if actionDef is not None:
+             parameters = validateAndCoerceParameters(actionDef, parameters or {})
          return await action['method'](parameters)
+     except InvalidActionParameterError as e:
+         logger.error(f"Invalid parameters for {methodName}.{actionName}: {e}")
+         raise
      except Exception as e:
          logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
          raise

View file

@ -0,0 +1,198 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Universal parameter validation + coercion for workflow actions.
Workflow actions historically received their ``parameters`` as a raw
``Dict[str, Any]`` with no enforcement of the declared parameter schema.
That implicit contract masked two whole classes of bugs:
1. **Type confusion at the agent boundary.** The agent's tool schema
(Phase-3 Typed Action Architecture) exposes ``FeatureInstanceRef`` /
``ConnectionRef`` etc. as typed *objects* with ``id`` plus a
discriminator (``featureCode`` / ``authority``) so the LLM can pick
the right instance among several. The action implementations, however,
use the value as a bare UUID string in ``recordFilter={"col": <value>}``.
Without normalization Postgres fails with "can't adapt type 'dict'",
the connector's previous swallow-and-return-[] hid the failure, and the
action returned the misleading "no record found" error.
2. **Unchecked optional flags.** ``forceRefresh`` arriving as the string
``"true"`` instead of a real bool, ``periodMonth`` arriving as ``"12"``
instead of ``12``, etc. Every action grew its own ad-hoc coercion code.
This module centralises validation and coercion at exactly one boundary:
``ActionExecutor.executeAction``. By the time the action body runs, the
``parameters`` dict is guaranteed to satisfy the declared schema.
Unknown extra keys (e.g. ``parentOperationId`` injected by the executor,
``expectedDocumentFormats`` from action items) are passed through
untouched; the schema only constrains *declared* parameters.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class InvalidActionParameterError(ValueError):
"""Raised when a declared action parameter is missing, malformed, or
cannot be coerced into the declared type.
The message identifies the action and parameter so the agent and
workflow log can pinpoint the offending call instead of getting an
opaque downstream "no record found" or "can't adapt type 'X'".
"""
def __init__(self, actionId: str, paramName: str, reason: str):
super().__init__(f"{actionId}.{paramName}: {reason}")
self.actionId = actionId
self.paramName = paramName
self.reason = reason
_TRUE_STRINGS = {"true", "1", "yes", "on"}
_FALSE_STRINGS = {"false", "0", "no", "off", ""}
def _isRefSchema(typeStr: str) -> bool:
"""A declared type is a Ref-Schema iff its name ends with ``Ref`` AND it
resolves to a PORT_TYPE_CATALOG schema with an ``id`` field.
The catalog is imported lazily to keep this module light at startup.
"""
if not typeStr or not typeStr.endswith("Ref"):
return False
from modules.features.graphicalEditor.portTypes import PORT_TYPE_CATALOG
schema = PORT_TYPE_CATALOG.get(typeStr)
if schema is None:
return False
return any(f.name == "id" for f in schema.fields)
def _coerceRef(actionId: str, paramName: str, value: Any) -> Optional[str]:
"""Collapse a Ref payload to its ``id`` string.
Accepts:
* already a string → returned as-is (workflow execution path),
* dict with non-empty ``id`` field → returns the id (agent path),
* ``None`` → returned as-is so optional Ref params stay optional.
"""
if value is None or isinstance(value, str):
return value
if isinstance(value, dict):
refId = value.get("id")
if isinstance(refId, str) and refId:
return refId
raise InvalidActionParameterError(
actionId, paramName,
f"Ref payload missing or empty 'id' field: {value!r}",
)
raise InvalidActionParameterError(
actionId, paramName,
f"Ref must be a string id or {{'id': ...}} dict, got {type(value).__name__}",
)
def _coercePrimitive(actionId: str, paramName: str, value: Any, typeStr: str) -> Any:
"""Best-effort coercion of primitive types from string-form payloads.
The agent's JSON tool calls deliver everything as strings/numbers; the
workflow executor passes through raw template values which are also
often strings. Coercing here removes ad-hoc ``isinstance(x, str)``
branches inside every action.
"""
if value is None:
return None
if typeStr == "bool":
if isinstance(value, bool):
return value
if isinstance(value, str):
lower = value.strip().lower()
if lower in _TRUE_STRINGS:
return True
if lower in _FALSE_STRINGS:
return False
if isinstance(value, (int, float)):
return bool(value)
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to bool",
)
if typeStr == "int":
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
if isinstance(value, str) and value.strip():
try:
return int(value.strip(), 10)
except ValueError:
pass
if isinstance(value, float) and value.is_integer():
return int(value)
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to int",
)
if typeStr == "float":
if isinstance(value, (int, float)):
return float(value)
if isinstance(value, str) and value.strip():
try:
return float(value.strip())
except ValueError:
pass
raise InvalidActionParameterError(
actionId, paramName, f"cannot coerce {value!r} to float",
)
return value
def validateAndCoerceParameters(actionDef, parameters: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and coerce ``parameters`` against ``actionDef.parameters``.
Behaviour per declared parameter:
* **Missing + required** → raises ``InvalidActionParameterError``.
* **Missing + optional** → left absent (action uses its own default).
* **Present + Ref-Schema (e.g. FeatureInstanceRef)** → ``{id: ..., ...}``
collapsed to the bare id string; pass-through if already a string.
* **Present + primitive (bool/int/float)** → coerced from common
string forms (e.g. ``"true"`` → ``True``).
* **Present + other types** (catalog objects, ``str``, ``Any``,
containers) → passed through untouched.
Unknown keys (e.g. ``parentOperationId``, ``expectedDocumentFormats``,
ad-hoc fields injected by the executor) are passed through unchanged.
Returns a new dict (does not mutate the caller's parameters).
"""
if not parameters:
parameters = {}
actionId = getattr(actionDef, "actionId", None) or "<unknown.action>"
declared = getattr(actionDef, "parameters", {}) or {}
coerced: Dict[str, Any] = dict(parameters)
for paramName, paramSchema in declared.items():
typeStr = getattr(paramSchema, "type", None) or "Any"
required = bool(getattr(paramSchema, "required", False))
if paramName not in coerced or coerced[paramName] is None:
if required:
raise InvalidActionParameterError(
actionId, paramName, "required parameter missing",
)
continue
rawValue = coerced[paramName]
if _isRefSchema(typeStr):
coerced[paramName] = _coerceRef(actionId, paramName, rawValue)
continue
if typeStr in ("bool", "int", "float"):
coerced[paramName] = _coercePrimitive(actionId, paramName, rawValue, typeStr)
continue
return coerced
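
A usage sketch with a stub action definition; the stub mirrors only the attributes this module reads (`actionId`, `parameters`, each schema's `type`/`required`), and the action id is hypothetical.

from types import SimpleNamespace

stubActionDef = SimpleNamespace(
    actionId="ledger.refreshBalance",  # hypothetical action id
    parameters={
        "forceRefresh": SimpleNamespace(type="bool", required=False),
        "periodMonth": SimpleNamespace(type="int", required=True),
    },
)

coerced = validateAndCoerceParameters(
    stubActionDef,
    {"forceRefresh": "true", "periodMonth": "12", "parentOperationId": "op-1"},
)
# -> {"forceRefresh": True, "periodMonth": 12, "parentOperationId": "op-1"}
# Omitting the required "periodMonth" would raise InvalidActionParameterError.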

View file

View file

@ -0,0 +1,66 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: temperature handling for OpenAI chat-completions models.
Historical regression: every payload sent ``temperature=0.2``. After the
GPT-5 launch OpenAI rejects any non-default temperature for the GPT-5.x
and o-series (o1/o3/o4) reasoning models with HTTP 400::
"Unsupported value: 'temperature' does not support 0.2 with this
model. Only the default (1) value is supported."
The fix is a single helper, ``_supportsCustomTemperature``, that is
consulted before adding the field to the outgoing payload. These tests
pin the contract:
* legacy chat models (gpt-4o, gpt-4o-mini, gpt-4.1, gpt-3.5-*) keep
honoring custom temperatures,
* every gpt-5.x and o1/o3/o4 variant must omit the field entirely.
"""
from __future__ import annotations
import pytest
from modules.aicore.aicorePluginOpenai import _supportsCustomTemperature
class TestSupportsCustomTemperature:
"""Pure model-name classification - no network, no payload assembly."""
@pytest.mark.parametrize(
"modelName",
[
"gpt-4o",
"gpt-4o-mini",
"gpt-4.1",
"gpt-3.5-turbo",
"text-embedding-3-small",
"dall-e-3",
],
)
def testLegacyModelsAcceptCustomTemperature(self, modelName):
assert _supportsCustomTemperature(modelName) is True
@pytest.mark.parametrize(
"modelName",
[
"gpt-5",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5.5",
"GPT-5.5",
"o1",
"o1-mini",
"o3",
"o3-mini",
"o4-mini",
],
)
def testReasoningModelsRejectCustomTemperature(self, modelName):
assert _supportsCustomTemperature(modelName) is False
def testEmptyOrNoneModelDefaultsToSupported(self):
# Defensive: unknown/empty names should not silently break legacy paths.
assert _supportsCustomTemperature("") is True
assert _supportsCustomTemperature(None) is True
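# Editor's sketch (assumption, not the shipped helper): one plausible shape of
# the prefix classification these tests pin. Unknown or empty names default to
# "supported" so legacy paths keep working, matching the defensive test above.
def _supportsCustomTemperatureSketch(modelName):
    name = (modelName or "").lower()
    return not name.startswith(("gpt-5", "o1", "o3", "o4"))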

View file

@ -0,0 +1,158 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: PostgreSQL connector raises DatabaseQueryError on real failures.
Historical regression: ``getRecordset`` and friends used to swallow every
exception (``except Exception: log; return []``), which turned every kind of
broken query into "no rows found". That hid bugs like:
* dict passed where Postgres expected a UUID string ("can't adapt type 'dict'"),
* missing/renamed columns after an incomplete schema migration,
* dropped tables, lost connections, etc.
These tests pin the new contract: empty result sets still return ``[]`` /
``None`` (normal), but any exception inside the query path propagates as
``DatabaseQueryError`` with the table name attached. The transaction is
rolled back so the connection is usable for subsequent queries.
"""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
import psycopg2.errors
from modules.connectors.connectorDbPostgre import (
DatabaseConnector,
DatabaseQueryError,
_rollbackQuietly,
)
class DummyTable:
"""Stand-in for a Pydantic model so we can drive the connector without a real DB.
The connector reads ``model_class.__name__`` to derive the SQL table name,
so the class name itself becomes the asserted table name in tests.
"""
model_fields = {}
def _makeConnector(cursorBehavior):
"""Build a ``DatabaseConnector`` skeleton with mocked connection/cursor.
``cursorBehavior`` is a callable invoked with the cursor mock so the test
can configure ``execute``/``fetchall``/``fetchone`` per scenario.
"""
connector = DatabaseConnector.__new__(DatabaseConnector)
cursor = MagicMock()
cursorContext = MagicMock()
cursorContext.__enter__ = MagicMock(return_value=cursor)
cursorContext.__exit__ = MagicMock(return_value=False)
connection = MagicMock()
connection.cursor.return_value = cursorContext
connector.connection = connection
connector._ensureTableExists = MagicMock(return_value=True)
connector._systemTableName = "_system"
cursorBehavior(cursor)
return connector, connection, cursor
class TestGetRecordsetFailLoud:
def test_emptyResultStillReturnsList(self):
"""No rows → []; this is the normal happy path, not a failure."""
def behavior(cursor):
cursor.execute.return_value = None
cursor.fetchall.return_value = []
connector, connection, _ = _makeConnector(behavior)
result = connector.getRecordset(DummyTable)
assert result == []
connection.rollback.assert_not_called()
def test_dictAdaptErrorRaisesDatabaseQueryError(self):
"""Reproduces the Trustee bug: passing a dict in WHERE → can't adapt → raise."""
def behavior(cursor):
cursor.execute.side_effect = psycopg2.ProgrammingError(
"can't adapt type 'dict'"
)
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError) as excinfo:
connector.getRecordset(
DummyTable,
recordFilter={"featureInstanceId": {"id": "uuid", "featureCode": "trustee"}},
)
assert excinfo.value.table == "DummyTable"
assert "can't adapt type 'dict'" in str(excinfo.value)
assert isinstance(excinfo.value.original, psycopg2.ProgrammingError)
connection.rollback.assert_called_once()
def test_missingColumnRaisesDatabaseQueryError(self):
def behavior(cursor):
cursor.execute.side_effect = psycopg2.errors.UndefinedColumn(
'column "wat" does not exist'
)
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError) as excinfo:
connector.getRecordset(DummyTable, recordFilter={"wat": "x"})
assert "wat" in str(excinfo.value)
connection.rollback.assert_called_once()
def test_operationalErrorRaisesDatabaseQueryError(self):
"""Connection lost mid-query is also a real failure that must propagate."""
def behavior(cursor):
cursor.execute.side_effect = psycopg2.OperationalError("connection lost")
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError):
connector.getRecordset(DummyTable)
connection.rollback.assert_called_once()
class TestGetRecordFailLoud:
def test_recordNotFoundReturnsNone(self):
"""`fetchone()` returning None is "row missing", not an error."""
def behavior(cursor):
cursor.execute.return_value = None
cursor.fetchone.return_value = None
connector, connection, _ = _makeConnector(behavior)
result = connector.getRecord(DummyTable, "missing-id")
assert result is None
connection.rollback.assert_not_called()
def test_queryErrorRaisesDatabaseQueryError(self):
def behavior(cursor):
cursor.execute.side_effect = psycopg2.errors.UndefinedTable(
'relation "DummyTable" does not exist'
)
connector, connection, _ = _makeConnector(behavior)
with pytest.raises(DatabaseQueryError) as excinfo:
connector.getRecord(DummyTable, "any-id")
assert excinfo.value.table == "DummyTable"
connection.rollback.assert_called_once()
class TestRollbackQuietly:
def test_rollsBackOnLiveConnection(self):
connection = MagicMock()
_rollbackQuietly(connection)
connection.rollback.assert_called_once()
def test_swallowsRollbackError(self):
"""Rollback failure must not mask the original query error."""
connection = MagicMock()
connection.rollback.side_effect = RuntimeError("rollback failed")
_rollbackQuietly(connection)
def test_noopOnNoneConnection(self):
_rollbackQuietly(None)
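# Editor's sketch (assumption, standalone): the fail-loud contract in miniature.
# Empty results stay [], any exception rolls back quietly and re-raises with the
# table name attached. Names ending in "Sketch" are illustrative only, not the
# shipped connector code.
class DatabaseQueryErrorSketch(Exception):
    def __init__(self, table, original):
        super().__init__(f"{table}: {original}")
        self.table = table
        self.original = original

def getRecordsetSketch(connection, tableName, runQuery):
    try:
        return runQuery() or []
    except Exception as exc:
        try:
            connection.rollback()  # quiet rollback so the original error is never masked
        except Exception:
            pass
        raise DatabaseQueryErrorSketch(tableName, exc) from exc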

View file

@ -125,3 +125,10 @@ class TestConvertParameterSchema:
schema = _convertParameterSchema(actionParams)
assert schema["properties"]["connection"]["type"] == "object"
assert "id" in schema["properties"]["connection"]["properties"]
# Ref-payload normalization (collapsing `{id: ..., featureCode: ...}` to the
# bare id string) is no longer the adapter's job — it moved to the central
# `parameterValidation.validateAndCoerceParameters` invoked by
# `ActionExecutor.executeAction`. Tests for that contract live in
# `tests/unit/workflows/test_parameterValidation.py`.

View file

@ -0,0 +1,203 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap ClickUp tests with a fake service + knowledge service.
Verifies:
- Teams → spaces → lists (folderless + folder-based) → tasks traversal.
- Each task produces a `requestIngestion` call with `sourceKind="clickup_task"`
and header + description content-objects.
- `date_updated` is forwarded as the contentVersion for idempotency.
- Recency filter drops tasks older than `maxAgeDays`.
- maxWorkspaces / maxListsPerWorkspace / maxTasks caps are respected.
"""
import asyncio
import os
import sys
import time
from types import SimpleNamespace
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
bootstrapClickup,
ClickupBootstrapLimits,
_syntheticTaskId,
)
def _nowMs(offsetDays: int = 0) -> str:
return str(int((time.time() + offsetDays * 86400) * 1000))
class _FakeClickupService:
"""Records API calls; serves a canned 1-team / 1-space / 1-list / 2-task layout."""
def __init__(self, taskCount=2, oldTask=False):
self._taskCount = taskCount
self._oldTask = oldTask # when True, the second task is 400 days old
self.calls = []
async def getAuthorizedTeams(self):
self.calls.append(("getAuthorizedTeams",))
return {"teams": [{"id": "team-1", "name": "Acme"}]}
async def getSpaces(self, team_id: str):
self.calls.append(("getSpaces", team_id))
return {"spaces": [{"id": "space-1", "name": "Engineering"}]}
async def getFolderlessLists(self, space_id: str):
self.calls.append(("getFolderlessLists", space_id))
return {"lists": [{"id": "list-1", "name": "Sprint 1"}]}
async def getFolders(self, space_id: str):
self.calls.append(("getFolders", space_id))
return {"folders": [{"id": "folder-1", "name": "Subproject"}]}
async def getListsInFolder(self, folder_id: str):
self.calls.append(("getListsInFolder", folder_id))
return {"lists": [{"id": "list-2", "name": "Sub-tasks"}]}
async def getTasksInList(self, list_id: str, *, page=0, include_closed=False, subtasks=True):
self.calls.append(("getTasksInList", list_id, page, include_closed))
if page > 0:
return {"tasks": []}
tasks = []
for i in range(self._taskCount):
tid = f"{list_id}-task-{i}"
offsetDays = -400 if (self._oldTask and i == 1) else 0
tasks.append({
"id": tid,
"name": f"Task {i} of {list_id}",
"description": f"Plain description for task {i}",
"text_content": f"Rich content for task {i}",
"status": {"status": "open" if i == 0 else "closed"},
"assignees": [{"username": "alice"}],
"tags": [{"name": "urgent"}],
"date_updated": _nowMs(offsetDays),
"date_created": _nowMs(-1),
"url": f"https://app.clickup.com/t/{tid}",
})
return {"tasks": tasks}
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls = []
self._duplicates = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
return SimpleNamespace(
jobId=job.sourceId, status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def _adapter(svc):
return SimpleNamespace(_svc=svc)
def test_bootstrap_walks_team_space_lists_and_tasks():
svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
# 2 lists (folderless list-1 + folder's list-2) × 2 tasks each = 4 tasks
assert result["indexed"] == 4
assert result["workspaces"] == 1
assert result["lists"] == 2
sourceIds = {c.sourceId for c in knowledge.calls}
assert len(sourceIds) == 4
for job in knowledge.calls:
assert job.sourceKind == "clickup_task"
assert job.mimeType == "application/vnd.clickup.task+json"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "clickup"
assert job.provenance["teamId"] == "team-1"
assert job.contentVersion # numeric millisecond string
# At least the header content-object is present.
ids = [co["contentObjectId"] for co in job.contentObjects]
assert "header" in ids
def test_bootstrap_reports_duplicates_on_second_run():
svc = _FakeClickupService(taskCount=1)
duplicates = {
_syntheticTaskId("c1", "list-1-task-0"),
_syntheticTaskId("c1", "list-2-task-0"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 2
def test_bootstrap_skips_tasks_older_than_maxAgeDays():
svc = _FakeClickupService(taskCount=2, oldTask=True)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=180),
)
result = asyncio.run(_run())
# 2 lists × (1 recent + 1 skipped old) = 2 indexed + 2 skippedPolicy
assert result["indexed"] == 2
assert result["skippedPolicy"] == 2
def test_bootstrap_maxTasks_caps_ingestion():
svc = _FakeClickupService(taskCount=2)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapClickup(
connectionId="c1",
adapter=_adapter(svc),
connection=connection,
knowledgeService=knowledge,
limits=ClickupBootstrapLimits(maxAgeDays=None, maxTasks=3),
)
result = asyncio.run(_run())
assert result["indexed"] == 3
if __name__ == "__main__":
test_bootstrap_walks_team_space_lists_and_tasks()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_tasks_older_than_maxAgeDays()
test_bootstrap_maxTasks_caps_ingestion()
print("OK — bootstrapClickup tests passed")

View file

@ -0,0 +1,225 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Google Drive tests with a fake adapter + knowledge service.
Verifies:
- Drive walk traverses root subfolders, respecting `maxDepth`.
- Every file triggers `requestIngestion` with `sourceKind="gdrive_item"`.
- Duplicate runs (same modifiedTime revision) report `skippedDuplicate`.
- Provenance carries `authority="google"` and the Drive file id.
- Recency filter skips files older than `maxAgeDays`.
"""
import asyncio
import os
import sys
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive import (
bootstrapGdrive,
GdriveBootstrapLimits,
_syntheticFileId,
)
@dataclass
class _ExtEntry:
name: str
path: str
isFolder: bool = False
size: Optional[int] = None
mimeType: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
def _today_iso(offsetDays: int = 0) -> str:
return (datetime.now(timezone.utc) + timedelta(days=offsetDays)).strftime("%Y-%m-%dT%H:%M:%SZ")
class _FakeDriveAdapter:
"""Minimal DriveAdapter stand-in.
Layout:
"/" (root) 2 files + 1 folder (sub)
"/sub_id" 1 file
"""
def __init__(self, recent_only: bool = True):
self.downloaded: List[str] = []
self._recent = _today_iso(0)
self._old = _today_iso(-400)
self._recent_only = recent_only
async def browse(self, path: str, filter=None, limit=None):
if path in ("/", "", "root"):
return [
_ExtEntry(
name="f1.txt", path="/f1", size=20,
mimeType="text/plain",
metadata={"id": "f1", "modifiedTime": self._recent},
),
_ExtEntry(
name="f2.txt", path="/f2", size=20,
mimeType="text/plain",
metadata={"id": "f2", "modifiedTime": self._recent if self._recent_only else self._old},
),
_ExtEntry(
name="Subfolder", path="/sub_id", isFolder=True,
mimeType="application/vnd.google-apps.folder",
metadata={"id": "sub_id", "modifiedTime": self._recent},
),
]
if path == "/sub_id":
return [
_ExtEntry(
name="f3.txt", path="/f3", size=20,
mimeType="text/plain",
metadata={"id": "f3", "modifiedTime": self._recent},
),
]
return []
async def download(self, path: str) -> bytes:
self.downloaded.append(path)
return path.encode("utf-8")
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls: List[SimpleNamespace] = []
self._duplicateIds = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicateIds else "indexed"
return SimpleNamespace(
jobId=f"{job.sourceKind}:{job.sourceId}",
status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def _fakeRunExtraction(data, name, mime, options):
return SimpleNamespace(
parts=[
SimpleNamespace(
id="p1",
data=data.decode("utf-8") if isinstance(data, bytes) else str(data),
typeGroup="text",
label="page:1",
metadata={"pageIndex": 0},
)
]
)
def test_bootstrap_walks_drive_and_subfolders():
adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert len(knowledge.calls) == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
assert result["indexed"] == 3
assert result["skippedDuplicate"] == 0
assert adapter.downloaded == ["/f1", "/f2", "/f3"]
def test_bootstrap_reports_duplicates_on_second_run():
adapter = _FakeDriveAdapter()
duplicateIds = {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicateIds)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 3
def test_bootstrap_skips_files_older_than_maxAgeDays():
adapter = _FakeDriveAdapter(recent_only=False) # f2 is 400 days old
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=180),
)
result = asyncio.run(_run())
assert result["indexed"] == 2 # f1, f3
assert result["skippedPolicy"] == 1 # f2 filtered out
def test_bootstrap_passes_connection_provenance():
adapter = _FakeDriveAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGdrive(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
limits=GdriveBootstrapLimits(maxAgeDays=None),
)
asyncio.run(_run())
for job in knowledge.calls:
assert job.sourceKind == "gdrive_item"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "drive"
assert job.contentVersion # modifiedTime ISO string
if __name__ == "__main__":
test_bootstrap_walks_drive_and_subfolders()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_skips_files_older_than_maxAgeDays()
test_bootstrap_passes_connection_provenance()
print("OK — bootstrapGdrive tests passed")

View file

@ -0,0 +1,240 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Gmail tests with a fake googleGet + knowledge service.
Verifies:
- Default labels (INBOX + SENT) are traversed.
- Each message produces a requestIngestion call with sourceKind=gmail_message
and structured contentObjects (header / snippet / body).
- Pagination via `nextPageToken` is followed.
- historyId is forwarded as the contentVersion for idempotency.
- MIME body extraction walks nested parts (multipart/alternative).
"""
import asyncio
import base64
import os
import sys
from types import SimpleNamespace
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
bootstrapGmail,
GmailBootstrapLimits,
_syntheticMessageId,
_buildContentObjects,
_walkPayloadForBody,
)
def _b64url(text: str) -> str:
return base64.urlsafe_b64encode(text.encode("utf-8")).decode("ascii").rstrip("=")
def _msg(mid: str, subject: str = "Hi", body: str = "Hello world", historyId: str = "h1"):
return {
"id": mid,
"threadId": f"thread-{mid}",
"historyId": historyId,
"internalDate": "1700000000000",
"snippet": body[:120],
"payload": {
"headers": [
{"name": "Subject", "value": subject},
{"name": "From", "value": "Alice <a@x.com>"},
{"name": "To", "value": "Bob <b@x.com>"},
{"name": "Date", "value": "Tue, 01 Jan 2025 10:00:00 +0000"},
],
"mimeType": "text/plain",
"body": {"data": _b64url(body), "size": len(body)},
"parts": [],
},
}
class _FakeGoogleGet:
"""Records URLs + returns the wired-up page or message response."""
def __init__(self, messages_by_label, paginated_label=None, page2=None):
self._messages = messages_by_label
self._paginated = paginated_label
self._page2 = page2 or []
self._served_first_page = set()
self.requested = []
async def __call__(self, url: str):
self.requested.append(url)
# List page: contains `/users/me/messages?labelIds=...`
if "/users/me/messages?" in url:
for label, msgs in self._messages.items():
if f"labelIds={label}" in url:
if (
label == self._paginated
and label not in self._served_first_page
):
self._served_first_page.add(label)
return {
"messages": [{"id": m["id"]} for m in msgs],
"nextPageToken": "token-2",
}
if label == self._paginated and "pageToken=token-2" in url:
return {
"messages": [{"id": m["id"]} for m in self._page2],
}
return {"messages": [{"id": m["id"]} for m in msgs]}
return {"messages": []}
# Detail fetch: /users/me/messages/{id}?format=full
if "/users/me/messages/" in url and "format=full" in url:
msgId = url.split("/users/me/messages/")[-1].split("?")[0]
for msgs in self._messages.values():
for m in msgs:
if m["id"] == msgId:
return m
for m in self._page2:
if m["id"] == msgId:
return m
return {"error": "not found"}
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls = []
self._duplicates = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
return SimpleNamespace(
jobId=job.sourceId, status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def test_buildContentObjects_emits_header_snippet_body():
parts = _buildContentObjects(_msg("m1", body="Hello\nWorld"), maxBodyChars=8000)
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header", "snippet", "body"]
header = parts[0]["data"]
assert "Subject: Hi" in header
assert "From: Alice <a@x.com>" in header
assert "To: Bob <b@x.com>" in header
def test_walkPayloadForBody_prefers_plain_over_html():
payload = {
"mimeType": "multipart/alternative",
"parts": [
{"mimeType": "text/plain", "body": {"data": _b64url("plain body")}},
{"mimeType": "text/html", "body": {"data": _b64url("<p>html body</p>")}},
],
}
bodies = _walkPayloadForBody(payload)
assert bodies["text"] == "plain body"
assert bodies["html"] == "<p>html body</p>"
def test_walkPayloadForBody_falls_back_to_html():
payload = {
"mimeType": "multipart/alternative",
"parts": [
{"mimeType": "text/html", "body": {"data": _b64url("<p>only html</p>")}},
],
}
bodies = _walkPayloadForBody(payload)
assert bodies["text"] == ""
assert "only html" in bodies["html"]
def test_bootstrap_gmail_indexes_messages_from_inbox_and_sent():
fake_get = _FakeGoogleGet({
"INBOX": [_msg("m1"), _msg("m2")],
"SENT": [_msg("m3")],
})
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGmail(
connectionId="c1",
adapter=SimpleNamespace(_token="t"),
connection=connection,
knowledgeService=knowledge,
limits=GmailBootstrapLimits(maxAgeDays=None),
googleGetFn=fake_get,
)
result = asyncio.run(_run())
assert result["indexed"] == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
_syntheticMessageId("c1", "m3"),
}
for job in knowledge.calls:
assert job.sourceKind == "gmail_message"
assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "google"
assert job.provenance["service"] == "gmail"
assert job.contentVersion == "h1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
def test_bootstrap_gmail_follows_pagination():
fake_get = _FakeGoogleGet(
messages_by_label={"INBOX": [_msg("m1")], "SENT": []},
paginated_label="INBOX",
page2=[_msg("m2"), _msg("m3")],
)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGmail(
connectionId="c1",
adapter=SimpleNamespace(_token="t"),
connection=connection,
knowledgeService=knowledge,
limits=GmailBootstrapLimits(maxAgeDays=None),
googleGetFn=fake_get,
)
result = asyncio.run(_run())
assert result["indexed"] == 3
def test_bootstrap_gmail_reports_duplicates():
fake_get = _FakeGoogleGet({"INBOX": [_msg("m1"), _msg("m2")], "SENT": []})
duplicates = {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapGmail(
connectionId="c1",
adapter=SimpleNamespace(_token="t"),
connection=connection,
knowledgeService=knowledge,
limits=GmailBootstrapLimits(maxAgeDays=None),
googleGetFn=fake_get,
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 2
if __name__ == "__main__":
test_buildContentObjects_emits_header_snippet_body()
test_walkPayloadForBody_prefers_plain_over_html()
test_walkPayloadForBody_falls_back_to_html()
test_bootstrap_gmail_indexes_messages_from_inbox_and_sent()
test_bootstrap_gmail_follows_pagination()
test_bootstrap_gmail_reports_duplicates()
print("OK — bootstrapGmail tests passed")

View file

@ -0,0 +1,190 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap Outlook tests with a fake adapter + knowledge service.
Verifies:
- Well-known folders (inbox, sentitems) are discovered via Graph.
- Each message produces a `requestIngestion` call with sourceKind=outlook_message
and structured contentObjects (header / snippet / body).
- Pagination via `@odata.nextLink` is followed.
- changeKey is forwarded as the contentVersion for idempotency.
"""
import asyncio
import os
import sys
from types import SimpleNamespace
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook import (
bootstrapOutlook,
OutlookBootstrapLimits,
_syntheticMessageId,
_buildContentObjects,
)
class _FakeOutlookAdapter:
def __init__(self, messages_by_folder, paginated_folder=None, page2=None):
self._folders = {"inbox": "INBOX-ID", "sentitems": "SENT-ID"}
self._messages = messages_by_folder
self._paginated_folder = paginated_folder
self._page2 = page2 or []
self.requested_endpoints = []
async def _graphGet(self, endpoint: str):
self.requested_endpoints.append(endpoint)
if endpoint.startswith("me/mailFolders/") and "/messages" not in endpoint:
wellKnown = endpoint.split("/")[-1]
fid = self._folders.get(wellKnown)
if not fid:
return {"error": "not found"}
return {"id": fid, "displayName": wellKnown}
# message page request: e.g. me/mailFolders/INBOX-ID/messages?...
for fid, messages in self._messages.items():
if f"me/mailFolders/{fid}/messages" in endpoint:
page = {"value": messages}
if fid == self._paginated_folder and "skiptoken" not in endpoint:
page["@odata.nextLink"] = (
"https://graph.microsoft.com/v1.0/"
f"me/mailFolders/{fid}/messages?$skiptoken=abc"
)
elif fid == self._paginated_folder and "skiptoken" in endpoint:
page = {"value": self._page2}
return page
return {"value": []}
async def browse(self, path):
return []
class _FakeKnowledgeService:
def __init__(self, duplicateIds=None):
self.calls = []
self._duplicates = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicates else "indexed"
return SimpleNamespace(
jobId=job.sourceId, status=status, contentHash="h",
fileId=job.sourceId, index=None, error=None,
)
def _msg(mid: str, subject: str = "Hi", change: str = "ck1"):
return {
"id": mid,
"subject": subject,
"from": {"emailAddress": {"name": "Alice", "address": "a@x.com"}},
"toRecipients": [{"emailAddress": {"name": "Bob", "address": "b@x.com"}}],
"ccRecipients": [],
"receivedDateTime": "2025-01-01T10:00:00Z",
"bodyPreview": "Hello world",
"body": {"contentType": "text", "content": "Hello world\nThis is the body."},
"internetMessageId": f"<{mid}@local>",
"hasAttachments": False,
"changeKey": change,
}
def test_buildContentObjects_emits_header_snippet_body():
parts = _buildContentObjects(_msg("m1"), maxBodyChars=8000)
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header", "snippet", "body"]
header = parts[0]["data"]
assert "Subject: Hi" in header
assert "From: Alice <a@x.com>" in header
assert "To: Bob <b@x.com>" in header
def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
adapter = _FakeOutlookAdapter({
"INBOX-ID": [_msg("m1"), _msg("m2")],
"SENT-ID": [_msg("m3")],
})
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapOutlook(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
limits=OutlookBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
_syntheticMessageId("c1", "m3"),
}
for job in knowledge.calls:
assert job.sourceKind == "outlook_message"
assert job.mimeType == "message/rfc822"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["service"] == "outlook"
assert job.contentVersion == "ck1"
assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
def test_bootstrap_outlook_follows_pagination():
adapter = _FakeOutlookAdapter(
messages_by_folder={"INBOX-ID": [_msg("m1")], "SENT-ID": []},
paginated_folder="INBOX-ID",
page2=[_msg("m2"), _msg("m3")],
)
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapOutlook(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
limits=OutlookBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 3
def test_bootstrap_outlook_reports_duplicates():
adapter = _FakeOutlookAdapter({
"INBOX-ID": [_msg("m1"), _msg("m2")],
"SENT-ID": [],
})
duplicates = {
_syntheticMessageId("c1", "m1"),
_syntheticMessageId("c1", "m2"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicates)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapOutlook(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
limits=OutlookBootstrapLimits(maxAgeDays=None),
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 2
if __name__ == "__main__":
test_buildContentObjects_emits_header_snippet_body()
test_bootstrap_outlook_indexes_messages_from_inbox_and_sent()
test_bootstrap_outlook_follows_pagination()
test_bootstrap_outlook_reports_duplicates()
print("OK — bootstrapOutlook tests passed")

View file

@ -0,0 +1,209 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bootstrap SharePoint tests with a fake adapter + knowledge service.
Verifies:
- Every discovered file triggers `requestIngestion`.
- Duplicate runs (same eTag revisions) report `skippedDuplicate`.
- Synthetic fileIds are stable across runs so idempotency works end-to-end.
"""
import asyncio
import os
import sys
from dataclasses import dataclass
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
bootstrapSharepoint,
_syntheticFileId,
)
@dataclass
class _ExtEntry:
name: str
path: str
isFolder: bool = False
size: Optional[int] = None
mimeType: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
class _FakeSpAdapter:
"""Minimal SharepointAdapter stand-in.
Layout:
"/" 1 site
"/sites/site-1" 2 files (f1, f2) + 1 folder (sub)
"/sites/site-1/sub" 1 file (f3)
"""
def __init__(self):
self.downloaded: List[str] = []
async def browse(self, path: str, filter=None, limit=None):
if path == "/":
return [
_ExtEntry(
name="Site 1",
path="/sites/site-1",
isFolder=True,
metadata={"id": "site-1"},
),
]
if path == "/sites/site-1":
return [
_ExtEntry(
name="f1.txt", path="/sites/site-1/f1.txt",
mimeType="text/plain", size=20,
metadata={"id": "f1", "revision": "etag-f1"},
),
_ExtEntry(
name="f2.txt", path="/sites/site-1/f2.txt",
mimeType="text/plain", size=20,
metadata={"id": "f2", "revision": "etag-f2"},
),
_ExtEntry(
name="sub", path="/sites/site-1/sub",
isFolder=True, metadata={"id": "sub"},
),
]
if path == "/sites/site-1/sub":
return [
_ExtEntry(
name="f3.txt", path="/sites/site-1/sub/f3.txt",
mimeType="text/plain", size=20,
metadata={"id": "f3", "revision": "etag-f3"},
),
]
return []
async def download(self, path: str) -> bytes:
self.downloaded.append(path)
return path.encode("utf-8")
class _FakeKnowledgeService:
"""Records requestIngestion calls and returns the scripted handles."""
def __init__(self, duplicateIds=None):
self.calls: List[SimpleNamespace] = []
self._duplicateIds = duplicateIds or set()
async def requestIngestion(self, job):
self.calls.append(job)
status = "duplicate" if job.sourceId in self._duplicateIds else "indexed"
return SimpleNamespace(
jobId=f"{job.sourceKind}:{job.sourceId}",
status=status,
contentHash="h",
fileId=job.sourceId,
index=None,
error=None,
)
def _fakeRunExtraction(data, name, mime, options):
"""Produce a single synthetic text part so `_toContentObjects` returns one."""
return SimpleNamespace(
parts=[
SimpleNamespace(
id="p1",
data=data.decode("utf-8") if isinstance(data, bytes) else str(data),
typeGroup="text",
label="page:1",
metadata={"pageIndex": 0},
)
]
)
def test_bootstrap_walks_sites_and_subfolders():
adapter = _FakeSpAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapSharepoint(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
)
result = asyncio.run(_run())
assert len(knowledge.calls) == 3
sourceIds = {c.sourceId for c in knowledge.calls}
assert sourceIds == {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
assert result["indexed"] == 3
assert result["skippedDuplicate"] == 0
assert adapter.downloaded == [
"/sites/site-1/f1.txt",
"/sites/site-1/f2.txt",
"/sites/site-1/sub/f3.txt",
]
def test_bootstrap_reports_duplicates_on_second_run():
adapter = _FakeSpAdapter()
duplicateIds = {
_syntheticFileId("c1", "f1"),
_syntheticFileId("c1", "f2"),
_syntheticFileId("c1", "f3"),
}
knowledge = _FakeKnowledgeService(duplicateIds=duplicateIds)
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapSharepoint(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
)
result = asyncio.run(_run())
assert result["indexed"] == 0
assert result["skippedDuplicate"] == 3
def test_bootstrap_passes_connection_provenance():
adapter = _FakeSpAdapter()
knowledge = _FakeKnowledgeService()
connection = SimpleNamespace(mandateId="m1", userId="u1")
async def _run():
return await bootstrapSharepoint(
connectionId="c1",
adapter=adapter,
connection=connection,
knowledgeService=knowledge,
runExtractionFn=_fakeRunExtraction,
)
asyncio.run(_run())
for job in knowledge.calls:
assert job.sourceKind == "sharepoint_item"
assert job.mandateId == "m1"
assert job.provenance["connectionId"] == "c1"
assert job.provenance["authority"] == "msft"
assert job.provenance["service"] == "sharepoint"
assert job.contentVersion and job.contentVersion.startswith("etag-")
if __name__ == "__main__":
test_bootstrap_walks_sites_and_subfolders()
test_bootstrap_reports_duplicates_on_second_run()
test_bootstrap_passes_connection_provenance()
print("OK — bootstrapSharepoint tests passed")

View file

@ -0,0 +1,110 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Unit tests for cleanEmailBody.
Covers: HTML→text normalisation, quoted-reply removal, signature removal,
whitespace collapse and truncation. The utility is used during Outlook
bootstrap; buggy cleaning would leak quoted threads / signatures into every
embedding.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.subTextClean import (
cleanEmailBody,
)
def test_strips_html_tags_and_scripts():
html = (
"<html><head><style>body{}</style></head>"
"<body><p>Hello <b>world</b></p>"
"<script>alert('x')</script></body></html>"
)
cleaned = cleanEmailBody(html)
assert "Hello" in cleaned
assert "world" in cleaned
assert "<" not in cleaned
assert "alert" not in cleaned
def test_strips_quoted_reply_english():
body = (
"Actual answer from me.\n\n"
"On Mon, 1 Jan 2024 at 10:00, Someone <s@x.com> wrote:\n"
"> Original question?\n"
"> Second line.\n"
)
cleaned = cleanEmailBody(body)
assert "Actual answer" in cleaned
assert "Original question" not in cleaned
assert "wrote:" not in cleaned
def test_strips_quoted_reply_german():
body = (
"Meine Antwort.\n\n"
"Am 1. Januar 2024 um 10:00 schrieb Max Muster <m@x.com>:\n"
"> Ursprüngliche Frage?\n"
)
cleaned = cleanEmailBody(body)
assert "Meine Antwort" in cleaned
assert "Ursprüngliche Frage" not in cleaned
def test_strips_signature_after_dashes():
body = (
"Kurze Nachricht.\n"
"\n"
"--\n"
"Max Muster\n"
"Vorstand, Beispiel GmbH\n"
)
cleaned = cleanEmailBody(body)
assert "Kurze Nachricht" in cleaned
assert "Beispiel GmbH" not in cleaned
def test_strips_signature_salutation_de():
body = (
"Die eigentliche Information steht hier.\n\n"
"Mit freundlichen Grüßen\n"
"Max Muster"
)
cleaned = cleanEmailBody(body)
assert "eigentliche Information" in cleaned
assert "Max Muster" not in cleaned
def test_truncate_to_max_chars():
body = "abc " * 5000
cleaned = cleanEmailBody(body, maxChars=200)
assert len(cleaned) <= 201 # includes trailing ellipsis
def test_empty_input_returns_empty_string():
assert cleanEmailBody("") == ""
assert cleanEmailBody(None) == "" # type: ignore[arg-type]
def test_collapses_whitespace():
body = "A lot of spaces\n\n\n\nand blank lines"
cleaned = cleanEmailBody(body)
assert " " not in cleaned
assert "\n\n\n" not in cleaned
if __name__ == "__main__":
test_strips_html_tags_and_scripts()
test_strips_quoted_reply_english()
test_strips_quoted_reply_german()
test_strips_signature_after_dashes()
test_strips_signature_salutation_de()
test_truncate_to_max_chars()
test_empty_input_returns_empty_string()
test_collapses_whitespace()
print("OK — cleanEmailBody tests passed")

View file

@ -0,0 +1,119 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Purge tests for KnowledgeObjects.deleteFileContentIndexByConnectionId.
Ensures that a `connection.revoked` event wipes every FileContentIndex + chunk
linked to the given connectionId while leaving entries from other connections
(or upload-files with connectionId=None) intact.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.datamodels.datamodelKnowledge import FileContentIndex, ContentChunk
from modules.interfaces.interfaceDbKnowledge import KnowledgeObjects
class _FakeDb:
"""Minimal in-memory stand-in for ``KnowledgeObjects.db``.
Supports just the subset of APIs that deleteFileContentIndexByConnectionId
touches: getRecordset(FileContentIndex|ContentChunk, ...) + recordDelete.
"""
def __init__(self):
self.indexRows: dict = {}
self.chunks: dict = {}
def addIndex(self, row: dict) -> None:
self.indexRows[row["id"]] = row
def addChunk(self, row: dict) -> None:
self.chunks[row["id"]] = row
def getRecordset(self, modelClass, recordFilter=None, **_):
filter_ = recordFilter or {}
if modelClass is FileContentIndex:
rows = list(self.indexRows.values())
elif modelClass is ContentChunk:
rows = list(self.chunks.values())
else:
return []
def match(row):
for k, v in filter_.items():
if row.get(k) != v:
return False
return True
return [r for r in rows if match(r)]
def recordDelete(self, modelClass, recordId):
if modelClass is FileContentIndex:
return self.indexRows.pop(recordId, None) is not None
if modelClass is ContentChunk:
return self.chunks.pop(recordId, None) is not None
return False
def _buildKnowledge():
"""Instantiate KnowledgeObjects without triggering the real DB bootstrap."""
ko = KnowledgeObjects.__new__(KnowledgeObjects)
ko.currentUser = None
ko.userId = None
ko._scopeCache = {}
ko.db = _FakeDb()
return ko
def test_purge_by_connection_removes_only_matching_rows():
ko = _buildKnowledge()
ko.db.addIndex({"id": "sp1", "connectionId": "cx", "mandateId": "m1", "sourceKind": "sharepoint_item"})
ko.db.addIndex({"id": "sp2", "connectionId": "cx", "mandateId": "m1", "sourceKind": "sharepoint_item"})
ko.db.addIndex({"id": "upload", "connectionId": None, "mandateId": "m1", "sourceKind": "file"})
ko.db.addIndex({"id": "other", "connectionId": "cy", "mandateId": "m1", "sourceKind": "outlook_message"})
ko.db.addChunk({"id": "c1", "fileId": "sp1"})
ko.db.addChunk({"id": "c2", "fileId": "sp1"})
ko.db.addChunk({"id": "c3", "fileId": "sp2"})
ko.db.addChunk({"id": "c4", "fileId": "upload"})
ko.db.addChunk({"id": "c5", "fileId": "other"})
result = ko.deleteFileContentIndexByConnectionId("cx")
assert result == {"indexRows": 2, "chunks": 3}
assert "sp1" not in ko.db.indexRows
assert "sp2" not in ko.db.indexRows
assert "upload" in ko.db.indexRows
assert "other" in ko.db.indexRows
assert set(ko.db.chunks.keys()) == {"c4", "c5"}
def test_purge_with_empty_connection_id_is_a_noop():
ko = _buildKnowledge()
ko.db.addIndex({"id": "sp1", "connectionId": "cx"})
ko.db.addChunk({"id": "c1", "fileId": "sp1"})
result = ko.deleteFileContentIndexByConnectionId("")
assert result == {"indexRows": 0, "chunks": 0}
assert "sp1" in ko.db.indexRows
def test_purge_unknown_connection_returns_zero():
ko = _buildKnowledge()
ko.db.addIndex({"id": "sp1", "connectionId": "cx"})
result = ko.deleteFileContentIndexByConnectionId("nope")
assert result == {"indexRows": 0, "chunks": 0}
assert "sp1" in ko.db.indexRows
if __name__ == "__main__":
test_purge_by_connection_removes_only_matching_rows()
test_purge_with_empty_connection_id_is_a_noop()
test_purge_unknown_connection_returns_zero()
print("OK — connection-purge tests passed")

View file

@ -0,0 +1,124 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Test that runExtraction preserves per-part granularity when mergeStrategy=None.
The default MergeStrategy concatenates all text parts into a single ContentPart, which
collapses multi-page documents into one blob. This destroys RAG retrieval because every
document ends up as a single ContentChunk with a "blurred average" embedding.
Ingestion pipelines (requestIngestion callers) MUST pass mergeStrategy=None to preserve
per-page / per-section chunks.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.datamodels.datamodelExtraction import (
ContentPart,
ExtractionOptions,
MergeStrategy,
)
from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction
from modules.serviceCenter.services.serviceExtraction.subRegistry import (
ChunkerRegistry,
Extractor,
ExtractorRegistry,
)
class _FakeMultiPagePdfExtractor(Extractor):
"""Emits one text ContentPart per simulated page."""
def __init__(self, pageCount: int = 10):
self.pageCount = pageCount
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
return mimeType == "application/pdf"
def getSupportedExtensions(self):
return [".pdf"]
def getSupportedMimeTypes(self):
return ["application/pdf"]
def extract(self, fileBytes: bytes, context):
return [
ContentPart(
id=f"page-{i}",
parentId=None,
label=f"page_{i + 1}",
typeGroup="text",
mimeType="text/plain",
data=f"Page {i + 1} content — distinct semantic anchor #{i}",
metadata={"pageIndex": i, "size": 64},
)
for i in range(self.pageCount)
]
def _buildRegistry(pageCount: int) -> ExtractorRegistry:
registry = ExtractorRegistry()
fake = _FakeMultiPagePdfExtractor(pageCount)
registry.register("application/pdf", fake)
registry.register("pdf", fake)
return registry
def test_default_options_merge_all_text_parts_into_one():
"""Regression safeguard: default ExtractionOptions still merges (legacy behaviour).
Non-ingestion callers (AI processing, summarization) rely on this default.
"""
registry = _buildRegistry(pageCount=5)
extracted = runExtraction(
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
ExtractionOptions(),
)
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
assert len(textParts) == 1, (
f"Default options should merge all text parts into one, got {len(textParts)}"
)
assert "Page 1" in textParts[0].data and "Page 5" in textParts[0].data, (
"Merged text should contain content from all pages"
)
print("test_default_options_merge_all_text_parts_into_one [PASS]")
def test_merge_none_preserves_all_text_parts():
"""Core fix: mergeStrategy=None preserves per-page granularity for RAG ingestion."""
registry = _buildRegistry(pageCount=500)
extracted = runExtraction(
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
ExtractionOptions(mergeStrategy=None),
)
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
assert len(textParts) == 500, (
f"mergeStrategy=None should preserve all 500 text parts, got {len(textParts)}"
)
assert textParts[0].label == "page_1"
assert textParts[-1].label == "page_500"
print("test_merge_none_preserves_all_text_parts [PASS]")
def test_explicit_merge_strategy_still_merges():
"""Callers can still opt in to merging by passing an explicit MergeStrategy."""
registry = _buildRegistry(pageCount=3)
extracted = runExtraction(
registry, ChunkerRegistry(), b"", "sample.pdf", "application/pdf",
ExtractionOptions(mergeStrategy=MergeStrategy()),
)
textParts = [p for p in extracted.parts if p.typeGroup == "text"]
assert len(textParts) == 1, (
f"Explicit MergeStrategy should merge, got {len(textParts)} parts"
)
print("test_explicit_merge_strategy_still_merges [PASS]")
if __name__ == "__main__":
test_default_options_merge_all_text_parts_into_one()
test_merge_none_preserves_all_text_parts()
test_explicit_merge_strategy_still_merges()
print("\nAll merge-strategy tests passed.")

View file

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Test that _computeIngestionHash is stable across re-extractions of the same source.
Extractors generate fresh contentObjectIds (uuid.uuid4()) per run. The ingestion
hash MUST therefore be derived from content (contentType + data + order) only;
otherwise idempotency (AC4) silently fails: every re-extraction looks "new" and
triggers full re-embedding.
"""
import os
import sys
import uuid
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import (
_computeIngestionHash,
)
def _makeObjects(seed: str = "alpha"):
"""Build a synthetic contentObjects list as routeDataFiles._autoIndexFile would."""
return [
{
"contentObjectId": str(uuid.uuid4()),
"contentType": "text",
"data": f"Page 1 of {seed}",
},
{
"contentObjectId": str(uuid.uuid4()),
"contentType": "text",
"data": f"Page 2 of {seed}",
},
{
"contentObjectId": str(uuid.uuid4()),
"contentType": "binary",
"data": "<image-bytes-as-b64>",
},
]
def test_hash_stable_across_uuid_regeneration():
"""Same content + different contentObjectIds → same hash."""
a = _makeObjects("alpha")
b = _makeObjects("alpha") # identical data, fresh UUIDs
assert [o["contentObjectId"] for o in a] != [o["contentObjectId"] for o in b]
assert _computeIngestionHash(a) == _computeIngestionHash(b)
def test_hash_changes_when_data_changes():
a = _makeObjects("alpha")
b = _makeObjects("beta")
assert _computeIngestionHash(a) != _computeIngestionHash(b)
def test_hash_is_order_sensitive():
"""Reordered pages produce a different hash (different document)."""
a = _makeObjects("alpha")
b = list(reversed(a))
assert _computeIngestionHash(a) != _computeIngestionHash(b)
def test_hash_distinguishes_text_vs_binary_with_same_payload():
a = [{"contentObjectId": "x", "contentType": "text", "data": "hello"}]
b = [{"contentObjectId": "x", "contentType": "binary", "data": "hello"}]
assert _computeIngestionHash(a) != _computeIngestionHash(b)
def test_hash_handles_empty_input():
assert _computeIngestionHash([]) == _computeIngestionHash([])
if __name__ == "__main__":
test_hash_stable_across_uuid_regeneration()
test_hash_changes_when_data_changes()
test_hash_is_order_sensitive()
test_hash_distinguishes_text_vs_binary_with_same_payload()
test_hash_handles_empty_input()
print("OK — all 5 ingestion-hash stability tests passed")

View file

@ -0,0 +1,235 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Unit tests for KnowledgeIngestionConsumer event dispatch.
- `connection.established` enqueues a `connection.bootstrap` job.
- `connection.revoked` triggers a synchronous purge via KnowledgeObjects.
"""
import asyncio
import os
import sys
import types
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as consumer
def _resetRegistration(monkeypatch):
"""Force the module-level guard to register fresh in each test."""
monkeypatch.setattr(consumer, "_registered", False)
def test_onConnectionEstablished_enqueues_bootstrap(monkeypatch):
startedJobs = []
async def _fakeStartJob(jobType, payload, **kwargs):
startedJobs.append({"jobType": jobType, "payload": payload, "kwargs": kwargs})
return "job-1"
monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
consumer._onConnectionEstablished(
connectionId="c1", authority="msft", userId="u1"
)
# Drain pending tasks created by the consumer.
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
# If the consumer created a Task on a closed loop the fake startJob
# was still called synchronously via asyncio.run — in either case we
# check the recorded call.
finally:
loop.close()
assert len(startedJobs) == 1
assert startedJobs[0]["jobType"] == consumer.BOOTSTRAP_JOB_TYPE
assert startedJobs[0]["payload"]["connectionId"] == "c1"
assert startedJobs[0]["payload"]["authority"] == "msft"
assert startedJobs[0]["kwargs"]["triggeredBy"] == "u1"
def test_onConnectionEstablished_ignores_missing_id(monkeypatch):
called = []
async def _fakeStartJob(*a, **kw):
called.append(1)
return "x"
monkeypatch.setattr(consumer, "startJob", _fakeStartJob)
consumer._onConnectionEstablished(connectionId="", authority="msft")
assert called == []
def test_onConnectionRevoked_runs_sync_purge(monkeypatch):
class _FakeKnowledge:
def __init__(self):
self.calls = []
def deleteFileContentIndexByConnectionId(self, cid):
self.calls.append(cid)
return {"indexRows": 2, "chunks": 5}
fakeKnow = _FakeKnowledge()
def _fakeGetInterface(_user=None):
return fakeKnow
monkeypatch.setattr(consumer, "getKnowledgeInterface", _fakeGetInterface)
consumer._onConnectionRevoked(
connectionId="c1", authority="msft", userId="u1", reason="disconnected"
)
assert fakeKnow.calls == ["c1"]
def test_onConnectionRevoked_ignores_missing_id(monkeypatch):
seen = []
def _fakeGetInterface(_user=None):
class _K:
def deleteFileContentIndexByConnectionId(self, cid):
seen.append(cid)
return {"indexRows": 0, "chunks": 0}
return _K()
monkeypatch.setattr(consumer, "getKnowledgeInterface", _fakeGetInterface)
consumer._onConnectionRevoked(connectionId="")
assert seen == []
def test_bootstrap_job_skips_unsupported_authority(monkeypatch):
async def _run():
result = await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "slack"}},
lambda *_: None,
)
return result
result = asyncio.run(_run())
assert result["skipped"] is True
assert result["authority"] == "slack"
assert result["reason"] == "unsupported_authority"
def test_bootstrap_job_dispatches_msft_parts(monkeypatch):
calls = {"sp": 0, "ol": 0}
async def _fakeSp(connectionId, progressCb=None):
calls["sp"] += 1
return {"indexed": 1}
async def _fakeOl(connectionId, progressCb=None):
calls["ol"] += 1
return {"indexed": 2}
fakeSharepoint = types.ModuleType("subConnectorSyncSharepoint")
fakeSharepoint.bootstrapSharepoint = _fakeSp
fakeOutlook = types.ModuleType("subConnectorSyncOutlook")
fakeOutlook.bootstrapOutlook = _fakeOl
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint",
fakeSharepoint,
)
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncOutlook",
fakeOutlook,
)
async def _run():
return await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "msft"}},
lambda *_: None,
)
result = asyncio.run(_run())
assert calls == {"sp": 1, "ol": 1}
assert result["sharepoint"] == {"indexed": 1}
assert result["outlook"] == {"indexed": 2}
def test_bootstrap_job_dispatches_google_parts(monkeypatch):
calls = {"gd": 0, "gm": 0}
async def _fakeGd(connectionId, progressCb=None):
calls["gd"] += 1
return {"indexed": 7}
async def _fakeGm(connectionId, progressCb=None):
calls["gm"] += 1
return {"indexed": 11}
fakeGdrive = types.ModuleType("subConnectorSyncGdrive")
fakeGdrive.bootstrapGdrive = _fakeGd
fakeGmail = types.ModuleType("subConnectorSyncGmail")
fakeGmail.bootstrapGmail = _fakeGm
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive",
fakeGdrive,
)
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail",
fakeGmail,
)
async def _run():
return await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "google"}},
lambda *_: None,
)
result = asyncio.run(_run())
assert calls == {"gd": 1, "gm": 1}
assert result["drive"] == {"indexed": 7}
assert result["gmail"] == {"indexed": 11}
def test_bootstrap_job_dispatches_clickup_part(monkeypatch):
calls = {"cu": 0}
async def _fakeCu(connectionId, progressCb=None):
calls["cu"] += 1
return {"indexed": 4}
fakeClickup = types.ModuleType("subConnectorSyncClickup")
fakeClickup.bootstrapClickup = _fakeCu
monkeypatch.setitem(
sys.modules,
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup",
fakeClickup,
)
async def _run():
return await consumer._bootstrapJobHandler(
{"payload": {"connectionId": "c1", "authority": "clickup"}},
lambda *_: None,
)
result = asyncio.run(_run())
assert calls == {"cu": 1}
assert result["clickup"] == {"indexed": 4}
if __name__ == "__main__":
# Usable without pytest fixtures for a quick smoke run.
class _MP:
def __init__(self):
self.undos = []
def setattr(self, target, name_or_value, value=None):
if value is None:
# the (string-target, value) two-argument form of monkeypatch.setattr is not supported here
raise SystemExit("use pytest monkeypatch in CLI")
self.undos.append((target, name_or_value, getattr(target, name_or_value)))
setattr(target, name_or_value, value)
def setitem(self, mapping, key, value):
self.undos.append((mapping, key, mapping.get(key)))
mapping[key] = value
print("Run via pytest: pytest tests/unit/services/test_knowledge_ingest_consumer.py")

View file

@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""Unit tests for P1d: consent gating, preference parsing, and walker behaviour.
Tests
-----
1. Bootstrap runner skips when ``knowledgeIngestionEnabled=False``.
2. ``loadConnectionPrefs`` returns safe defaults when preferences are absent.
3. ``loadConnectionPrefs`` maps all §2.6 keys correctly from a full prefs dict.
4. Gmail walker passes ``neutralize=True`` and ``mailContentDepth`` to IngestionJob.
5. Gmail walker produces only a header content-object when depth="metadata".
6. ClickUp walker skips description when scope="titles".
"""
from __future__ import annotations
import asyncio
import os
import sys
import types
import unittest
from typing import Any, Dict, Optional
from unittest.mock import AsyncMock, MagicMock, patch
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))
# ---------------------------------------------------------------------------
# 1. Bootstrap runner consent gate
# ---------------------------------------------------------------------------
class TestBootstrapConsentGate(unittest.TestCase):
"""_bootstrapJobHandler must no-op when knowledgeIngestionEnabled is False."""
def _makeJob(self, connectionId="c-test", authority="google"):
return {"payload": {"connectionId": connectionId, "authority": authority}}
def _makeConn(self, enabled: bool):
conn = MagicMock()
conn.knowledgeIngestionEnabled = enabled
return conn
def test_skips_when_consent_disabled(self):
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as sut
fake_root = MagicMock()
fake_root.getUserConnectionById.return_value = self._makeConn(False)
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=fake_root):
            result = asyncio.run(
sut._bootstrapJobHandler(self._makeJob(), lambda *a: None)
)
assert result.get("skipped") is True
assert result.get("reason") == "consent_disabled"
fake_root.getUserConnectionById.assert_called_once_with("c-test")
def test_proceeds_when_consent_enabled(self):
"""When consent is enabled, the handler should call at least one walker."""
from modules.serviceCenter.services.serviceKnowledge import subConnectorIngestConsumer as sut
fake_root = MagicMock()
fake_root.getUserConnectionById.return_value = self._makeConn(True)
        # Patch the walkers with AsyncMocks so neither does real I/O.
with (
patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=fake_root),
patch(
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGdrive.bootstrapGdrive",
new=AsyncMock(return_value={"indexed": 0}),
),
patch(
"modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail.bootstrapGmail",
new=AsyncMock(return_value={"indexed": 0}),
),
):
            result = asyncio.run(
sut._bootstrapJobHandler(self._makeJob(authority="google"), lambda *a: None)
)
# Should not have 'skipped' at the top level.
assert result.get("skipped") is not True
assert result.get("authority") == "google"
# ---------------------------------------------------------------------------
# 2 + 3. loadConnectionPrefs
# ---------------------------------------------------------------------------
class TestLoadConnectionPrefs(unittest.TestCase):
def _makeConn(self, prefs: Optional[Dict[str, Any]]):
conn = MagicMock()
conn.knowledgePreferences = prefs
return conn
def _mockRoot(self, prefs):
root = MagicMock()
root.getUserConnectionById.return_value = self._makeConn(prefs)
return root
def test_returns_safe_defaults_when_prefs_none(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import (
ConnectionIngestionPrefs,
loadConnectionPrefs,
)
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(None)):
prefs = loadConnectionPrefs("x")
assert prefs.neutralizeBeforeEmbed is False
assert prefs.mailContentDepth == "full"
assert prefs.mailIndexAttachments is False
assert prefs.maxAgeDays == 90
assert prefs.clickupScope == "title_description"
assert prefs.gmailEnabled is True
assert prefs.driveEnabled is True
def test_maps_all_keys(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
raw = {
"neutralizeBeforeEmbed": True,
"mailContentDepth": "metadata",
"mailIndexAttachments": True,
"filesIndexBinaries": False,
"clickupScope": "with_comments",
"maxAgeDays": 30,
"surfaceToggles": {
"google": {"gmail": False, "drive": True},
"msft": {"sharepoint": False, "outlook": True},
},
}
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(raw)):
prefs = loadConnectionPrefs("x")
assert prefs.neutralizeBeforeEmbed is True
assert prefs.mailContentDepth == "metadata"
assert prefs.mailIndexAttachments is True
assert prefs.filesIndexBinaries is False
assert prefs.clickupScope == "with_comments"
assert prefs.maxAgeDays == 30
assert prefs.gmailEnabled is False
assert prefs.driveEnabled is True
assert prefs.sharepointEnabled is False
assert prefs.outlookEnabled is True
def test_invalid_depth_falls_back_to_default(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
raw = {"mailContentDepth": "everything_please"}
with patch("modules.interfaces.interfaceDbApp.getRootInterface", return_value=self._mockRoot(raw)):
prefs = loadConnectionPrefs("x")
assert prefs.mailContentDepth == "full"
# ---------------------------------------------------------------------------
# 4 + 5. Gmail walker: neutralize flag and mailContentDepth handling
# ---------------------------------------------------------------------------
class TestGmailWalkerPrefs(unittest.TestCase):
def _make_message(self, *, subject="Test", snippet="hello", body_text="full body"):
import base64
encoded = base64.urlsafe_b64encode(body_text.encode()).decode()
return {
"id": "msg-1",
"historyId": "h-42",
"threadId": "t-1",
"snippet": snippet,
"payload": {
"mimeType": "multipart/alternative",
"headers": [
{"name": "Subject", "value": subject},
{"name": "From", "value": "alice@example.com"},
{"name": "To", "value": "bob@example.com"},
{"name": "Date", "value": "Mon, 20 Apr 2026 10:00:00 +0000"},
],
"parts": [
{
"mimeType": "text/plain",
"body": {"data": encoded},
}
],
},
}
def test_neutralize_flag_forwarded(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
GmailBootstrapLimits,
_ingestMessage,
GmailBootstrapResult,
)
from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
captured_jobs = []
async def fake_requestIngestion(job: IngestionJob):
captured_jobs.append(job)
return MagicMock(status="indexed", error=None)
ks = MagicMock()
ks.requestIngestion = fake_requestIngestion
limits = GmailBootstrapLimits(neutralize=True, mailContentDepth="full")
result = GmailBootstrapResult(connectionId="c-1")
        asyncio.run(
_ingestMessage(
googleGetFn=AsyncMock(return_value={}),
knowledgeService=ks,
connectionId="c-1",
mandateId="",
userId="u-1",
labelId="INBOX",
message=self._make_message(),
limits=limits,
result=result,
progressCb=None,
)
)
assert len(captured_jobs) == 1
assert captured_jobs[0].neutralize is True
def test_metadata_depth_yields_only_header(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
_buildContentObjects,
)
message = self._make_message(snippet="hi", body_text="should be excluded")
parts = _buildContentObjects(message, maxBodyChars=4000, mailContentDepth="metadata")
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header"]
def test_snippet_depth_yields_header_and_snippet(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncGmail import (
_buildContentObjects,
)
message = self._make_message(snippet="hi", body_text="should be excluded")
parts = _buildContentObjects(message, maxBodyChars=4000, mailContentDepth="snippet")
ids = [p["contentObjectId"] for p in parts]
assert "header" in ids
assert "snippet" in ids
assert "body" not in ids
# ---------------------------------------------------------------------------
# 6. ClickUp walker respects clickupScope="titles"
# ---------------------------------------------------------------------------
class TestClickupWalkerScope(unittest.TestCase):
def _make_task(self):
return {
"id": "task-1",
"name": "Ship feature X",
"date_updated": "1713888000000",
"description": "This should be omitted",
"text_content": "Also omitted",
"status": {"status": "open"},
"assignees": [],
"tags": [],
"list": {"name": "Backlog"},
"folder": {},
"space": {"name": "Engineering"},
}
def test_titles_scope_omits_description(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
ClickupBootstrapLimits,
_buildContentObjects,
)
limits = ClickupBootstrapLimits(clickupScope="titles")
parts = _buildContentObjects(self._make_task(), limits)
ids = [p["contentObjectId"] for p in parts]
assert ids == ["header"]
assert "description" not in ids
def test_with_description_scope_includes_description(self):
from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncClickup import (
ClickupBootstrapLimits,
_buildContentObjects,
)
limits = ClickupBootstrapLimits(clickupScope="title_description")
parts = _buildContentObjects(self._make_task(), limits)
ids = [p["contentObjectId"] for p in parts]
assert "header" in ids
assert "description" in ids
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,206 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""Unit tests: universal action parameter validation + coercion.
This is the single source of truth for the action parameter contract:
every workflow action (called via the agent, the workflow graph, or REST)
runs through ``validateAndCoerceParameters`` before its body executes.
The tests pin three groups of behaviour:
1. **Required-parameter enforcement**: missing required params raise a
   typed ``InvalidActionParameterError`` instead of an opaque downstream
   error.
2. **Ref-payload normalization**: the agent's typed tool schema delivers
   ``FeatureInstanceRef`` as ``{id: ..., featureCode: ...}``, but actions
   expect a bare UUID string. Collapsing happens here, not in N action
   bodies.
3. **Primitive coercion**: ``"true"``/``"12"``/``"3.14"`` from JSON-shaped
   payloads are coerced to bool/int/float, removing ad-hoc branches.
Unknown extra keys (e.g. ``parentOperationId``) flow through unchanged so
the executor can keep injecting cross-cutting context.
"""
from __future__ import annotations
import pytest
from modules.datamodels.datamodelWorkflowActions import (
WorkflowActionDefinition, WorkflowActionParameter,
)
from modules.shared.frontendTypes import FrontendType
from modules.workflows.processing.shared.parameterValidation import (
InvalidActionParameterError, validateAndCoerceParameters,
)
def _makeActionDef(actionId: str = "trustee.refreshAccountingData", **paramDefs) -> WorkflowActionDefinition:
"""Build a real WorkflowActionDefinition; we only care about parameters."""
parameters = {
name: WorkflowActionParameter(
name=name,
type=spec["type"],
frontendType=FrontendType.TEXT,
required=spec.get("required", False),
description=spec.get("description", ""),
)
for name, spec in paramDefs.items()
}
return WorkflowActionDefinition(
actionId=actionId,
description="Test action",
parameters=parameters,
execute=lambda *_a, **_kw: None,
)
class TestRequiredEnforcement:
def test_missingRequiredRaises(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError) as excinfo:
validateAndCoerceParameters(actionDef, {})
assert excinfo.value.paramName == "featureInstanceId"
assert "required" in excinfo.value.reason.lower()
assert "trustee.refreshAccountingData.featureInstanceId" in str(excinfo.value)
def test_optionalMissingIsFine(self):
actionDef = _makeActionDef(
forceRefresh={"type": "bool", "required": False},
)
result = validateAndCoerceParameters(actionDef, {})
assert result == {}
def test_requiredNoneCountsAsMissing(self):
"""Explicit ``None`` for a required param is missing, not "unset"."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"featureInstanceId": None})
class TestRefNormalization:
"""Trustee bug regression: agent passed `{id: ..., featureCode: ...}` and
Postgres failed with "can't adapt type 'dict'", which the connector
silently turned into "no record found"."""
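    # Failure mode being prevented (reconstructed from the description above; the
    # driver call is illustrative, and "can't adapt type 'dict'" matches psycopg2's
    # wording when a raw dict reaches a query parameter):
    #   cursor.execute("SELECT ... WHERE id = %s", ({"id": "...", "featureCode": "trustee"},))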
def test_collapsesDictWithIdToString(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
result = validateAndCoerceParameters(actionDef, {
"featureInstanceId": {
"id": "b7574103-f4a3-4894-8c23-74bd0d0e83a5",
"featureCode": "trustee",
"label": "Demo AG",
},
})
assert result["featureInstanceId"] == "b7574103-f4a3-4894-8c23-74bd0d0e83a5"
def test_passThroughString(self):
"""Workflow execution path passes a plain UUID; must not break."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
uuid = "b7574103-f4a3-4894-8c23-74bd0d0e83a5"
result = validateAndCoerceParameters(actionDef, {"featureInstanceId": uuid})
assert result["featureInstanceId"] == uuid
def test_dictWithoutIdRaises(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError) as excinfo:
validateAndCoerceParameters(actionDef, {
"featureInstanceId": {"featureCode": "trustee", "label": "Demo"},
})
assert "id" in excinfo.value.reason
def test_otherDictTypeRaises(self):
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"featureInstanceId": 12345})
def test_connectionRefAlsoCollapses(self):
"""Same logic applies to every Ref-Schema, not just FeatureInstanceRef."""
actionDef = _makeActionDef(
actionId="msft.readEmails",
connection={"type": "ConnectionRef", "required": True},
)
result = validateAndCoerceParameters(actionDef, {
"connection": {"id": "conn-uuid-123", "authority": "msft", "label": "Outlook"},
})
assert result["connection"] == "conn-uuid-123"
class TestPrimitiveCoercion:
def test_boolFromTrueString(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
result = validateAndCoerceParameters(actionDef, {"forceRefresh": "true"})
assert result["forceRefresh"] is True
def test_boolFromFalseString(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
result = validateAndCoerceParameters(actionDef, {"forceRefresh": "false"})
assert result["forceRefresh"] is False
def test_boolPassthrough(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
assert validateAndCoerceParameters(actionDef, {"forceRefresh": True})["forceRefresh"] is True
def test_boolBadValueRaises(self):
actionDef = _makeActionDef(forceRefresh={"type": "bool", "required": False})
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"forceRefresh": "maybe"})
def test_intFromString(self):
actionDef = _makeActionDef(periodMonth={"type": "int", "required": False})
assert validateAndCoerceParameters(actionDef, {"periodMonth": "12"})["periodMonth"] == 12
def test_intBadValueRaises(self):
actionDef = _makeActionDef(periodMonth={"type": "int", "required": False})
with pytest.raises(InvalidActionParameterError):
validateAndCoerceParameters(actionDef, {"periodMonth": "twelve"})
def test_floatFromString(self):
actionDef = _makeActionDef(threshold={"type": "float", "required": False})
assert validateAndCoerceParameters(actionDef, {"threshold": "0.75"})["threshold"] == 0.75
class TestUnknownAndOtherTypes:
def test_unknownKeysPassThrough(self):
"""The executor injects parentOperationId, expectedDocumentFormats, etc.
Validation must not strip them."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
result = validateAndCoerceParameters(actionDef, {
"featureInstanceId": "uuid-123",
"parentOperationId": "action_xyz",
"expectedDocumentFormats": ["pdf", "txt"],
})
assert result["parentOperationId"] == "action_xyz"
assert result["expectedDocumentFormats"] == ["pdf", "txt"]
def test_strParamsAreUntouched(self):
actionDef = _makeActionDef(dateFrom={"type": "str", "required": False})
assert validateAndCoerceParameters(actionDef, {"dateFrom": "2025-01-01"})["dateFrom"] == "2025-01-01"
def test_listParamsAreUntouched(self):
actionDef = _makeActionDef(documentList={"type": "List[ActionDocument]", "required": False})
docs = [{"name": "a"}, {"name": "b"}]
assert validateAndCoerceParameters(actionDef, {"documentList": docs})["documentList"] is docs
def test_doesNotMutateInput(self):
"""validateAndCoerceParameters must return a new dict."""
actionDef = _makeActionDef(
featureInstanceId={"type": "FeatureInstanceRef", "required": True},
)
original = {"featureInstanceId": {"id": "uuid", "featureCode": "trustee"}}
result = validateAndCoerceParameters(actionDef, original)
assert isinstance(original["featureInstanceId"], dict)
assert result["featureInstanceId"] == "uuid"