From 75e07743a6743123f8bd214d992e1583e3267868 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Mon, 11 May 2026 21:26:20 +0200
Subject: [PATCH 1/8] google keys transferred to account poweron.center.ai

---
 env-gateway-dev.env                           |  10 +-
 env-gateway-int.env                           |  10 +-
 env-gateway-prod-forgejo.env                  |   8 +-
 env-gateway-prod.env                          |  10 +-
 modules/connectors/connectorVoiceGoogle.py    | 103 +++++++++++++++---
 .../features/commcoach/serviceCommcoach.py    |   2 +
 .../features/teamsbot/datamodelTeamsbot.py    |  13 +++
 modules/features/teamsbot/mainTeamsbot.py     |  13 +++
 .../features/teamsbot/routeFeatureTeamsbot.py |  54 +++++++++
 modules/features/teamsbot/service.py          |   1 +
 modules/interfaces/interfaceVoiceObjects.py   |  23 +++-
 modules/routes/routeVoiceGoogle.py            |  30 ++++-
 .../test_connectorVoiceGoogle_sttHelpers.py   |  23 ++++
 13 files changed, 257 insertions(+), 43 deletions(-)
 create mode 100644 tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py

diff --git a/env-gateway-dev.env b/env-gateway-dev.env
index 3709b0d8..158e00aa 100644
--- a/env-gateway-dev.env
+++ b/env-gateway-dev.env
@@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z
 APP_TOKEN_EXPIRY=300
 
 # CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss
+APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
 
 # Logging configuration
 APP_LOGGING_LOG_LEVEL = DEBUG
@@ -39,11 +39,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
 Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
 Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
 
-Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
+Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_AUTH_CLIENT_SECRET = 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
 Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
-Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = 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
 Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/env-gateway-int.env b/env-gateway-int.env
index d22b7d2a..33b21f1f 100644
--- a/env-gateway-int.env
+++ b/env-gateway-int.env
@@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ
 APP_TOKEN_EXPIRY=300
 
 # CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
+APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
 
 # Logging configuration
 APP_LOGGING_LOG_LEVEL = DEBUG
@@ -41,11 +41,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
 Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
 Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
 
-Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
+Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_AUTH_CLIENT_SECRET = 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
 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
-Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = 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
 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env
index e0ab455b..cc35f9c1 100644
--- a/env-gateway-prod-forgejo.env
+++ b/env-gateway-prod-forgejo.env
@@ -39,11 +39,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
 Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
 Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
 
-Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
+Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_AUTH_CLIENT_SECRET = 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
 Service_GOOGLE_AUTH_REDIRECT_URI = 
-Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = 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
 Service_GOOGLE_DATA_REDIRECT_URI = 
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/env-gateway-prod.env b/env-gateway-prod.env
index 0183ae1f..6c840977 100644
--- a/env-gateway-prod.env
+++ b/env-gateway-prod.env
@@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl
 APP_TOKEN_EXPIRY=300
 
 # CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
+APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
 
 # Logging configuration
 APP_LOGGING_LOG_LEVEL = DEBUG
@@ -40,11 +40,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
 Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
 Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
 
-Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
+Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_AUTH_CLIENT_SECRET = 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
 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
-Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o=
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = 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
 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py
index f875c72c..3dd3221d 100644
--- a/modules/connectors/connectorVoiceGoogle.py
+++ b/modules/connectors/connectorVoiceGoogle.py
@@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice
 
 logger = logging.getLogger(__name__)
 
+
+def _buildPrimarySttRecognitionFields(
+    *,
+    model: str,
+    lightweight: bool,
+) -> Dict[str, Any]:
+    """Build the RecognitionConfig fields common to batch and streaming STT.
+
+    Punctuation is always on and ``model`` is passed through unchanged. With
+    ``lightweight`` the word-level extras and the enhanced model are switched
+    off and one alternative is requested; otherwise they are switched on and
+    three alternatives are requested.
+    """
+    wantRichOutput = not lightweight
+    return {
+        "enable_automatic_punctuation": True,
+        "model": model,
+        "enable_word_time_offsets": wantRichOutput,
+        "enable_word_confidence": wantRichOutput,
+        "max_alternatives": 3 if wantRichOutput else 1,
+        "use_enhanced": wantRichOutput,
+    }
+
+
 # Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require
 # SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0).
 _GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts"
@@ -73,7 +97,10 @@ class ConnectorGoogleSpeech:
                            sampleRate: int = None, channels: int = None,
                            skipFallbacks: bool = False,
                            phraseHints: Optional[list] = None,
-                           alternativeLanguages: Optional[list] = None) -> Dict:
+                           alternativeLanguages: Optional[list] = None,
+                           model: str = "latest_long",
+                           lightweight: bool = False,
+                           audioFormat: Optional[str] = None) -> Dict:
         """
         Convert speech to text using Google Cloud Speech-to-Text API.
         
@@ -82,6 +109,9 @@ class ConnectorGoogleSpeech:
             language: Language code (e.g., 'de-DE', 'en-US')
             sample_rate: Audio sample rate (auto-detected if None)
             channels: Number of audio channels (auto-detected if None)
+            model: Google recognition model (e.g. latest_long, latest_short)
+            lightweight: If True, omit word timings/confidence, single alternative, no enhanced model
+            audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection
             
         Returns:
             Dict containing transcribed text, confidence, and metadata
@@ -92,8 +122,24 @@ class ConnectorGoogleSpeech:
                 logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
                 sampleRate = None
 
-            # Auto-detect audio format if not provided
-            if sampleRate is None or channels is None:
+            explicitFormat = (audioFormat or "").strip().lower() or None
+            if explicitFormat:
+                if channels is None:
+                    channels = 1
+                if sampleRate is None:
+                    if explicitFormat == "webm_opus":
+                        sampleRate = 48000
+                    elif explicitFormat == "linear16":
+                        sampleRate = 16000
+                    elif explicitFormat in ("mp3", "flac"):
+                        sampleRate = 44100
+                    elif explicitFormat == "wav":
+                        sampleRate = 16000
+                    else:
+                        sampleRate = 16000
+                audioFormat = explicitFormat
+                logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch")
+            elif sampleRate is None or channels is None:
                 validation = self.validateAudioFormat(audioContent)
                 if not validation["valid"]:
                     return {
@@ -156,12 +202,7 @@ class ConnectorGoogleSpeech:
                 "encoding": encoding,
                 "audio_channel_count": channels,
                 "language_code": language,
-                "enable_automatic_punctuation": True,
-                "model": "latest_long",
-                "enable_word_time_offsets": True,
-                "enable_word_confidence": True,
-                "max_alternatives": 3,
-                "use_enhanced": True,
+                **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
             }
 
             if phraseHints:
@@ -205,8 +246,7 @@ class ConnectorGoogleSpeech:
                         sample_rate_hertz=16000,
                         audio_channel_count=1,
                         language_code=language,
-                        enable_automatic_punctuation=True,
-                        model="latest_long"
+                        **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
                     )
                     try:
                         response = await asyncio.to_thread(
@@ -343,7 +383,7 @@ class ConnectorGoogleSpeech:
                             "error": "No recognition results (silence or unclear audio)"
                         }
 
-                models = ["latest_long", "phone_call", "latest_short"]
+                models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"]))
                 
                 for fallback_config in fallback_configs:
                     for model in models:
@@ -419,6 +459,9 @@ class ConnectorGoogleSpeech:
         audioQueue: asyncio.Queue,
         language: str = "de-DE",
         phraseHints: Optional[list] = None,
+        model: str = "latest_long",
+        lightweight: bool = False,
+        singleUtterance: bool = False,
     ) -> AsyncGenerator[Dict[str, Any], None]:
         """
         Stream audio chunks to Google Cloud Speech-to-Text Streaming API.
@@ -429,9 +472,13 @@ class ConnectorGoogleSpeech:
                         Send (b"", True) to signal end of stream.
             language: Language code
             phraseHints: Optional boost phrases
+            model: Google recognition model (e.g. latest_long, latest_short)
+            lightweight: If True, use non-enhanced primary config (lower latency)
+            singleUtterance: If True, end stream after first utterance (client should reconnect)
 
         Yields:
-            Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec
+            Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec;
+            optionally endOfSingleUtterance, reconnectRequired
         """
         STREAM_LIMIT_SEC = 290
         streamStartTs = time.time()
@@ -442,9 +489,7 @@ class ConnectorGoogleSpeech:
             "sample_rate_hertz": 48000,
             "audio_channel_count": 1,
             "language_code": language,
-            "enable_automatic_punctuation": True,
-            "model": "latest_long",
-            "use_enhanced": True,
+            **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight),
         }
         if phraseHints:
             configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)]
@@ -453,7 +498,7 @@ class ConnectorGoogleSpeech:
         streamingConfig = speech.StreamingRecognitionConfig(
             config=recognitionConfig,
             interim_results=True,
-            single_utterance=False,
+            single_utterance=singleUtterance,
         )
 
         import queue as threadQueue
@@ -490,7 +535,22 @@ class ConnectorGoogleSpeech:
                 )
                 for response in responseStream:
                     elapsed = time.time() - streamStartTs
-                    estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0
+
+                    durationFromResults = 0.0
+                    for result in response.results:
+                        rt = getattr(result, "result_end_time", None)
+                        if rt is None:
+                            continue
+                        if hasattr(rt, "total_seconds"):
+                            durationFromResults = max(durationFromResults, float(rt.total_seconds()))
+                        else:
+                            durationFromResults = max(
+                                durationFromResults,
+                                float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9,
+                            )
+                    estimatedDurationSec = durationFromResults if durationFromResults > 0 else (
+                        totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0
+                    )
 
                     finalTexts = []
                     interimTexts = []
@@ -524,6 +584,13 @@ class ConnectorGoogleSpeech:
                             "stabilityScore": 0.0,
                             "audioDurationSec": estimatedDurationSec,
                         }), loop)
+
+                    speechEvt = getattr(response, "speech_event_type", None)
+                    if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt):
+                        asyncio.run_coroutine_threadsafe(resultOutQ.put({
+                            "endOfSingleUtterance": True,
+                            "audioDurationSec": estimatedDurationSec,
+                        }), loop)
                     if elapsed >= STREAM_LIMIT_SEC:
                         logger.info("Streaming STT approaching 5-min limit, client should reconnect")
                         asyncio.run_coroutine_threadsafe(resultOutQ.put({
diff --git a/modules/features/commcoach/serviceCommcoach.py b/modules/features/commcoach/serviceCommcoach.py
index 39b96b55..5ac3af23 100644
--- a/modules/features/commcoach/serviceCommcoach.py
+++ b/modules/features/commcoach/serviceCommcoach.py
@@ -1080,6 +1080,8 @@ class CommcoachService:
             audioContent=audioContent,
             language=language,
             skipFallbacks=True,
+            model="latest_short",
+            lightweight=True,
         )
 
         transcribedText = ""
diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py
index a7a22c9b..076b0eda 100644
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@@ -111,6 +111,14 @@ class TeamsbotMeetingModule(PowerOnModel):
     defaultDirectorPrompts: Optional[str] = Field(default=None, description="JSON list of default director prompts")
     goals: Optional[str] = Field(default=None, description="Free-text goals")
     kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets")
+    defaultMeetingLink: Optional[str] = Field(
+        default=None,
+        description="Default Teams meeting URL for new sessions in this module (user can override)",
+    )
+    defaultBotName: Optional[str] = Field(
+        default=None,
+        description="Default display name for the bot when starting a session from this module",
+    )
     status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
 
 
@@ -257,6 +265,7 @@ class TeamsbotStartSessionRequest(BaseModel):
     """Request to start a new Teams Bot session."""
     meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)")
     botName: Optional[str] = Field(default=None, description="Override bot name for this session")
+    moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to")
     connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access")
     joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.")
     sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. meeting agenda, documents, background info)")
@@ -277,6 +286,8 @@ class CreateMeetingModuleRequest(BaseModel):
     defaultDirectorPrompts: Optional[str] = None
     goals: Optional[str] = None
     kpiTargets: Optional[str] = None
+    defaultMeetingLink: Optional[str] = None
+    defaultBotName: Optional[str] = None
 
 
 class UpdateMeetingModuleRequest(BaseModel):
@@ -287,6 +298,8 @@ class UpdateMeetingModuleRequest(BaseModel):
     defaultDirectorPrompts: Optional[str] = None
     goals: Optional[str] = None
     kpiTargets: Optional[str] = None
+    defaultMeetingLink: Optional[str] = None
+    defaultBotName: Optional[str] = None
     status: Optional[TeamsbotModuleStatus] = None
 
 
diff --git a/modules/features/teamsbot/mainTeamsbot.py b/modules/features/teamsbot/mainTeamsbot.py
index 66bc9247..850135d6 100644
--- a/modules/features/teamsbot/mainTeamsbot.py
+++ b/modules/features/teamsbot/mainTeamsbot.py
@@ -290,6 +290,19 @@ def _runMigrations():
 
         migrated = False
 
+        # M2: MeetingModule default meeting link / bot name (additive columns)
+        if _tableExists("TeamsbotMeetingModule"):
+            for col, sqlType in (
+                ("defaultMeetingLink", "TEXT"),
+                ("defaultBotName", "TEXT"),
+            ):
+                if not _columnExists("TeamsbotMeetingModule", col):
+                    cur.execute(
+                        f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL',
+                    )
+                    logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}")
+                    migrated = True
+
         # M1: Create default Adhoc modules for orphaned sessions
         #     (only runs if TeamsbotSession table exists with moduleId column
         #      and there are sessions without a moduleId)
diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py
index ab42db22..b3088f8e 100644
--- a/modules/features/teamsbot/routeFeatureTeamsbot.py
+++ b/modules/features/teamsbot/routeFeatureTeamsbot.py
@@ -280,6 +280,11 @@ async def startSession(
     mandateId = _validateInstanceAccess(instanceId, context)
     interface = _getInterface(context, instanceId)
     config = _getInstanceConfig(instanceId)
+
+    if body.moduleId:
+        mod = interface.getModule(body.moduleId)
+        if not mod or str(mod.get("instanceId") or "") != str(instanceId):
+            raise HTTPException(status_code=400, detail="Invalid moduleId for this instance")
     
     # Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.)
     cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink)
@@ -288,6 +293,7 @@ async def startSession(
     sessionData = TeamsbotSession(
         instanceId=instanceId,
         mandateId=mandateId,
+        moduleId=body.moduleId,
         meetingLink=cleanMeetingUrl,
         botName=body.botName or config.botName,
         sessionContext=body.sessionContext,
@@ -426,6 +432,54 @@ async def listSessions(
     return {"sessions": sessions}
 
 
+@router.get("/{instanceId}/dashboard/stream")
+@limiter.limit("60/minute")
+async def streamDashboard(
+    request: Request,
+    instanceId: str,
+    context: RequestContext = Depends(getRequestContext),
+):
+    """
+    Server-sent-events feed for the Teamsbot dashboard.
+    Every tick pushes one "dashboardState" event holding the session list and the
+    meeting modules of the instance; a tick that raises pushes an "error" event instead.
+    Ticks are 3s apart while a listed session is pending/joining/active, otherwise 20s.
+    The caller's user id is passed as the session filter unless the caller is a platform admin.
+    """
+    _validateInstanceAccess(instanceId, context)
+    interface = _getInterface(context, instanceId)
+    userId = str(context.user.id) if not context.isPlatformAdmin else None
+    liveStatuses = (TeamsbotSessionStatus.PENDING, TeamsbotSessionStatus.JOINING, TeamsbotSessionStatus.ACTIVE)
+    liveStates = {status.value for status in liveStatuses}
+    sseHeaders = {"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"}
+
+    async def eventGenerator():
+        while True:
+            visibleSessions = []
+            try:
+                visibleSessions = interface.getSessions(instanceId, includeEnded=True, userId=userId)
+                snapshot = {
+                    "type": "dashboardState",
+                    "sessions": visibleSessions,
+                    "modules": interface.getModules(instanceId),
+                }
+                yield f"data: {json.dumps(snapshot, default=str)}\n\n"
+            except asyncio.CancelledError:
+                raise
+            except Exception as ex:
+                logger.warning("dashboard stream tick failed: %s", ex)
+                errorFrame = json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})
+                yield f"data: {errorFrame}\n\n"
+            pauseSec = 20.0
+            for row in visibleSessions:
+                if row.get("status") in liveStates:
+                    pauseSec = 3.0
+                    break
+            await asyncio.sleep(pauseSec)
+
+    return StreamingResponse(eventGenerator(), media_type="text/event-stream", headers=sseHeaders)
+
+
 @router.get("/{instanceId}/sessions/{sessionId}")
 @limiter.limit("30/minute")
 async def getSession(
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index fe0d6c34..d520bf49 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -1225,6 +1225,7 @@ class TeamsbotService:
                 skipFallbacks=True,
                 phraseHints=phraseHints if phraseHints else None,
                 alternativeLanguages=["en-US"],
+                audioFormat="linear16",
             )
 
             if sttResult and sttResult.get("success") and sttResult.get("text"):
diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py
index d0b6f461..03729f86 100644
--- a/modules/interfaces/interfaceVoiceObjects.py
+++ b/modules/interfaces/interfaceVoiceObjects.py
@@ -69,7 +69,10 @@ class VoiceObjects:
                           sampleRate: int = None, channels: int = None,
                           skipFallbacks: bool = False,
                           phraseHints: list = None,
-                          alternativeLanguages: list = None) -> Dict[str, Any]:
+                          alternativeLanguages: list = None,
+                          model: str = "latest_long",
+                          lightweight: bool = False,
+                          audioFormat: Optional[str] = None) -> Dict[str, Any]:
         """
         Convert speech to text using Google Cloud Speech-to-Text API.
         
@@ -81,6 +84,9 @@ class VoiceObjects:
             skipFallbacks: If True, skip fallback attempts (use when audio format is known)
             phraseHints: Optional list of phrases to boost recognition (names, terms)
             alternativeLanguages: Optional list of additional language codes for multi-language
+            model: Google STT model (e.g. latest_long, latest_short)
+            lightweight: If True, omit word-level features and enhanced model
+            audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection
             
         Returns:
             Dict containing transcribed text, confidence, and metadata
@@ -97,6 +103,9 @@ class VoiceObjects:
                 skipFallbacks=skipFallbacks,
                 phraseHints=phraseHints,
                 alternativeLanguages=alternativeLanguages,
+                model=model,
+                lightweight=lightweight,
+                audioFormat=audioFormat,
             )
             
             if result["success"]:
@@ -120,13 +129,23 @@ class VoiceObjects:
         audioQueue: asyncio.Queue,
         language: str = "de-DE",
         phraseHints: Optional[list] = None,
+        model: str = "latest_long",
+        lightweight: bool = False,
+        singleUtterance: bool = False,
     ) -> AsyncGenerator[Dict[str, Any], None]:
         """
         Stream audio to Google Streaming STT and yield interim/final results.
         Billing is recorded for each final result.
         """
         connector = self._getGoogleSpeechConnector()
-        async for event in connector.streamingRecognize(audioQueue, language, phraseHints):
+        async for event in connector.streamingRecognize(
+            audioQueue,
+            language,
+            phraseHints,
+            model=model,
+            lightweight=lightweight,
+            singleUtterance=singleUtterance,
+        ):
             if event.get("isFinal") and self.billingCallback:
                 durationSec = event.get("audioDurationSec", 0)
                 priceCHF = connector.calculateSttCostCHF(durationSec)
diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py
index 8987e73f..10185cc2 100644
--- a/modules/routes/routeVoiceGoogle.py
+++ b/modules/routes/routeVoiceGoogle.py
@@ -155,12 +155,13 @@ async def sttStream(
 
     Protocol:
       Client sends JSON:
-        {"type": "open", "language": "de-DE"}
+        {"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true}
         {"type": "audio", "chunk": "<base64>"}
         {"type": "close"}
       Server sends JSON:
         {"type": "interim", "text": "..."}
         {"type": "final", "text": "...", "confidence": 0.95}
+        {"type": "end_of_single_utterance", "audioDurationSec": 0.0}
         {"type": "error", "message": "..."}
         {"type": "closed"}
     """
@@ -205,7 +206,12 @@ async def sttStream(
         logger.warning(f"STT billing pre-flight skipped: {e}")
 
     audioQueue: asyncio.Queue = asyncio.Queue()
-    language = "de-DE"
+    sttOpenOptions: Dict[str, Any] = {
+        "language": "de-DE",
+        "model": "latest_long",
+        "lightweight": False,
+        "singleUtterance": False,
+    }
     streamingTask: Optional[asyncio.Task] = None
     voiceInterface: Optional[VoiceObjects] = None
 
@@ -233,10 +239,23 @@ async def sttStream(
             voiceInterface.billingCallback = _billingCb
 
         try:
-            async for event in voiceInterface.streamingSpeechToText(audioQueue, language):
+            async for event in voiceInterface.streamingSpeechToText(
+                audioQueue,
+                sttOpenOptions["language"],
+                phraseHints=None,
+                model=sttOpenOptions["model"],
+                lightweight=sttOpenOptions["lightweight"],
+                singleUtterance=sttOpenOptions["singleUtterance"],
+            ):
                 if event.get("reconnectRequired"):
                     await _sendJson({"type": "reconnect_required"})
                     return
+                if event.get("endOfSingleUtterance"):
+                    await _sendJson({
+                        "type": "end_of_single_utterance",
+                        "audioDurationSec": event.get("audioDurationSec", 0.0),
+                    })
+                    continue
                 if event.get("isFinal"):
                     if event.get("transcript"):
                         await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)})
@@ -258,7 +277,10 @@ async def sttStream(
             msgType = (msg.get("type") or "").strip()
 
             if msgType == "open":
-                language = msg.get("language") or "de-DE"
+                sttOpenOptions["language"] = msg.get("language") or "de-DE"
+                sttOpenOptions["model"] = msg.get("model") or "latest_long"
+                sttOpenOptions["lightweight"] = bool(msg.get("lightweight"))
+                sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance"))
                 if streamingTask and not streamingTask.done():
                     await audioQueue.put((b"", True))
                     streamingTask.cancel()
diff --git a/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py b/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py
new file mode 100644
index 00000000..258dc0db
--- /dev/null
+++ b/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2025 Patrick Motsch
+"""Unit tests for Google STT helper config (no API calls)."""
+
+from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields
+
+
+def test_buildPrimaryStt_lightweight_stripsHeavyFeatures():  # lightweight=True: per-word extras, extra alternatives and the enhanced flag must be off
+    d = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True)
+    assert d["model"] == "latest_short"  # the requested model name is passed through unchanged
+    assert d["enable_word_time_offsets"] is False
+    assert d["enable_word_confidence"] is False
+    assert d["max_alternatives"] == 1  # a single alternative only
+    assert d["use_enhanced"] is False
+    assert d["enable_automatic_punctuation"] is True  # punctuation stays enabled even in lightweight mode
+
+
+def test_buildPrimaryStt_full_matchesLegacyDefaults():  # lightweight=False: per-word extras, 3 alternatives and the enhanced flag are all on
+    d = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False)
+    assert d["model"] == "latest_long"  # the requested model name is passed through unchanged
+    assert d["enable_word_time_offsets"] is True
+    assert d["enable_word_confidence"] is True
+    assert d["max_alternatives"] == 3  # three alternatives in the full configuration
+    assert d["use_enhanced"] is True

From 03a6d3248bf3c449c3026f58e4cd2ca226f87027 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Mon, 11 May 2026 23:59:31 +0200
Subject: [PATCH 2/8] fixed teamsbot issues

---
 env-gateway-dev.env                           | 12 +++----
 env-gateway-int.env                           | 14 ++++----
 env-gateway-prod-forgejo.env                  | 14 ++++----
 env-gateway-prod.env                          | 14 ++++----
 modules/features/teamsbot/service.py          | 34 ++++++++++---------
 .../services/serviceAgent/agentLoop.py        |  7 +++-
 .../coreTools/_connectionTools.py             |  1 +
 .../serviceAgent/coreTools/_mediaTools.py     |  6 +++-
 .../serviceAgent/coreTools/_workspaceTools.py |  9 +++--
 .../services/serviceAgent/datamodelAgent.py   |  6 ++++
 .../services/serviceAgent/toolRegistry.py     |  3 +-
 .../services/serviceAi/mainServiceAi.py       | 13 ++++---
 .../services/serviceWeb/mainServiceWeb.py     |  9 +++--
 13 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/env-gateway-dev.env b/env-gateway-dev.env
index 158e00aa..7802b33d 100644
--- a/env-gateway-dev.env
+++ b/env-gateway-dev.env
@@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
 APP_LOGGING_BACKUP_COUNT = 5
 
 # OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP.
-Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
+Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo=
 Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback
-Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
+Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4=
 Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback
 
 Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU=
 Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback
 Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_DATA_CLIENT_SECRET = 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
+Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg=
 Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/env-gateway-int.env b/env-gateway-int.env
index 33b21f1f..a1924fff 100644
--- a/env-gateway-int.env
+++ b/env-gateway-int.env
@@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
 APP_LOGGING_BACKUP_COUNT = 5
 
 # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
-Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
+Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E=
 Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback
-Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
+Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY=
 Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback
 
 Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE=
 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback
-Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY=
 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env
index cc35f9c1..b9c9e686 100644
--- a/env-gateway-prod-forgejo.env
+++ b/env-gateway-prod-forgejo.env
@@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
 APP_LOGGING_BACKUP_COUNT = 5
 
 # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
-Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
+Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY=
 Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback
-Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
+Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk=
 Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback
 
 Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
 Service_GOOGLE_AUTH_REDIRECT_URI = 
-Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
 Service_GOOGLE_DATA_REDIRECT_URI = 
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/env-gateway-prod.env b/env-gateway-prod.env
index 6c840977..d42bb0f9 100644
--- a/env-gateway-prod.env
+++ b/env-gateway-prod.env
@@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760
 APP_LOGGING_BACKUP_COUNT = 5
 
 # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs)
-Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
+Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU=
 Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback
-Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4=
+Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8
+Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0=
 Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback
 
 Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E=
 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback
-Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
-Service_GOOGLE_AUTH_CLIENT_SECRET = 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
+Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
+Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY=
 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index d520bf49..8017e6dc 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = {
     ),
     "agentRound": (
         "One short sentence (max ~14 words) the assistant says BETWEEN rounds "
-        "of a longer agent task to signal that work is still in progress. "
-        "Include the placeholder tokens '{round}' and '{maxRounds}' so the "
-        "caller can substitute the actual numbers — e.g. 'Step {round} of "
-        "{maxRounds}, still working.'"
+        "of a longer agent task to update the audience on what it is doing. "
+        "Include the placeholder token '{activity}' which will be filled with "
+        "the current activity — e.g. 'I am {activity}, one moment...' or "
+        "'Currently {activity}, almost there...'. Do NOT include step numbers."
     ),
 }
 
@@ -1253,19 +1253,18 @@ class TeamsbotService:
 
     def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""):
         """Track current speaker from captions for STT attribution.
-        When the first non-bot caption arrives, retroactively attributes
-        any STT segments that were created before a speaker was known."""
+        Retroactively attributes any unattributed STT segments whenever a
+        new non-bot caption speaker arrives (not just the first time)."""
         if not speaker:
             return
         normalizedSpeaker = speaker.strip()
         if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker):
             return
 
-        prevSpeaker = self._lastCaptionSpeaker
         self._lastCaptionSpeaker = normalizedSpeaker
         self._knownSpeakers.add(normalizedSpeaker)
 
-        if prevSpeaker is None and self._unattributedTranscriptIds:
+        if self._unattributedTranscriptIds:
             from . import interfaceFeatureTeamsbot as interfaceDb
             interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId)
             for tid in self._unattributedTranscriptIds:
@@ -3244,15 +3243,17 @@ class TeamsbotService:
         return await self._pickEphemeralPhrase("agentBusy")
 
     async def _interimAgentRoundMessage(
-        self, roundNum: int, maxRounds: int
+        self, lastToolLabel: Optional[str] = None
     ) -> Optional[str]:
         """Per-round progress notice for long agent runs (meeting voice /
         chat, ephemeral). Phrasing is AI-localised once per session;
-        ``{round}`` and ``{maxRounds}`` placeholders are substituted at
-        render time. Returns ``None`` if generation failed."""
+        ``{activity}`` placeholder is substituted with the tool's
+        ``displayLabel`` from the ToolDefinition. Returns ``None`` if
+        generation failed."""
+        activity = lastToolLabel or "processing your request"
         return await self._pickEphemeralPhrase(
             "agentRound",
-            substitutions={"round": roundNum, "maxRounds": maxRounds},
+            substitutions={"activity": activity},
         )
 
     async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
@@ -3371,6 +3372,7 @@ class TeamsbotService:
 
         finalText: str = ""
         rounds = 0
+        lastToolLabel: Optional[str] = None
         try:
             async for event in agentService.runAgent(
                 prompt=taskText,
@@ -3391,11 +3393,9 @@ class TeamsbotService:
                         "round": roundNum,
                         "maxRounds": maxR,
                     })
-                    # Runde 1: schon allgemeiner Start-Hinweis; ab Runde 2 ins Meeting melden.
-                    # Director prompts bleiben still — keine Zwischen-Updates ins Meeting.
                     if roundNum >= 2 and not directorPromptMode:
                         try:
-                            roundText = await self._interimAgentRoundMessage(roundNum, maxR)
+                            roundText = await self._interimAgentRoundMessage(lastToolLabel)
                             if roundText:
                                 await self._notifyMeetingEphemeral(sessionId, roundText)
                         except Exception as roundNoticeErr:
@@ -3403,7 +3403,9 @@ class TeamsbotService:
                                 f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}"
                             )
                 elif event.type == AgentEventTypeEnum.TOOL_CALL:
-                    toolName = (event.data or {}).get("toolName") if event.data else None
+                    evtData = event.data or {}
+                    toolName = evtData.get("toolName")
+                    lastToolLabel = evtData.get("displayLabel")
                     await _emitSessionEvent(sessionId, "agentRun", {
                         "source": sourceLabel,
                         "promptId": promptId,
diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py
index b51ffb85..c1571994 100644
--- a/modules/serviceCenter/services/serviceAgent/agentLoop.py
+++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py
@@ -335,9 +335,14 @@ async def runAgentLoop(
 
         # Execute tool calls
         for tc in toolCalls:
+            toolDef = toolRegistry.getTool(tc.name)
             yield AgentEvent(
                 type=AgentEventTypeEnum.TOOL_CALL,
-                data={"toolName": tc.name, "args": tc.args}
+                data={
+                    "toolName": tc.name,
+                    "displayLabel": toolDef.displayLabel if toolDef else None,
+                    "args": tc.args,
+                }
             )
 
         results = await _executeToolCalls(toolCalls, toolRegistry, {
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py
index b0381da2..0f3e4582 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py
@@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services):
             "required": ["connectionId", "to", "subject", "body"],
         },
         readOnly=False,
+        displayLabel="composing an email",
     )
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
index a3fbb3ed..79e53092 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
@@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
             },
         },
         readOnly=False,
+        displayLabel="creating a document",
     )
 
     # ── textToSpeech tool ──────────────────────────────────────────────
@@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
             "required": ["prompt"],
         },
         readOnly=False,
+        displayLabel="generating an image",
     )
 
     # ── createChart tool ─────────────────────────────────────────────────
@@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
             "required": ["datasets"],
         },
         readOnly=False,
+        displayLabel="creating a chart",
     )
 
     # ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ──
@@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
             },
             "required": ["code"]
         },
-        readOnly=True
+        readOnly=True,
+        displayLabel="running calculations",
     )
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py
index 2ffc808e..c6584735 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py
@@ -359,7 +359,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
             },
             "required": ["fileId"]
         },
-        readOnly=True
+        readOnly=True,
+        displayLabel="reviewing a document",
     )
 
     registry.register(
@@ -406,7 +407,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
             "properties": {"query": {"type": "string", "description": "Search query"}},
             "required": ["query"]
         },
-        readOnly=True
+        readOnly=True,
+        displayLabel="researching on the web",
     )
 
     registry.register(
@@ -581,7 +583,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
             },
             "required": ["url"]
         },
-        readOnly=True
+        readOnly=True,
+        displayLabel="reading a webpage",
     )
 
     registry.register(
diff --git a/modules/serviceCenter/services/serviceAgent/datamodelAgent.py b/modules/serviceCenter/services/serviceAgent/datamodelAgent.py
index 9428af49..889f31e8 100644
--- a/modules/serviceCenter/services/serviceAgent/datamodelAgent.py
+++ b/modules/serviceCenter/services/serviceAgent/datamodelAgent.py
@@ -41,6 +41,12 @@ class ToolDefinition(BaseModel):
     """Schema for a tool available to the agent."""
     name: str = Field(description="Unique tool name")
     description: str = Field(description="What this tool does")
+    displayLabel: Optional[str] = Field(
+        default=None,
+        description="Short human-readable activity phrase (e.g. 'researching on the web'). "
+                    "Used for live progress messages in meetings. English gerund phrase; "
+                    "localised by the caller."
+    )
     parameters: Dict[str, Any] = Field(
         default_factory=dict,
         description="JSON Schema for tool parameters"
diff --git a/modules/serviceCenter/services/serviceAgent/toolRegistry.py b/modules/serviceCenter/services/serviceAgent/toolRegistry.py
index b4b5cd86..b2ba67a0 100644
--- a/modules/serviceCenter/services/serviceAgent/toolRegistry.py
+++ b/modules/serviceCenter/services/serviceAgent/toolRegistry.py
@@ -23,7 +23,7 @@ class ToolRegistry:
     def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]],
                  description: str = "", parameters: Dict[str, Any] = None,
                  readOnly: bool = False, featureType: str = None,
-                 toolSet: str = None):
+                 toolSet: str = None, displayLabel: str = None):
         """Register a tool with its handler function."""
         if name in self._tools:
             logger.warning(f"Tool '{name}' already registered, overwriting")
@@ -31,6 +31,7 @@ class ToolRegistry:
         self._tools[name] = ToolDefinition(
             name=name,
             description=description,
+            displayLabel=displayLabel,
             parameters=parameters or {},
             readOnly=readOnly,
             featureType=featureType,
diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py
index bcdb9552..d4e5ccdb 100644
--- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py
+++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py
@@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff
 
 Setze "needsAgent": true und "agentReason": "<kurze Beschreibung der Aufgabe in einem Satz>"
 WENN die Aufgabe eines oder mehrere dieser Merkmale hat:
-- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das")
-- E-Mail an Teilnehmer/Kontakte versenden
-- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen
-- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.)
-- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.)
+- Recherche im Internet oder aktuelle Informationen noetig
+- Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast
+- E-Mail versenden
+- Dokument generieren oder in einer Datenquelle ablegen
+- Mehrere Schritte oder Tool-Aufrufe noetig
+- Daten aus externen Quellen abrufen
+
+Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true.
 
 Wenn needsAgent=true:
 - Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig).
diff --git a/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py b/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py
index 4ffc15aa..c4e24947 100644
--- a/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py
+++ b/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py
@@ -98,7 +98,8 @@ class WebService:
             searchUrls = []
             searchResultsWithContent = []
             if needsSearch and (not allUrls or len(allUrls) < maxNumberPages):
-                self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
+                if operationId:
+                    self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content")
                 
                 try:
                     searchUrls, searchResultsWithContent = await self._performWebSearch(
@@ -113,16 +114,14 @@ class WebService:
                     searchUrls = []
                     searchResultsWithContent = []
                 
-                # Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant)
                 if searchUrls:
-                    # Prepend Tavily URLs to the list (they're more relevant)
                     allUrls = searchUrls + allUrls
                     logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total")
                 else:
-                    # If Tavily search failed, use AI-extracted URLs
                     logger.warning("Tavily search returned no URLs, using AI-extracted URLs only")
                 
-                self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
+                if operationId:
+                    self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs")
             
             # If we have search results (even without content), use them directly instead of crawling
             # Tavily search results are more relevant than generic AI-extracted URLs

From 48c0f900af160716ffdecd9e3293b15ac289835a Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 12 May 2026 15:19:01 +0200
Subject: [PATCH 3/8] rag: add RAG inventory route, policy resolver, and connector sync updates

---
 app.py                                        |   3 +
 env-gateway-dev.env                           |   7 -
 env-gateway-int.env                           |   7 -
 env-gateway-prod-forgejo.env                  |  11 +-
 env-gateway-prod.env                          |   7 -
 modules/datamodels/datamodelDataSource.py     |  12 +-
 modules/datamodels/datamodelUam.py            |   8 +-
 modules/features/workspace/mainWorkspace.py   |   7 -
 .../workspace/routeFeatureWorkspace.py        |  45 ---
 modules/interfaces/interfaceDbKnowledge.py    |  54 ++++
 modules/routes/routeBilling.py                |  25 +-
 modules/routes/routeDataConnections.py        | 178 +++++++++++-
 modules/routes/routeDataFiles.py              |   2 +-
 modules/routes/routeDataPrompts.py            |   2 +-
 modules/routes/routeDataSources.py            |  74 ++++-
 modules/routes/routeHelpers.py                | 113 ++++++--
 modules/routes/routeRagInventory.py           | 267 ++++++++++++++++++
 .../serviceBackgroundJobs/__init__.py         |   6 +
 .../mainBackgroundJobService.py               | 120 +++++++-
 .../subConnectorIngestConsumer.py             |  79 +++++-
 .../serviceKnowledge/subConnectorPrefs.py     |  26 +-
 .../subConnectorSyncClickup.py                | 126 ++++++---
 .../subConnectorSyncGdrive.py                 | 131 +++++----
 .../serviceKnowledge/subConnectorSyncGmail.py |  97 +++++--
 .../subConnectorSyncOutlook.py                | 104 ++++---
 .../subConnectorSyncSharepoint.py             |  76 +++--
 .../serviceKnowledge/subPolicyResolver.py     |  78 +++++
 modules/system/mainSystem.py                  |   8 +
 scripts/script_db_migrate_datasource_rag.py   |  88 ++++++
 tests/unit/services/test_bootstrap_clickup.py |  41 ++-
 tests/unit/services/test_bootstrap_gdrive.py  |  19 +-
 tests/unit/services/test_bootstrap_outlook.py |   4 +
 32 files changed, 1450 insertions(+), 375 deletions(-)
 create mode 100644 modules/routes/routeRagInventory.py
 create mode 100644 modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py
 create mode 100644 scripts/script_db_migrate_datasource_rag.py

diff --git a/app.py b/app.py
index f5adb3d7..73a64064 100644
--- a/app.py
+++ b/app.py
@@ -604,6 +604,9 @@ app.include_router(promptRouter)
 from modules.routes.routeDataConnections import router as connectionsRouter
 app.include_router(connectionsRouter)
 
+from modules.routes.routeRagInventory import router as ragInventoryRouter
+app.include_router(ragInventoryRouter)
+
 from modules.routes.routeTableViews import router as tableViewsRouter
 app.include_router(tableViewsRouter)
 
diff --git a/env-gateway-dev.env b/env-gateway-dev.env
index 7802b33d..f4e7e244 100644
--- a/env-gateway-dev.env
+++ b/env-gateway-dev.env
@@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug
 APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True
 APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync
 
-# Manadate Pre-Processing Servers
-PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ==
-
-# Preprocessor API Configuration
-PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
-PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
-
 # Azure Communication Services Email Configuration
 MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
 MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
diff --git a/env-gateway-int.env b/env-gateway-int.env
index a1924fff..0898a985 100644
--- a/env-gateway-int.env
+++ b/env-gateway-int.env
@@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
 APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
 APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
 
-# Manadate Pre-Processing Servers
-PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ==
-
-# Preprocessor API Configuration
-PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
-PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
-
 # Azure Communication Services Email Configuration
 MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
 MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env
index b9c9e686..80a175e6 100644
--- a/env-gateway-prod-forgejo.env
+++ b/env-gateway-prod-forgejo.env
@@ -41,10 +41,10 @@ Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect
 
 Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
 Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20=
-Service_GOOGLE_AUTH_REDIRECT_URI = 
+Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback
 Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com
 Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI=
-Service_GOOGLE_DATA_REDIRECT_URI = 
+Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback
 
 # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly.
 Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4
@@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
 APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
 APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
 
-# Manadate Pre-Processing Servers
-PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
-
-# Preprocessor API Configuration
-PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
-PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
-
 # Azure Communication Services Email Configuration
 MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
 MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
diff --git a/env-gateway-prod.env b/env-gateway-prod.env
index d42bb0f9..6f4cfab0 100644
--- a/env-gateway-prod.env
+++ b/env-gateway-prod.env
@@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
 APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE
 APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync
 
-# Manadate Pre-Processing Servers
-PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo=
-
-# Preprocessor API Configuration
-PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990
-PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query
-
 # Azure Communication Services Email Configuration
 MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt
 MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss
diff --git a/modules/datamodels/datamodelDataSource.py b/modules/datamodels/datamodelDataSource.py
index d9e40bde..fe3f0442 100644
--- a/modules/datamodels/datamodelDataSource.py
+++ b/modules/datamodels/datamodelDataSource.py
@@ -62,15 +62,15 @@ class DataSource(PowerOnModel):
         description="Owner user ID",
         json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}},
     )
-    autoSync: bool = Field(
+    ragIndexEnabled: bool = Field(
         default=False,
-        description="Automatically sync on schedule",
-        json_schema_extra={"label": "Auto-Sync"},
+        description="When true this tree element is indexed into the RAG knowledge store",
+        json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
     )
-    lastSynced: Optional[float] = Field(
+    lastIndexed: Optional[float] = Field(
         default=None,
-        description="Last sync timestamp",
-        json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"},
+        description="Timestamp of last successful RAG indexing run",
+        json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
     )
     scope: str = Field(
         default="personal",
diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py
index 6aba24eb..f6cbd8fa 100644
--- a/modules/datamodels/datamodelUam.py
+++ b/modules/datamodels/datamodelUam.py
@@ -484,10 +484,10 @@ class UserConnection(PowerOnModel):
         default=None,
         description=(
             "Per-connection knowledge ingestion preferences. schemaVersion=1 keys: "
-            "neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), "
-            "mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), "
-            "clickupScope (titles|title_description|with_comments), "
-            "surfaceToggles (dict per authority), maxAgeDays (int)."
+            "mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), "
+            "filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), "
+            "clickupIndexAttachments (bool), maxAgeDays (int). "
+            "Neutralization is controlled per DataSource.neutralize (not here)."
         ),
         json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"},
     )
diff --git a/modules/features/workspace/mainWorkspace.py b/modules/features/workspace/mainWorkspace.py
index 24307b45..77f5b290 100644
--- a/modules/features/workspace/mainWorkspace.py
+++ b/modules/features/workspace/mainWorkspace.py
@@ -33,11 +33,6 @@ UI_OBJECTS = [
         "label": t("Einstellungen", context="UI"),
         "meta": {"area": "settings"}
     },
-    {
-        "objectKey": "ui.feature.workspace.rag-insights",
-        "label": t("Wissens-Insights", context="UI"),
-        "meta": {"area": "rag-insights"},
-    },
 ]
 
 RESOURCE_OBJECTS = [
@@ -86,7 +81,6 @@ TEMPLATE_ROLES = [
             {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
             {"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
             {"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
-            {"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
             {"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
         ]
     },
@@ -97,7 +91,6 @@ TEMPLATE_ROLES = [
             {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True},
             {"context": "UI", "item": "ui.feature.workspace.editor", "view": True},
             {"context": "UI", "item": "ui.feature.workspace.settings", "view": True},
-            {"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True},
             {"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True},
             {"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True},
             {"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True},
diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py
index 9595fee4..4487e5fe 100644
--- a/modules/features/workspace/routeFeatureWorkspace.py
+++ b/modules/features/workspace/routeFeatureWorkspace.py
@@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings(
 
 # =========================================================================
 # RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
-# =========================================================================
 
-def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]:
-    """All FileItem ids for this feature instance (any user). Knowledge rows are often stored
-    without featureInstanceId; we correlate by file id from the Management DB."""
-    from modules.datamodels.datamodelFiles import FileItem
-    from modules.interfaces.interfaceDbManagement import ComponentObjects
-
-    co = ComponentObjects()
-    rows = co.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId})
-    out: List[str] = []
-    m = str(mandateId) if mandateId else ""
-    for r in rows or []:
-        rid = r.get("id") if isinstance(r, dict) else getattr(r, "id", None)
-        if not rid:
-            continue
-        if m:
-            mid = r.get("mandateId") if isinstance(r, dict) else getattr(r, "mandateId", "") or ""
-            if mid and mid != m:
-                continue
-        out.append(str(rid))
-    return out
-
-
-@router.get("/{instanceId}/rag-statistics")
-@limiter.limit("60/minute")
-async def getRagStatistics(
-    request: Request,
-    instanceId: str = Path(...),
-    days: int = Query(90, ge=7, le=365, description="Timeline window in days"),
-    context: RequestContext = Depends(getRequestContext),
-):
-    """Aggregated, non-identifying knowledge-store metrics for this workspace instance."""
-    mandateId, _instanceConfig = _validateInstanceAccess(instanceId, context)
-    workspaceFileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId)
-    kdb = getKnowledgeInterface(context.user)
-    stats = kdb.getRagStatisticsForInstance(
-        featureInstanceId=instanceId,
-        mandateId=str(mandateId) if mandateId else "",
-        timelineDays=days,
-        workspaceFileIds=workspaceFileIds,
-    )
-    if isinstance(stats, dict):
-        stats.setdefault("scope", {})
-        stats["scope"]["workspaceFileIdsResolved"] = len(workspaceFileIds)
-    return JSONResponse(stats)
diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py
index c2f79b67..31a5af61 100644
--- a/modules/interfaces/interfaceDbKnowledge.py
+++ b/modules/interfaces/interfaceDbKnowledge.py
@@ -133,6 +133,60 @@ class KnowledgeObjects:
 
         return {"indexRows": indexCount, "chunks": chunkCount}
 
+    def deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]:
+        """Delete all FileContentIndex rows whose provenance.dataSourceId matches.
+
+        Used when a user disables ragIndexEnabled on a DataSource to purge
+        only those chunks that were ingested from that specific tree element.
+        Returns {"indexRows": n, "chunks": m} counting the records actually deleted.
+        """
+        if not dataSourceId:
+            return {"indexRows": 0, "chunks": 0}
+
+        matchedRows = []
+        for row in self.db.getRecordset(FileContentIndex) or []:  # unfiltered read, matched in Python
+            prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
+            if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:
+                matchedRows.append(row)
+
+        mandateIds: set = set()
+        chunkCount = 0
+        indexCount = 0
+        for row in matchedRows:
+            fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None)
+            mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "")
+            if not fid:
+                continue
+            for chunk in self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid}) or []:
+                chunkId = chunk.get("id") if isinstance(chunk, dict) else getattr(chunk, "id", None)
+                if chunkId and self.db.recordDelete(ContentChunk, chunkId):
+                    chunkCount += 1
+            if self.db.recordDelete(FileContentIndex, fid):
+                indexCount += 1
+                if mid:
+                    mandateIds.add(str(mid))  # only mandates that actually lost an index row
+
+        for mid in mandateIds:
+            try:
+                from modules.interfaces.interfaceDbBilling import _getRootInterface
+                _getRootInterface().reconcileMandateStorageBilling(mid)
+            except Exception as ex:  # best effort: the purge counts are still returned
+                logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex)
+
+        return {"indexRows": indexCount, "chunks": chunkCount}
+
+    def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]:
+        """Return every FileContentIndex row whose provenance.dataSourceId equals dataSourceId ([] for an empty id)."""
+        if not dataSourceId:
+            return []
+        allRows = self.db.getRecordset(FileContentIndex)  # unfiltered read; matching happens in the loop below
+        out = []
+        for row in allRows:
+            prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None)
+            if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId:  # rows without a dict provenance never match
+                out.append(dict(row) if not isinstance(row, dict) else row)  # non-dict rows are passed through dict()
+        return out
+
     def deleteFileContentIndex(self, fileId: str) -> bool:
         """Delete a FileContentIndex and all associated ContentChunks."""
         existing = self.getFileContentIndex(fileId)
diff --git a/modules/routes/routeBilling.py b/modules/routes/routeBilling.py
index b7fcdeca..04251e09 100644
--- a/modules/routes/routeBilling.py
+++ b/modules/routes/routeBilling.py
@@ -1986,10 +1986,10 @@ def getUserViewTransactions(
             if not pagination:
                 raise HTTPException(status_code=400, detail="pagination required for groupSummary")
             import json as _json
-            from collections import defaultdict
             from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
             from modules.routes.routeHelpers import (
                 applyViewToParams,
+                build_group_summary_groups,
                 effective_group_by_levels,
                 resolveView,
             )
@@ -2018,28 +2018,7 @@ def getUserViewTransactions(
                 summary_params,
                 ctx.user,
             )
-            counts: Dict[str, int] = defaultdict(int)
-            labels: Dict[str, str] = {}
-            null_key = "\x00NULL"
-            for item in all_rows:
-                raw = item.get(field)
-                if raw is None or raw == "":
-                    nk = null_key
-                    labels[nk] = null_label
-                else:
-                    nk = str(raw)
-                    if nk not in labels:
-                        labels[nk] = nk
-                counts[nk] += 1
-            groups_out: List[Dict[str, Any]] = []
-            for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())):
-                groups_out.append(
-                    {
-                        "value": None if nk == null_key else nk,
-                        "label": labels.get(nk, nk),
-                        "totalCount": counts[nk],
-                    }
-                )
+            groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels)
             return JSONResponse(content={"groups": groups_out})
 
         paginationParams = None
diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py
index 58d36b91..04f652fb 100644
--- a/modules/routes/routeDataConnections.py
+++ b/modules/routes/routeDataConnections.py
@@ -130,7 +130,7 @@ def get_auth_authority_options(
 # ============================================================================
 
 @router.get("/")
-@limiter.limit("30/minute")
+@limiter.limit("60/minute")
 async def get_connections(
     request: Request,
     pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"),
@@ -197,7 +197,9 @@ async def get_connections(
                 "lastChecked": connection.lastChecked,
                 "expiresAt": connection.expiresAt,
                 "tokenStatus": tokenStatus,
-                "tokenExpiresAt": tokenExpiresAt
+                "tokenExpiresAt": tokenExpiresAt,
+                "knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False),
+                "knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {},
             })
         return items
 
@@ -264,7 +266,7 @@ async def get_connections(
             })
         enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection)
         filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser)
-        groups_out = build_group_summary_groups(filtered, field, null_label)
+        groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
         return JSONResponse(content={"groups": groups_out})
 
     try:
@@ -724,4 +726,172 @@ def delete_connection(
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail=f"Failed to delete connection: {str(e)}"
-        ) 
\ No newline at end of file
+        )
+
+
+# =========================================================================
+# Knowledge Consent & Control Endpoints
+# =========================================================================
+
+
+def _findOwnConnection(interface, userId: str, connectionId: str):
+    """Return the entry of interface.getUserConnections(userId) whose id equals connectionId, or None."""
+    connections = interface.getUserConnections(userId)  # candidates are limited to what this call returns for userId
+    for conn in connections:
+        if conn.id == connectionId:
+            return conn
+    return None  # the route handlers in this file turn None into an HTTP 404
+
+
+@router.patch("/{connectionId}/knowledge-consent")
+@limiter.limit("10/minute")
+def _updateKnowledgeConsent(
+    request: Request,
+    connectionId: str = Path(..., description="Connection ID"),
+    enabled: bool = Body(..., embed=True),
+    currentUser: User = Depends(getCurrentUser),
+) -> Dict[str, Any]:
+    """Master switch: can PowerOn ingest data from this connection into the RAG knowledge store?
+
+    enabled=False: purge ALL chunks for this connection + cancel running jobs.
+    enabled=True: set flag; enqueue bootstrap only if rag-enabled DataSources exist.
+    """
+    try:
+        interface = getInterface(currentUser)
+        connection = _findOwnConnection(interface, currentUser.id, connectionId)
+        if not connection:
+            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
+
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        rootIf = getRootInterface()
+        rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled})  # flag is persisted before purge/enqueue
+
+        purged = None
+        cancelled = 0
+        bootstrapEnqueued = False
+
+        if not enabled:
+            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
+            purged = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId)
+
+            from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
+            cancelled = cancelJobsByConnection(connectionId)
+        else:
+            from modules.datamodels.datamodelDataSource import DataSource
+            dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True})
+            if dataSources:
+                import asyncio
+                from modules.serviceCenter.services.serviceBackgroundJobs import startJob
+                authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "")
+
+                async def _enqueue():
+                    await startJob(
+                        "connection.bootstrap",
+                        {"connectionId": connectionId, "authority": authority.lower()},
+                        triggeredBy=str(currentUser.id),
+                    )
+                try:
+                    loop = asyncio.get_running_loop()
+                except RuntimeError:
+                    # No loop is running in this thread: drive the coroutine to completion here.
+                    asyncio.run(_enqueue())
+                else:
+                    # Already inside a running loop: schedule the enqueue without blocking it.
+                    loop.create_task(_enqueue())
+                bootstrapEnqueued = True
+
+        import json as _json
+        from modules.shared.auditLogger import audit_logger
+        from modules.datamodels.datamodelAudit import AuditCategory
+        audit_logger.logEvent(
+            userId=str(currentUser.id),
+            mandateId=str(getattr(connection, "mandateId", "") or ""),
+            category=AuditCategory.PERMISSION.value,
+            action="knowledge_consent_changed",
+            details=_json.dumps({"connectionId": connectionId, "enabled": enabled}),
+        )
+
+        logger.info("Knowledge consent %s for connection %s by user %s",
+                    "enabled" if enabled else "disabled", connectionId, currentUser.id)
+        return {
+            "connectionId": connectionId,
+            "knowledgeIngestionEnabled": enabled,
+            "purged": purged,  # None unless enabled is False
+            "cancelledJobs": cancelled,
+            "bootstrapEnqueued": bootstrapEnqueued,
+        }
+    except HTTPException:
+        raise  # keep the 404 above as-is instead of wrapping it in a 500
+    except Exception as e:
+        logger.error("Error updating knowledge consent: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.patch("/{connectionId}/knowledge-preferences")
+@limiter.limit("20/minute")
+def _updateKnowledgePreferences(
+    request: Request,
+    connectionId: str = Path(..., description="Connection ID"),
+    preferences: Dict[str, Any] = Body(..., embed=True),
+    currentUser: User = Depends(getCurrentUser),
+) -> Dict[str, Any]:
+    """Merge the allowed keys of ``preferences`` into the connection's stored knowledgePreferences and return the merged dict."""
+    _ALLOWED_KEYS = {"mailContentDepth", "mailIndexAttachments", "filesIndexBinaries",
+                     "clickupScope", "clickupIndexAttachments", "maxAgeDays"}
+    try:
+        interface = getInterface(currentUser)
+        connection = _findOwnConnection(interface, currentUser.id, connectionId)
+        if not connection:
+            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
+
+        existing = getattr(connection, "knowledgePreferences", None) or {}
+        cleaned = {k: v for k, v in preferences.items() if k in _ALLOWED_KEYS}  # other keys are dropped; values are not validated here
+        merged = {**existing, **cleaned, "schemaVersion": 1}  # submitted values win over stored ones; schemaVersion is always set to 1
+
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        getRootInterface().db.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged})
+
+        logger.info("Knowledge preferences updated for connection %s", connectionId)
+        return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True}
+    except HTTPException:
+        raise  # keep the 404 above as-is instead of wrapping it in a 500
+    except Exception as e:
+        logger.error("Error updating knowledge preferences: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/{connectionId}/knowledge-stop")
+@limiter.limit("10/minute")
+def _stopKnowledgeJobs(
+    request: Request,
+    connectionId: str = Path(..., description="Connection ID"),
+    currentUser: User = Depends(getCurrentUser),
+) -> Dict[str, Any]:
+    """Cancel this connection's background jobs via cancelJobsByConnection, audit-log the count and return it."""
+    try:
+        interface = getInterface(currentUser)
+        connection = _findOwnConnection(interface, currentUser.id, connectionId)
+        if not connection:
+            raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found"))
+
+        from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection
+        cancelled = cancelJobsByConnection(connectionId)  # used as a count in the log and audit entry below
+
+        import json as _json
+        from modules.shared.auditLogger import audit_logger
+        from modules.datamodels.datamodelAudit import AuditCategory
+        audit_logger.logEvent(
+            userId=str(currentUser.id),
+            mandateId=str(getattr(connection, "mandateId", "") or ""),
+            category=AuditCategory.PERMISSION.value,
+            action="knowledge_jobs_stopped",
+            details=_json.dumps({"connectionId": connectionId, "cancelledCount": cancelled}),
+        )
+
+        logger.info("Stopped %d knowledge jobs for connection %s", cancelled, connectionId)
+        return {"connectionId": connectionId, "cancelled": cancelled}
+    except HTTPException:
+        raise  # keep the 404 above as-is instead of wrapping it in a 500
+    except Exception as e:
+        logger.error("Error stopping knowledge jobs: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
\ No newline at end of file
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index 244b77b0..3a951f3e 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -543,7 +543,7 @@ def get_files(
                 FileItem,
             )
             filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
-            groups_out = build_group_summary_groups(filtered, field, null_label)
+            groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
             return JSONResponse(content={"groups": groups_out})
 
         if mode == "filterValues":
diff --git a/modules/routes/routeDataPrompts.py b/modules/routes/routeDataPrompts.py
index c410d26a..331267b5 100644
--- a/modules/routes/routeDataPrompts.py
+++ b/modules/routes/routeDataPrompts.py
@@ -100,7 +100,7 @@ def get_prompts(
             result if isinstance(result, list) else (result.items if hasattr(result, "items") else [])
         )
         filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser)
-        groups_out = build_group_summary_groups(filtered, field, null_label)
+        groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels)
         return JSONResponse(content={"groups": groups_out})
 
     if mode == "filterValues":
diff --git a/modules/routes/routeDataSources.py b/modules/routes/routeDataSources.py
index 5df8a18b..f7e5425d 100644
--- a/modules/routes/routeDataSources.py
+++ b/modules/routes/routeDataSources.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2025 Patrick Motsch
 # All rights reserved.
-"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging."""
+"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging."""
 
 import logging
 from typing import Any, Dict, List, Optional
@@ -125,3 +125,75 @@ def _updateNeutralizeFields(
     except Exception as e:
         logger.error("Error updating neutralizeFields: %s", e)
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.patch("/{sourceId}/rag-index")
+@limiter.limit("30/minute")
+def _updateDataSourceRagIndex(
+    request: Request,
+    sourceId: str = Path(..., description="ID of the DataSource"),
+    ragIndexEnabled: bool = Body(..., embed=True),
+    context: RequestContext = Depends(getRequestContext),
+) -> Dict[str, Any]:
+    """Toggle RAG indexing for a DataSource.
+
+    true:  sets flag + enqueues a "connection.bootstrap" job restricted to this DataSource.
+    false: sets flag + synchronously purges all chunks from this DataSource.
+    """
+    try:
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        rootIf = getRootInterface()
+        rec = rootIf.db.getRecord(DataSource, sourceId)  # NOTE(review): root lookup; no owner/mandate check on rec is visible here - confirm
+        if not rec:
+            raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found")
+
+        rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled})
+        logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId)
+
+        if ragIndexEnabled:
+            from modules.serviceCenter.services.serviceBackgroundJobs import startJob
+            import asyncio
+
+            connectionId = rec.get("connectionId") or rec.get("connection_id") or ""  # NOTE(review): "" still enqueues a job below - confirm intended
+            conn = rootIf.getUserConnectionById(connectionId) if connectionId else None
+            authority = ""
+            if conn:
+                authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
+
+            async def _enqueue():
+                await startJob(
+                    "connection.bootstrap",
+                    {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]},
+                    triggeredBy=str(context.user.id),
+                )
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                # No loop is running in this thread: drive the coroutine to completion here.
+                asyncio.run(_enqueue())
+            else:
+                # Already inside a running loop: schedule the enqueue without blocking it.
+                loop.create_task(_enqueue())
+        else:
+            from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
+            purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId)
+            logger.info("Purged %d index rows / %d chunks for DataSource %s",
+                        purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId)
+
+        import json
+        from modules.shared.auditLogger import audit_logger
+        from modules.datamodels.datamodelAudit import AuditCategory
+        audit_logger.logEvent(
+            userId=str(context.user.id),
+            mandateId=context.mandateId,
+            category=AuditCategory.PERMISSION.value,
+            action="rag_index_toggled",
+            details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}),
+        )
+
+        return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True}
+    except HTTPException:
+        raise  # keep the 404 above as-is instead of wrapping it in a 500
+    except Exception as e:
+        logger.error("Error updating datasource ragIndexEnabled: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/modules/routes/routeHelpers.py b/modules/routes/routeHelpers.py
index f5af7d06..f1d88e31 100644
--- a/modules/routes/routeHelpers.py
+++ b/modules/routes/routeHelpers.py
@@ -825,45 +825,106 @@ def build_group_summary_groups(
     items: List[Dict[str, Any]],
     field: str,
     null_label: str = "—",
+    groupByLevels: List[Dict[str, Any]] | None = None,
 ) -> List[Dict[str, Any]]:
     """
-    Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level).
+    Build {"value", "label", "totalCount"} summaries for mode=groupSummary.
+
+    When *groupByLevels* contains more than one level the function produces one
+    entry per unique combination of all level values (flat permutations).
+    ``value`` becomes a ``///``-joined composite key and ``label`` the ``/``-joined
+    human-readable label so the frontend can split them back.
     """
     from collections import defaultdict
 
-    counts: Dict[str, int] = defaultdict(int)
-    display_by_key: Dict[str, str] = {}
-    null_key = "\x00NULL"
-    label_attr = f"{field}Label"
+    fields: list[dict] = []
+    if groupByLevels and len(groupByLevels) > 1:
+        for lvl in groupByLevels:
+            f = lvl.get("field", "")
+            nl = str(lvl.get("nullLabel") or null_label)
+            if f:
+                fields.append({"field": f, "nullLabel": nl})
+    if not fields:
+        fields = [{"field": field, "nullLabel": null_label}]
 
+    nullKey = "\x00NULL"
+
+    if len(fields) == 1:
+        f = fields[0]["field"]
+        nl = fields[0]["nullLabel"]
+        counts: Dict[str, int] = defaultdict(int)
+        displayByKey: Dict[str, str] = {}
+        labelAttr = f"{f}Label"
+        for item in items:
+            raw = item.get(f)
+            if raw is None or raw == "":
+                nk = nullKey
+                display = nl
+            else:
+                nk = str(raw)
+                display = None
+                lbl = item.get(labelAttr)
+                if lbl is not None and lbl != "":
+                    display = str(lbl)
+                if display is None:
+                    display = nk
+            counts[nk] += 1
+            if nk not in displayByKey:
+                displayByKey[nk] = display
+        orderedKeys = sorted(
+            counts.keys(),
+            key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()),
+        )
+        return [
+            {
+                "value": None if nk == nullKey else nk,
+                "label": displayByKey.get(nk, nk),
+                "totalCount": counts[nk],
+            }
+            for nk in orderedKeys
+        ]
+
+    counts = defaultdict(int)
+    displayByComposite: Dict[str, list] = {}
+    filtersByComposite: Dict[str, dict] = {}
     for item in items:
-        raw = item.get(field)
-        if raw is None or raw == "":
-            nk = null_key
-            display = null_label
-        else:
-            nk = str(raw)
-            display = None
-            lbl = item.get(label_attr)
-            if lbl is not None and lbl != "":
-                display = str(lbl)
-            if display is None:
-                display = nk
-        counts[nk] += 1
-        if nk not in display_by_key:
-            display_by_key[nk] = display
+        parts: list[str] = []
+        labels: list[str] = []
+        filterMap: dict = {}
+        for fd in fields:
+            f = fd["field"]
+            nl = fd["nullLabel"]
+            labelAttr = f"{f}Label"
+            raw = item.get(f)
+            if raw is None or raw == "":
+                parts.append(nullKey)
+                labels.append(nl)
+                filterMap[f] = None
+            else:
+                parts.append(str(raw))
+                lbl = item.get(labelAttr)
+                labels.append(str(lbl) if lbl not in (None, "") else str(raw))
+                filterMap[f] = str(raw)
+        compositeKey = "///".join(parts)
+        counts[compositeKey] += 1
+        if compositeKey not in displayByComposite:
+            displayByComposite[compositeKey] = labels
+            filtersByComposite[compositeKey] = filterMap
 
-    ordered_keys = sorted(
+    orderedKeys = sorted(
         counts.keys(),
-        key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()),
+        key=lambda x: tuple(
+            (seg == nullKey, seg.lower()) for seg in x.split("///")
+        ),
     )
     return [
         {
-            "value": None if nk == null_key else nk,
-            "label": display_by_key.get(nk, nk),
-            "totalCount": counts[nk],
+            "value": ck.replace(nullKey, "__null__") if nullKey in ck else ck,
+            "label": " / ".join(displayByComposite[ck]),
+            "totalCount": counts[ck],
+            "filters": filtersByComposite[ck],
         }
-        for nk in ordered_keys
+        for ck in orderedKeys
     ]
 
 
diff --git a/modules/routes/routeRagInventory.py b/modules/routes/routeRagInventory.py
new file mode 100644
index 00000000..08d2a245
--- /dev/null
+++ b/modules/routes/routeRagInventory.py
@@ -0,0 +1,267 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""RAG Inventory API — global knowledge-store visibility for users, admins, platform."""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, HTTPException, Depends, Request
+from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext
+from modules.datamodels.datamodelUam import User
+from modules.shared.i18nRegistry import apiRouteContext
+
+routeApiMsg = apiRouteContext("routeRagInventory")
+logger = logging.getLogger(__name__)
+
+router = APIRouter(
+    prefix="/api/rag/inventory",
+    tags=["RAG Inventory"],
+    responses={
+        401: {"description": "Unauthorized"},
+        403: {"description": "Forbidden"},
+        500: {"description": "Internal server error"},
+    },
+)
+
+
+def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]:
+    """Build one inventory dict per connection: its DataSources, FileContentIndex row total and bootstrap job state."""
+    from modules.datamodels.datamodelDataSource import DataSource
+    from modules.datamodels.datamodelKnowledge import FileContentIndex
+
+    jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5)  # same arguments for every connection: query once
+    out = []
+    for conn in connections:
+        connectionId = str(conn.id)
+        dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
+
+        connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
+        connChunkTotal = len(connIndexRows)  # number of FileContentIndex rows found for this connection
+
+        dsItems = []
+        for ds in dataSources:  # rows may be plain dicts or model objects, hence the dual access below
+            dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")
+            dsItems.append({
+                "id": dsId,
+                "label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""),
+                "path": ds.get("path") if isinstance(ds, dict) else getattr(ds, "path", ""),
+                "sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""),
+                "ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False),
+                "neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False),
+                "lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
+                "chunkCount": 0,
+            })
+
+        if dsItems and connChunkTotal > 0 and len(dsItems) == 1:  # rows are loaded per connection, so only a sole DataSource gets the count
+            dsItems[0]["chunkCount"] = connChunkTotal
+        connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]  # NOTE(review): limit=5 above is shared by all connections; confirm that is enough
+        runningJobs = [
+            {"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")}
+            for j in connJobs
+            if j.get("status") in ("PENDING", "RUNNING")
+        ]
+        lastError = None
+        for j in connJobs:  # first ERROR job in listing order wins
+            if j.get("status") == "ERROR":
+                lastError = {"jobId": j["id"], "errorMessage": j.get("errorMessage", "")}
+                break
+
+        out.append({
+            "id": connectionId,
+            "authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority),
+            "externalEmail": getattr(conn, "externalEmail", ""),
+            "knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False),
+            "preferences": getattr(conn, "knowledgePreferences", None) or {},
+            "dataSources": dsItems,
+            "totalChunks": connChunkTotal,
+            "runningJobs": runningJobs,
+            "lastError": lastError,
+        })
+    return out
+
+
+@router.get("/me")
+@limiter.limit("30/minute")
+def _getInventoryMe(
+    request: Request,
+    currentUser: User = Depends(getCurrentUser),
+) -> Dict[str, Any]:
+    """Return the caller's own RAG inventory: one entry per connection plus the summed chunk total; any failure becomes HTTP 500."""
+    try:
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
+        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
+
+        rootIf = getRootInterface()
+        knowledgeIf = getKnowledgeInterface(None)
+        connections = rootIf.getUserConnections(currentUser.id)  # looked up by the authenticated user's id
+
+        items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService)
+        totalChunks = sum(c.get("totalChunks", 0) for c in items)  # add up the per-connection totals
+
+        return {"connections": items, "totals": {"chunks": totalChunks}}
+    except Exception as e:
+        logger.error("Error in RAG inventory /me: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))  # the exception text is sent back as the response detail
+
+
+@router.get("/mandate")
+@limiter.limit("20/minute")
+def _getInventoryMandate(
+    request: Request,
+    context: RequestContext = Depends(getRequestContext),
+) -> Dict[str, Any]:
+    """Return the RAG inventory of all UserConnection rows matching the request's mandateId, with chunk and byte totals; 403 without a mandate context."""
+    if not context.mandateId:
+        raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required"))
+    try:
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes
+        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
+
+        rootIf = getRootInterface()
+        knowledgeIf = getKnowledgeInterface(None)
+        mandateId = str(context.mandateId) if context.mandateId else ""  # the guard above already rejected an empty mandateId
+
+        from modules.datamodels.datamodelUam import UserConnection
+        allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId})
+        connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]  # dict rows become throwaway objects so the builder can use attribute access
+
+        items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
+        totalChunks = sum(c.get("totalChunks", 0) for c in items)  # add up the per-connection totals
+        totalBytes = aggregateMandateRagTotalBytes(mandateId)
+
+        return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}}
+    except HTTPException:  # pass deliberate HTTP errors through unchanged
+        raise
+    except Exception as e:
+        logger.error("Error in RAG inventory /mandate: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))  # the exception text is sent back as the response detail
+
+
+@router.get("/platform")
+@limiter.limit("10/minute")
+def _getInventoryPlatform(
+    request: Request,
+    context: RequestContext = Depends(getRequestContext),
+) -> Dict[str, Any]:
+    """Return the RAG inventory built from the unfiltered UserConnection recordset plus the summed chunk total; 403 unless context.isSysAdmin."""
+    if not context.isSysAdmin:
+        raise HTTPException(status_code=403, detail=routeApiMsg("Platform admin required"))
+    try:
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface
+        from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService
+        from modules.datamodels.datamodelUam import UserConnection
+
+        rootIf = getRootInterface()
+        knowledgeIf = getKnowledgeInterface(None)
+        allConnections = rootIf.db.getRecordset(UserConnection)  # no recordFilter is passed here
+        connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections]  # dict rows become throwaway objects so the builder can use attribute access
+
+        items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService)
+        totalChunks = sum(c.get("totalChunks", 0) for c in items)  # add up the per-connection totals
+
+        return {"connections": items, "totals": {"chunks": totalChunks}}
+    except HTTPException:  # pass deliberate HTTP errors through unchanged
+        raise
+    except Exception as e:
+        logger.error("Error in RAG inventory /platform: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))  # the exception text is sent back as the response detail
+
+
+@router.post("/reindex/{connectionId}")
+@limiter.limit("10/minute")
+def _reindexConnection(
+    request: Request,
+    connectionId: str,
+    currentUser: User = Depends(getCurrentUser),
+) -> Dict[str, Any]:
+    """Re-trigger bootstrap for a connection (re-index all ragIndexEnabled DataSources).
+
+    Raises 404 for an unknown connection and 403 for someone else's; returns status "skipped" when no DataSource is ragIndexEnabled, else submits a new connection.bootstrap job without looking at earlier jobs.
+    """
+    try:
+        from modules.interfaces.interfaceDbApp import getRootInterface
+        from modules.serviceCenter.services.serviceBackgroundJobs import startJob
+        from modules.datamodels.datamodelDataSource import DataSource
+        import asyncio
+
+        rootIf = getRootInterface()
+        conn = rootIf.getUserConnectionById(connectionId)
+        if conn is None:
+            raise HTTPException(status_code=404, detail="Connection not found")
+
+        if str(conn.userId) != str(currentUser.id):  # only the owner may re-index
+            raise HTTPException(status_code=403, detail="Not your connection")
+
+        dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
+        ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))]  # rows may be dicts or model objects
+        if not ragDs:
+            return {"status": "skipped", "reason": "no_rag_enabled_datasources"}
+
+        authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "")
+        dsIds = [(ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")) for ds in ragDs]
+
+        async def _enqueue():
+            return await startJob(
+                "connection.bootstrap",
+                {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds},
+                triggeredBy=str(currentUser.id),
+            )
+        try:  # startJob is a coroutine but this route is sync, so pick a way to run it
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                future = asyncio.ensure_future(_enqueue())  # NOTE(review): scheduled but not awaited and only referenced by this local; confirm the task cannot be dropped
+                jobId = None  # the job id is not known yet in this branch, so the response carries "jobId": None
+            else:
+                jobId = loop.run_until_complete(_enqueue())
+        except RuntimeError:  # presumably raised when no event loop is available in this thread; verify
+            jobId = asyncio.run(_enqueue())
+
+        logger.info("Reindex triggered for connection %s (%d DataSources)", connectionId, len(dsIds))
+        return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(dsIds), "jobId": jobId}
+    except HTTPException:  # pass the 404/403 above through unchanged
+        raise
+    except Exception as e:
+        logger.error("Error triggering reindex: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))  # the exception text is sent back as the response detail
+
+
+@router.get("/jobs")
+@limiter.limit("60/minute")
+def _getActiveJobs(
+    request: Request,
+    currentUser: User = Depends(getCurrentUser),
+) -> List[Dict[str, Any]]:
+    """List PENDING/RUNNING connection.bootstrap jobs whose payload.connectionId is one of the current user's connections (used by header badge)."""
+    try:
+        from modules.serviceCenter.services.serviceBackgroundJobs import listJobs
+        from modules.interfaces.interfaceDbApp import getRootInterface
+
+        rootIf = getRootInterface()
+        connections = rootIf.getUserConnections(currentUser.id)
+        connectionMap = {str(c.id): c for c in connections}  # connection id (as string) -> connection
+        connectionIds = set(connectionMap.keys())
+
+        jobs = listJobs(jobType="connection.bootstrap", limit=50)  # NOTE(review): the listing is not filtered by user, so this limit is shared with other users' jobs; confirm
+        active = []
+        for j in jobs:
+            if j.get("status") not in ("PENDING", "RUNNING"):  # skip every job that is not pending or running
+                continue
+            payload = j.get("payload") or {}
+            connId = payload.get("connectionId")
+            if connId in connectionIds:  # keep only jobs that belong to the caller's connections
+                conn = connectionMap[connId]
+                active.append({
+                    "jobId": j["id"],
+                    "connectionId": connId,
+                    "connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId),  # displayLabel, else authority, else the id
+                    "jobType": j.get("jobType", "connection.bootstrap"),
+                    "progress": j.get("progress", 0),
+                    "progressMessage": j.get("progressMessage", ""),
+                })
+        return active
+    except Exception as e:
+        logger.error("Error in RAG inventory /jobs: %s", e, exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))  # the exception text is sent back as the response detail
diff --git a/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py b/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py
index e9d4c94c..ce67dc4a 100644
--- a/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py
+++ b/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py
@@ -7,6 +7,9 @@ from .mainBackgroundJobService import (
     startJob,
     getJobStatus,
     listJobs,
+    cancelJob,
+    cancelJobsByConnection,
+    isTerminalStatus,
     JobProgressCallback,
 )
 
@@ -15,5 +18,8 @@ __all__ = [
     "startJob",
     "getJobStatus",
     "listJobs",
+    "cancelJob",
+    "cancelJobsByConnection",
+    "isTerminalStatus",
     "JobProgressCallback",
 ]
diff --git a/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py b/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py
index b8a55e28..66ca4708 100644
--- a/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py
+++ b/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py
@@ -30,6 +30,7 @@ clear message. No silent zombies.
 
 import asyncio
 import logging
+import time
 from datetime import datetime, timezone
 from typing import Any, Awaitable, Callable, Dict, List, Optional
 
@@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app")
 registerDatabase(JOBS_DATABASE)
 
 
-JobProgressCallback = Callable[[int, Optional[str]], None]
+_CANCEL_CHECK_INTERVAL_S = 3.0
+
+
+class JobProgressCallback:
+    """Callable progress reporter for one job; also offers a throttled, cooperative cancel check for long-running walkers."""
+
+    def __init__(self, jobId: str):
+        self._jobId = jobId
+        self._cancelledCache: Optional[bool] = None  # None until the first DB check; True is sticky
+        self._lastCheckedAt: float = float("-inf")  # time.monotonic() of the last DB check; -inf forces the first one
+
+    def __call__(self, progress: int, message: Optional[str] = None) -> None:  # stores progress, never raises
+        try:
+            clamped = max(0, min(100, int(progress)))  # keep progress within 0..100
+            fields: Dict[str, Any] = {"progress": clamped}
+            if message is not None:
+                fields["progressMessage"] = message[:500]  # cap the stored message at 500 characters
+            _updateJob(self._jobId, fields)
+        except Exception as ex:
+            logger.warning("Progress update failed for job %s: %s", self._jobId, ex)
+
+    def isCancelled(self) -> bool:
+        """Return True once the job row is CANCELLED; reads the DB at most every _CANCEL_CHECK_INTERVAL_S seconds."""
+        now = time.monotonic()  # monotonic: a wall-clock step backwards must not suppress cancel checks
+        if self._cancelledCache is True:
+            return True
+        if now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S:
+            return self._cancelledCache or False
+        self._lastCheckedAt = now
+        try:
+            job = _loadJob(self._jobId)
+            if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value:
+                self._cancelledCache = True
+                return True
+        except Exception as ex:  # best effort: a failed read counts as "not cancelled" until the next check
+            logger.debug("Cancel check failed for job %s: %s", self._jobId, ex)
+        self._cancelledCache = False
+        return False
+
+
 JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]]
 
 
@@ -155,16 +195,7 @@ def _markError(jobId: str, errorMessage: str) -> None:
 
 
 def _makeProgressCallback(jobId: str) -> JobProgressCallback:
-    def _cb(progress: int, message: Optional[str] = None) -> None:
-        try:
-            clamped = max(0, min(100, int(progress)))
-            fields: Dict[str, Any] = {"progress": clamped}
-            if message is not None:
-                fields["progressMessage"] = message[:500]
-            _updateJob(jobId, fields)
-        except Exception as ex:
-            logger.warning("Progress update failed for job %s: %s", jobId, ex)
-    return _cb
+    return JobProgressCallback(jobId)
 
 
 async def _runJob(jobId: str) -> None:
@@ -220,12 +251,51 @@ def isTerminalStatus(status: str) -> bool:
     return status in {s.value for s in TERMINAL_JOB_STATUSES}
 
 
+def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool:
+    """Mark a job as CANCELLED. Walkers detect this via JobProgressCallback.isCancelled().
+
+    Only the stored job row is updated here. Returns True after the update, False if the job does not exist or is already in a terminal state.
+    """
+    job = _loadJob(jobId)
+    if not job:
+        return False
+    if isTerminalStatus(job.get("status", "")):  # finished jobs are left untouched
+        return False
+    _updateJob(jobId, {
+        "status": BackgroundJobStatusEnum.CANCELLED.value,
+        "errorMessage": f"cancelled: {reason}"[:1000],  # the reason is kept in errorMessage, capped at 1000 characters
+        "finishedAt": datetime.now(timezone.utc).timestamp(),  # current UTC time as a POSIX timestamp
+    })
+    logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason)
+    return True
+
+
+def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int:
+    """Cancel all RUNNING/PENDING jobs of *jobType* whose payload.connectionId matches.
+
+    Loads every job of that type and filters in Python. Returns the number of jobs cancelJob() actually cancelled.
+    """
+    db = _getDb()
+    rows = db.getRecordset(BackgroundJob, recordFilter={"jobType": jobType})
+    count = 0
+    for row in rows:
+        status = row.get("status", "")
+        if status not in (BackgroundJobStatusEnum.PENDING.value, BackgroundJobStatusEnum.RUNNING.value):  # only jobs that have not finished
+            continue
+        payload = row.get("payload") or {}
+        if payload.get("connectionId") == connectionId:
+            if cancelJob(row["id"], reason=f"connection_stop:{connectionId[:8]}"):  # the reason carries only the first 8 characters of the id
+                count += 1
+    return count
+
+
 def recoverInterruptedJobs() -> int:
-    """Flip any RUNNING jobs to ERROR (called at worker boot).
+    """Flip any RUNNING jobs to ERROR and re-queue bootstrap jobs (called at worker boot).
 
     A RUNNING job in the DB after process restart means the previous worker
     died mid-execution; the asyncio task is gone and the job will never
-    finish on its own.
+    finish on its own. For connection.bootstrap jobs, a fresh job is
+    automatically re-queued so the user doesn't have to manually retry.
     """
     db = _getDb()
     try:
@@ -234,12 +304,34 @@ def recoverInterruptedJobs() -> int:
         logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex)
         return 0
     count = 0
+    requeued = 0
     for row in rows:
         try:
             _markError(row["id"], "Interrupted by worker restart")
             count += 1
         except Exception as ex:
             logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex)
+            continue
+
+        if row.get("jobType") == "connection.bootstrap":
+            payload = row.get("payload") or {}
+            if payload.get("connectionId"):
+                try:
+                    newJob = BackgroundJob(
+                        jobType="connection.bootstrap",
+                        payload=payload,
+                        triggeredBy="recovery.requeue",
+                    )
+                    record = db.recordCreate(BackgroundJob, _serialiseDatetimes(newJob.model_dump()))
+                    asyncio.create_task(_runJob(record["id"]))
+                    requeued += 1
+                    logger.info(
+                        "recoverInterruptedJobs: re-queued bootstrap for connectionId=%s (new jobId=%s)",
+                        payload["connectionId"], record["id"],
+                    )
+                except Exception as reqEx:
+                    logger.warning("recoverInterruptedJobs: re-queue failed for %s: %s", row.get("id"), reqEx)
+
     if count:
-        logger.warning("Recovered %d interrupted background job(s) after restart", count)
+        logger.warning("Recovered %d interrupted background job(s) after restart (re-queued %d)", count, requeued)
     return count
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py
index 97ac61d5..0e2d251f 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py
@@ -122,21 +122,54 @@ def _onConnectionRevoked(
     )
 
 
+_SOURCE_TYPE_MAP = {  # authority -> walker key -> DataSource "sourceType" values handed to that walker
+    "msft": {
+        "sharepoint": ("sharepointFolder", "onedriveFolder"),
+        "outlook": ("outlookFolder", "calendarFolder", "contactFolder"),
+    },
+    "google": {
+        "drive": ("googleDriveFolder",),
+        "gmail": ("gmailFolder",),
+    },
+    "clickup": {
+        "clickup": ("clickupList",),
+    },
+    "infomaniak": {
+        "kdrive": ("kdriveFolder",),
+    },
+}
+
+
+def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None):
+    """Load DataSource rows with a truthy ragIndexEnabled for a connection.
+
+    If dataSourceIds is provided and non-empty (mini-bootstrap), only rows whose id is in it are returned.
+    """
+    from modules.interfaces.interfaceDbApp import getRootInterface
+    from modules.datamodels.datamodelDataSource import DataSource
+
+    rootIf = getRootInterface()
+    allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
+    if dataSourceIds:  # an empty list behaves like None: no id filtering
+        return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")]
+    return [ds for ds in allDs if ds.get("ragIndexEnabled")]
+
+
 async def _bootstrapJobHandler(
     job: Dict[str, Any],
     progressCb,
 ) -> Dict[str, Any]:
-    """Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps."""
+    """Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources."""
     payload = job.get("payload") or {}
     connectionId = payload.get("connectionId")
     authority = (payload.get("authority") or "").lower()
+    dataSourceIds = payload.get("dataSourceIds")
     if not connectionId:
         raise ValueError("connection.bootstrap requires payload.connectionId")
 
     progressCb(5, f"resolving {authority} connection")
 
-    # Defensive consent check: if the connection has since disabled knowledge ingestion
-    # (e.g. user toggled setting after the job was enqueued), skip all walkers.
+    # Defensive consent check
     try:
         from modules.interfaces.interfaceDbApp import getRootInterface
         _root = getRootInterface()
@@ -156,6 +189,21 @@ async def _bootstrapJobHandler(
     except Exception as _guardErr:
         logger.debug("Could not load connection for consent guard: %s", _guardErr)
 
+    # Load only ragIndexEnabled DataSources for this connection
+    dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds)
+    if not dataSources:
+        logger.info(
+            "ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s",
+            connectionId,
+            extra={
+                "event": "ingestion.connection.bootstrap.skipped",
+                "connectionId": connectionId,
+                "authority": authority,
+                "reason": "no_data_sources",
+            },
+        )
+        return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"}
+
     def _normalize(res: Any, label: str) -> Dict[str, Any]:
         if isinstance(res, Exception):
             logger.error(
@@ -165,6 +213,10 @@ async def _bootstrapJobHandler(
             return {"error": str(res)}
         return res or {}
 
+    def _filterDs(walkerKey: str) -> list:
+        sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ())
+        return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes]
+
     if authority == "msft":
         from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import (
             bootstrapSharepoint,
@@ -174,9 +226,14 @@ async def _bootstrapJobHandler(
         )
 
         progressCb(10, "sharepoint + outlook")
+        spDs = _filterDs("sharepoint")
+        olDs = _filterDs("outlook")
+        async def _noopResult():
+            return {"skipped": True, "reason": "no_datasources"}
+
         spResult, olResult = await asyncio.gather(
-            bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb),
-            bootstrapOutlook(connectionId=connectionId, progressCb=progressCb),
+            bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(),
+            bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else _noopResult(),
             return_exceptions=True,
         )
         return {
@@ -195,9 +252,14 @@ async def _bootstrapJobHandler(
         )
 
         progressCb(10, "drive + gmail")
+        gdDs = _filterDs("drive")
+        gmDs = _filterDs("gmail")
+        async def _noopResult():
+            return {"skipped": True, "reason": "no_datasources"}
+
         gdResult, gmResult = await asyncio.gather(
-            bootstrapGdrive(connectionId=connectionId, progressCb=progressCb),
-            bootstrapGmail(connectionId=connectionId, progressCb=progressCb),
+            bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(),
+            bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(),
             return_exceptions=True,
         )
         return {
@@ -213,7 +275,8 @@ async def _bootstrapJobHandler(
         )
 
         progressCb(10, "clickup tasks")
-        cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb)
+        cuDs = _filterDs("clickup")
+        cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"}
         return {
             "connectionId": connectionId,
             "authority": authority,
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py
index 950400ce..4aaaa9bf 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py
@@ -9,7 +9,7 @@ is None).
 from __future__ import annotations
 
 import logging
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
 logger = logging.getLogger(__name__)
@@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description"
 
 @dataclass
 class ConnectionIngestionPrefs:
-    """Parsed per-connection preferences for knowledge ingestion walkers."""
+    """Parsed per-connection preferences for knowledge ingestion walkers.
 
-    # PII
-    neutralizeBeforeEmbed: bool = False
+    Neutralization is now controlled per DataSource.neutralize (not here).
+    Surface toggles are obsolete — walker iterates only over ragIndexEnabled DataSources.
+    """
 
     # Mail (Outlook + Gmail)
     mailContentDepth: str = _DEFAULT_MAIL_DEPTH          # "metadata" | "snippet" | "full"
@@ -32,18 +33,11 @@ class ConnectionIngestionPrefs:
 
     # Files (Drive / SharePoint / OneDrive)
     filesIndexBinaries: bool = True
-    mimeAllowlist: List[str] = field(default_factory=list)  # empty = all allowed
 
     # ClickUp
     clickupScope: str = _DEFAULT_CLICKUP_SCOPE  # "titles" | "title_description" | "with_comments"
     clickupIndexAttachments: bool = False
 
-    # Per-authority surface toggles (default everything on)
-    gmailEnabled: bool = True
-    driveEnabled: bool = True
-    sharepointEnabled: bool = True
-    outlookEnabled: bool = True
-
     # Time window
     maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS  # 0 = no limit
 
@@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs:
             v = raw.get(key)
             return int(v) if isinstance(v, int) else default
 
-        surface = raw.get("surfaceToggles") or {}
-        google_surf = surface.get("google") or {}
-        msft_surf = surface.get("msft") or {}
-
         return ConnectionIngestionPrefs(
-            neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False),
             mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH),
             mailIndexAttachments=_bool("mailIndexAttachments", False),
             filesIndexBinaries=_bool("filesIndexBinaries", True),
-            mimeAllowlist=list(raw.get("mimeAllowlist") or []),
             clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE),
             clickupIndexAttachments=_bool("clickupIndexAttachments", False),
-            gmailEnabled=bool(google_surf.get("gmail", True)),
-            driveEnabled=bool(google_surf.get("drive", True)),
-            sharepointEnabled=bool(msft_surf.get("sharepoint", True)),
-            outlookEnabled=bool(msft_surf.get("outlook", True)),
             maxAgeDays=_int("maxAgeDays", _DEFAULT_MAX_AGE_DAYS),
         )
     except Exception as exc:
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py
index 31ac9687..7acbaa19 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py
@@ -23,7 +23,7 @@ import logging
 import time
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta, timezone
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 logger = logging.getLogger(__name__)
 
@@ -150,8 +150,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
                 "data": description,
                 "contextRef": {"part": "description"},
             })
-        # text_content is ClickUp's rendered-markdown version; include if it adds
-        # something beyond the plain description (common for bullet lists, checklists).
         textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars)
         if textContent and textContent != description:
             parts.append({
@@ -166,33 +164,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) -
 async def bootstrapClickup(
     connectionId: str,
     *,
-    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
+    dataSources: Optional[List[Dict[str, Any]]] = None,
+    progressCb: Optional[Any] = None,
     adapter: Any = None,
     connection: Any = None,
     knowledgeService: Any = None,
     limits: Optional[ClickupBootstrapLimits] = None,
 ) -> Dict[str, Any]:
-    """Walk workspaces → lists → tasks and ingest each task as a virtual doc."""
-    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
-    prefs = loadConnectionPrefs(connectionId)
+    """Walk workspaces → lists → tasks and ingest each task as a virtual doc.
+
+    Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
+    Each DataSource supplies the neutralize policy and the dataSourceId recorded in provenance.
+    """
+    if not dataSources:
+        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
 
     if not limits:
-        limits = ClickupBootstrapLimits(
-            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
-            neutralize=prefs.neutralizeBeforeEmbed,
-            clickupScope=prefs.clickupScope,
-        )
+        limits = ClickupBootstrapLimits()
 
     startMs = time.time()
     result = ClickupBootstrapResult(connectionId=connectionId)
 
     logger.info(
-        "ingestion.connection.bootstrap.started part=clickup connectionId=%s",
-        connectionId,
+        "ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d",
+        connectionId, len(dataSources),
         extra={
             "event": "ingestion.connection.bootstrap.started",
             "part": "clickup",
             "connectionId": connectionId,
+            "dataSourceCount": len(dataSources),
         },
     )
 
@@ -215,30 +215,56 @@ async def bootstrapClickup(
         return _finalizeResult(connectionId, result, startMs)
 
     teams = (teamsResp or {}).get("teams") or []
-    for team in teams[: limits.maxWorkspaces]:
+
+    cancelled = False
+    for ds in dataSources:
         if result.indexed + result.skippedDuplicate >= limits.maxTasks:
             break
-        teamId = str(team.get("id", "") or "")
-        if not teamId:
-            continue
-        result.workspaces += 1
-        try:
-            await _walkTeam(
-                svc=svc,
-                knowledgeService=knowledgeService,
-                connectionId=connectionId,
-                mandateId=mandateId,
-                userId=userId,
-                team=team,
-                limits=limits,
-                result=result,
-                progressCb=progressCb,
-            )
-        except Exception as exc:
-            logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
-            result.errors.append(f"team({teamId}): {exc}")
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            cancelled = True
+            break
 
-    return _finalizeResult(connectionId, result, startMs)
+        dsId = ds.get("id", "")
+        dsNeutralize = ds.get("neutralize", False)
+        dsLimits = ClickupBootstrapLimits(
+            maxTasks=limits.maxTasks,
+            maxWorkspaces=limits.maxWorkspaces,
+            maxListsPerWorkspace=limits.maxListsPerWorkspace,
+            maxDescriptionChars=limits.maxDescriptionChars,
+            maxAgeDays=limits.maxAgeDays,
+            includeClosed=limits.includeClosed,
+            neutralize=dsNeutralize,
+            clickupScope=limits.clickupScope,
+        )
+
+        for team in teams[:dsLimits.maxWorkspaces]:
+            if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks:
+                break
+            teamId = str(team.get("id", "") or "")
+            if not teamId:
+                continue
+            result.workspaces += 1
+            try:
+                await _walkTeam(
+                    svc=svc,
+                    knowledgeService=knowledgeService,
+                    connectionId=connectionId,
+                    mandateId=mandateId,
+                    userId=userId,
+                    team=team,
+                    limits=dsLimits,
+                    result=result,
+                    progressCb=progressCb,
+                    dataSourceId=dsId,
+                )
+            except Exception as exc:
+                logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True)
+                result.errors.append(f"team({teamId}): {exc}")
+
+    finalResult = _finalizeResult(connectionId, result, startMs)
+    if cancelled:
+        finalResult["cancelled"] = True
+    return finalResult
 
 
 async def _resolveDependencies(connectionId: str):
@@ -280,8 +306,12 @@ async def _walkTeam(
     team: Dict[str, Any],
     limits: ClickupBootstrapLimits,
     result: ClickupBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
+    if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+        return
+
     teamId = str(team.get("id", "") or "")
     spacesResp = await svc.getSpaces(teamId)
     spaces = (spacesResp or {}).get("spaces") or []
@@ -294,14 +324,12 @@ async def _walkTeam(
         if not spaceId:
             continue
 
-        # Folderless lists directly under the space
         folderless = await svc.getFolderlessLists(spaceId)
         for lst in (folderless or {}).get("lists") or []:
             if len(listsCollected) >= limits.maxListsPerWorkspace:
                 break
             listsCollected.append({**lst, "_space": space})
 
-        # Lists inside folders
         foldersResp = await svc.getFolders(spaceId)
         for folder in (foldersResp or {}).get("folders") or []:
             if len(listsCollected) >= limits.maxListsPerWorkspace:
@@ -318,6 +346,8 @@ async def _walkTeam(
     for lst in listsCollected:
         if result.indexed + result.skippedDuplicate >= limits.maxTasks:
             return
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            return
         result.lists += 1
         await _walkList(
             svc=svc,
@@ -330,6 +360,7 @@ async def _walkTeam(
             limits=limits,
             result=result,
             progressCb=progressCb,
+            dataSourceId=dataSourceId,
         )
 
 
@@ -344,13 +375,16 @@ async def _walkList(
     lst: Dict[str, Any],
     limits: ClickupBootstrapLimits,
     result: ClickupBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     listId = str(lst.get("id", "") or "")
     if not listId:
         return
     page = 0
     while result.indexed + result.skippedDuplicate < limits.maxTasks:
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            return
         resp = await svc.getTasksInList(
             listId,
             page=page,
@@ -371,7 +405,6 @@ async def _walkList(
             if not _isRecent(task.get("date_updated"), limits.maxAgeDays):
                 result.skippedPolicy += 1
                 continue
-            # Inject the list/folder/space metadata we already loaded.
             task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")}
             task["folder"] = task.get("folder") or lst.get("_folder") or {}
             task["space"] = task.get("space") or lst.get("_space") or {}
@@ -385,9 +418,10 @@ async def _walkList(
                 limits=limits,
                 result=result,
                 progressCb=progressCb,
+                dataSourceId=dataSourceId,
             )
 
-        if len(tasks) < 100:  # ClickUp page-size hint: fewer than 100 => last page
+        if len(tasks) < 100:
             return
         page += 1
 
@@ -402,7 +436,8 @@ async def _ingestTask(
     task: Dict[str, Any],
     limits: ClickupBootstrapLimits,
     result: ClickupBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
 
@@ -431,6 +466,7 @@ async def _ingestTask(
                 neutralize=limits.neutralize,
                 provenance={
                     "connectionId": connectionId,
+                    "dataSourceId": dataSourceId,
                     "authority": "clickup",
                     "service": "clickup",
                     "externalItemId": taskId,
@@ -456,8 +492,10 @@ async def _ingestTask(
     else:
         result.failed += 1
 
-    if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
-        processed = result.indexed + result.skippedDuplicate
+    processed = result.indexed + result.skippedDuplicate
+    if progressCb is not None and processed % 50 == 0:
+        if hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            return
         try:
             progressCb(
                 min(90, 10 + int(80 * processed / max(1, limits.maxTasks))),
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py
index 5e4e659b..398b9af9 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py
@@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through
 
 from __future__ import annotations
 
+import asyncio
 import hashlib
 import logging
 import time
@@ -30,7 +31,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/")
 MAX_DEPTH_DEFAULT = 4
 MAX_AGE_DAYS_DEFAULT = 365
 
-# Google Drive uses virtual mime-types for folders and non-downloadable assets.
 FOLDER_MIME = "application/vnd.google-apps.folder"
 
 
@@ -41,12 +41,8 @@ class GdriveBootstrapLimits:
     maxFileSize: int = MAX_FILE_SIZE_DEFAULT
     skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT
     maxDepth: int = MAX_DEPTH_DEFAULT
-    # Only ingest files modified within the last N days. None disables filter.
     maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT
-    # Pass-through to IngestionJob.neutralize
     neutralize: bool = False
-    # Whether to skip binary/non-text files
-    filesIndexBinaries: bool = True
 
 
 @dataclass
@@ -95,10 +91,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
     if not maxAgeDays:
         return True
     if not modifiedIso:
-        # No timestamp -> be permissive (Drive native docs sometimes omit it on export).
         return True
     try:
-        # Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both.
         ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00"))
     except Exception:
         return True
@@ -111,34 +105,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool:
 async def bootstrapGdrive(
     connectionId: str,
     *,
-    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
+    dataSources: Optional[List[Dict[str, Any]]] = None,
+    progressCb: Optional[Any] = None,
     adapter: Any = None,
     connection: Any = None,
     knowledgeService: Any = None,
     limits: Optional[GdriveBootstrapLimits] = None,
     runExtractionFn: Optional[Callable[..., Any]] = None,
 ) -> Dict[str, Any]:
-    """Walk My Drive starting from the virtual root folder."""
-    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
-    prefs = loadConnectionPrefs(connectionId)
+    """Walk Google Drive starting from each DataSource's root path.
+
+    Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
+    Each DataSource defines the root path + neutralize policy for its subtree.
+    """
+    if not dataSources:
+        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
 
     if not limits:
-        limits = GdriveBootstrapLimits(
-            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
-            neutralize=prefs.neutralizeBeforeEmbed,
-            filesIndexBinaries=prefs.filesIndexBinaries,
-        )
+        limits = GdriveBootstrapLimits()
 
     startMs = time.time()
     result = GdriveBootstrapResult(connectionId=connectionId)
 
     logger.info(
-        "ingestion.connection.bootstrap.started part=gdrive connectionId=%s",
-        connectionId,
+        "ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d",
+        connectionId, len(dataSources),
         extra={
             "event": "ingestion.connection.bootstrap.started",
             "part": "gdrive",
             "connectionId": connectionId,
+            "dataSourceCount": len(dataSources),
         },
     )
 
@@ -158,25 +154,51 @@ async def bootstrapGdrive(
     mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
     userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
 
-    try:
-        await _walkFolder(
-            adapter=adapter,
-            knowledgeService=knowledgeService,
-            runExtractionFn=runExtractionFn,
-            connectionId=connectionId,
-            mandateId=mandateId,
-            userId=userId,
-            folderPath="/",  # DriveAdapter.browse maps "" / "/" -> "root"
-            depth=0,
-            limits=limits,
-            result=result,
-            progressCb=progressCb,
-        )
-    except Exception as exc:
-        logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True)
-        result.errors.append(f"walk: {exc}")
+    cancelled = False
+    for ds in dataSources:
+        if result.indexed + result.skippedDuplicate >= limits.maxItems:
+            break
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            cancelled = True
+            break
 
-    return _finalizeResult(connectionId, result, startMs)
+        dsPath = ds.get("path", "/")
+        dsId = ds.get("id", "")
+        dsNeutralize = ds.get("neutralize", False)
+        dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays)
+        dsLimits = GdriveBootstrapLimits(
+            maxItems=limits.maxItems,
+            maxBytes=limits.maxBytes,
+            maxFileSize=limits.maxFileSize,
+            skipMimePrefixes=limits.skipMimePrefixes,
+            maxDepth=limits.maxDepth,
+            maxAgeDays=dsMaxAgeDays,
+            neutralize=dsNeutralize,
+        )
+
+        try:
+            await _walkFolder(
+                adapter=adapter,
+                knowledgeService=knowledgeService,
+                runExtractionFn=runExtractionFn,
+                connectionId=connectionId,
+                mandateId=mandateId,
+                userId=userId,
+                folderPath=dsPath,
+                depth=0,
+                limits=dsLimits,
+                result=result,
+                progressCb=progressCb,
+                dataSourceId=dsId,
+            )
+        except Exception as exc:
+            logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
+            result.errors.append(f"walk({dsPath}): {exc}")
+
+    finalResult = _finalizeResult(connectionId, result, startMs)
+    if cancelled:
+        finalResult["cancelled"] = True
+    return finalResult
 
 
 async def _resolveDependencies(connectionId: str):
@@ -220,10 +242,13 @@ async def _walkFolder(
     depth: int,
     limits: GdriveBootstrapLimits,
     result: GdriveBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     if depth > limits.maxDepth:
         return
+    if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+        return
     try:
         entries = await adapter.browse(folderPath)
     except Exception as exc:
@@ -236,6 +261,8 @@ async def _walkFolder(
             return
         if result.bytesProcessed >= limits.maxBytes:
             return
+        if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
+            return
 
         entryPath = getattr(entry, "path", "") or ""
         metadata = getattr(entry, "metadata", {}) or {}
@@ -254,6 +281,7 @@ async def _walkFolder(
                 limits=limits,
                 result=result,
                 progressCb=progressCb,
+                dataSourceId=dataSourceId,
             )
             continue
 
@@ -288,6 +316,7 @@ async def _walkFolder(
             limits=limits,
             result=result,
             progressCb=progressCb,
+            dataSourceId=dataSourceId,
         )
 
 
@@ -306,7 +335,8 @@ async def _ingestOne(
     revision: Optional[str],
     limits: GdriveBootstrapLimits,
     result: GdriveBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
 
@@ -321,14 +351,13 @@ async def _ingestOne(
         result.errors.append(f"download({entryPath}): {exc}")
         return
 
-    # Adapter.download returns raw bytes today; guard DownloadResult shape too.
     fileBytes: bytes
     if isinstance(downloaded, (bytes, bytearray)):
         fileBytes = bytes(downloaded)
     else:
         fileBytes = bytes(getattr(downloaded, "data", b"") or b"")
         if getattr(downloaded, "mimeType", None):
-            mimeType = downloaded.mimeType  # export may have changed the type
+            mimeType = downloaded.mimeType
     if not fileBytes:
         result.failed += 1
         return
@@ -354,6 +383,15 @@ async def _ingestOne(
         result.skippedPolicy += 1
         return
 
+    provenance: Dict[str, Any] = {
+        "connectionId": connectionId,
+        "dataSourceId": dataSourceId,
+        "authority": "google",
+        "service": "drive",
+        "externalItemId": externalItemId,
+        "entryPath": entryPath,
+        "tier": "body",
+    }
     try:
         handle = await knowledgeService.requestIngestion(
             IngestionJob(
@@ -366,14 +404,7 @@ async def _ingestOne(
                 contentObjects=contentObjects,
                 contentVersion=revision,
                 neutralize=limits.neutralize,
-                provenance={
-                    "connectionId": connectionId,
-                    "authority": "google",
-                    "service": "drive",
-                    "externalItemId": externalItemId,
-                    "entryPath": entryPath,
-                    "tier": "body",
-                },
+                provenance=provenance,
             )
         )
     except Exception as exc:
@@ -388,6 +419,8 @@ async def _ingestOne(
         result.indexed += 1
     else:
         result.failed += 1
+        if handle.error:
+            result.errors.append(f"ingest({entryPath}): {handle.error}")
 
     if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0:
         processed = result.indexed + result.skippedDuplicate
@@ -411,6 +444,8 @@ async def _ingestOne(
             },
         )
 
+    await asyncio.sleep(0)
+
 
 def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]:
     durationMs = int((time.time() - startMs) * 1000)
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py
index 21fec83d..f5c345c6 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py
@@ -175,35 +175,36 @@ def _buildContentObjects(
 async def bootstrapGmail(
     connectionId: str,
     *,
-    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
+    dataSources: Optional[List[Dict[str, Any]]] = None,
+    progressCb: Optional[Any] = None,
     adapter: Any = None,
     connection: Any = None,
     knowledgeService: Any = None,
     limits: Optional[GmailBootstrapLimits] = None,
     googleGetFn: Optional[Callable[..., Any]] = None,
 ) -> Dict[str, Any]:
-    """Enumerate Gmail labels (INBOX + SENT default) and ingest messages."""
-    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
-    prefs = loadConnectionPrefs(connectionId)
+    """Enumerate Gmail labels (INBOX + SENT default) and ingest messages.
+
+    Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
+    Each DataSource defines the neutralize policy for its scope.
+    """
+    if not dataSources:
+        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
 
     if not limits:
-        limits = GmailBootstrapLimits(
-            includeAttachments=prefs.mailIndexAttachments,
-            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
-            mailContentDepth=prefs.mailContentDepth,
-            neutralize=prefs.neutralizeBeforeEmbed,
-        )
+        limits = GmailBootstrapLimits()
 
     startMs = time.time()
     result = GmailBootstrapResult(connectionId=connectionId)
 
     logger.info(
-        "ingestion.connection.bootstrap.started part=gmail connectionId=%s",
-        connectionId,
+        "ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d",
+        connectionId, len(dataSources),
         extra={
             "event": "ingestion.connection.bootstrap.started",
             "part": "gmail",
             "connectionId": connectionId,
+            "dataSourceCount": len(dataSources),
         },
     )
 
@@ -221,26 +222,51 @@ async def bootstrapGmail(
     mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
     userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
 
-    for labelId in limits.labels:
+    cancelled = False
+    for ds in dataSources:
         if result.indexed + result.skippedDuplicate >= limits.maxMessages:
             break
-        try:
-            await _ingestLabel(
-                googleGetFn=googleGetFn,
-                knowledgeService=knowledgeService,
-                connectionId=connectionId,
-                mandateId=mandateId,
-                userId=userId,
-                labelId=labelId,
-                limits=limits,
-                result=result,
-                progressCb=progressCb,
-            )
-        except Exception as exc:
-            logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
-            result.errors.append(f"label({labelId}): {exc}")
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            cancelled = True
+            break
 
-    return _finalizeResult(connectionId, result, startMs)
+        dsId = ds.get("id", "")
+        dsNeutralize = ds.get("neutralize", False)
+        dsLimits = GmailBootstrapLimits(
+            maxMessages=limits.maxMessages,
+            labels=limits.labels,
+            maxBodyChars=limits.maxBodyChars,
+            includeAttachments=limits.includeAttachments,
+            maxAttachmentBytes=limits.maxAttachmentBytes,
+            maxAgeDays=limits.maxAgeDays,
+            mailContentDepth=limits.mailContentDepth,
+            neutralize=dsNeutralize,
+        )
+
+        for labelId in dsLimits.labels:
+            if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
+                break
+            try:
+                await _ingestLabel(
+                    googleGetFn=googleGetFn,
+                    knowledgeService=knowledgeService,
+                    connectionId=connectionId,
+                    mandateId=mandateId,
+                    userId=userId,
+                    labelId=labelId,
+                    limits=dsLimits,
+                    result=result,
+                    progressCb=progressCb,
+                    dataSourceId=dsId,
+                )
+            except Exception as exc:
+                logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True)
+                result.errors.append(f"label({labelId}): {exc}")
+
+    finalResult = _finalizeResult(connectionId, result, startMs)
+    if cancelled:
+        finalResult["cancelled"] = True
+    return finalResult
 
 
 async def _resolveDependencies(connectionId: str):
@@ -282,7 +308,8 @@ async def _ingestLabel(
     labelId: str,
     limits: GmailBootstrapLimits,
     result: GmailBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
     if remaining <= 0:
@@ -316,6 +343,8 @@ async def _ingestLabel(
         for stub in messageStubs:
             if result.indexed + result.skippedDuplicate >= limits.maxMessages:
                 break
+            if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
+                return
             msgId = stub.get("id")
             if not msgId:
                 continue
@@ -337,6 +366,7 @@ async def _ingestLabel(
                 limits=limits,
                 result=result,
                 progressCb=progressCb,
+                dataSourceId=dataSourceId,
             )
 
         nextPageToken = page.get("nextPageToken")
@@ -355,7 +385,8 @@ async def _ingestMessage(
     message: Dict[str, Any],
     limits: GmailBootstrapLimits,
     result: GmailBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
 
@@ -386,6 +417,7 @@ async def _ingestMessage(
                 neutralize=limits.neutralize,
                 provenance={
                     "connectionId": connectionId,
+                    "dataSourceId": dataSourceId,
                     "authority": "google",
                     "service": "gmail",
                     "externalItemId": messageId,
@@ -420,6 +452,7 @@ async def _ingestMessage(
                 parentSyntheticId=syntheticId,
                 limits=limits,
                 result=result,
+                dataSourceId=dataSourceId,
             )
         except Exception as exc:
             logger.warning("gmail attachments %s failed: %s", messageId, exc)
@@ -461,6 +494,7 @@ async def _ingestAttachments(
     parentSyntheticId: str,
     limits: GmailBootstrapLimits,
     result: GmailBootstrapResult,
+    dataSourceId: str = "",
 ) -> None:
     """Child ingestion jobs for file attachments. Skips inline images (cid: refs)."""
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@@ -561,6 +595,7 @@ async def _ingestAttachments(
                     contentObjects=contentObjects,
                     provenance={
                         "connectionId": connectionId,
+                        "dataSourceId": dataSourceId,
                         "authority": "google",
                         "service": "gmail",
                         "parentId": parentSyntheticId,
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py
index 64a3545f..3f4a8afb 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py
@@ -18,7 +18,7 @@ import hashlib
 import logging
 import time
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody
 
@@ -139,34 +139,35 @@ def _buildContentObjects(
 async def bootstrapOutlook(
     connectionId: str,
     *,
-    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
+    dataSources: Optional[List[Dict[str, Any]]] = None,
+    progressCb: Optional[Any] = None,
     adapter: Any = None,
     connection: Any = None,
     knowledgeService: Any = None,
     limits: Optional[OutlookBootstrapLimits] = None,
 ) -> Dict[str, Any]:
-    """Enumerate Outlook folders (inbox + sent by default) and ingest messages."""
-    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
-    prefs = loadConnectionPrefs(connectionId)
+    """Enumerate Outlook folders (inbox + sent by default) and ingest messages.
+
+    Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
+    Each DataSource defines the neutralize policy for its messages.
+    """
+    if not dataSources:
+        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
 
     if not limits:
-        limits = OutlookBootstrapLimits(
-            includeAttachments=prefs.mailIndexAttachments,
-            maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None,
-            mailContentDepth=prefs.mailContentDepth,
-            neutralize=prefs.neutralizeBeforeEmbed,
-        )
+        limits = OutlookBootstrapLimits()
 
     startMs = time.time()
     result = OutlookBootstrapResult(connectionId=connectionId)
 
     logger.info(
-        "ingestion.connection.bootstrap.started part=outlook connectionId=%s",
-        connectionId,
+        "ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d",
+        connectionId, len(dataSources),
         extra={
             "event": "ingestion.connection.bootstrap.started",
             "part": "outlook",
             "connectionId": connectionId,
+            "dataSourceCount": len(dataSources),
         },
     )
 
@@ -176,27 +177,52 @@ async def bootstrapOutlook(
     mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
     userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
 
-    folderIds = await _selectFolderIds(adapter, limits)
-    for folderId in folderIds:
+    cancelled = False
+    for ds in dataSources:
         if result.indexed + result.skippedDuplicate >= limits.maxMessages:
             break
-        try:
-            await _ingestFolder(
-                adapter=adapter,
-                knowledgeService=knowledgeService,
-                connectionId=connectionId,
-                mandateId=mandateId,
-                userId=userId,
-                folderId=folderId,
-                limits=limits,
-                result=result,
-                progressCb=progressCb,
-            )
-        except Exception as exc:
-            logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
-            result.errors.append(f"folder({folderId}): {exc}")
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            cancelled = True
+            break
 
-    return _finalizeResult(connectionId, result, startMs)
+        dsId = ds.get("id", "")
+        dsNeutralize = ds.get("neutralize", False)
+        dsLimits = OutlookBootstrapLimits(
+            maxMessages=limits.maxMessages,
+            maxFolders=limits.maxFolders,
+            maxBodyChars=limits.maxBodyChars,
+            includeAttachments=limits.includeAttachments,
+            maxAttachmentBytes=limits.maxAttachmentBytes,
+            maxAgeDays=limits.maxAgeDays,
+            mailContentDepth=limits.mailContentDepth,
+            neutralize=dsNeutralize,
+        )
+
+        folderIds = await _selectFolderIds(adapter, dsLimits)
+        for folderId in folderIds:
+            if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages:
+                break
+            try:
+                await _ingestFolder(
+                    adapter=adapter,
+                    knowledgeService=knowledgeService,
+                    connectionId=connectionId,
+                    mandateId=mandateId,
+                    userId=userId,
+                    folderId=folderId,
+                    limits=dsLimits,
+                    result=result,
+                    progressCb=progressCb,
+                    dataSourceId=dsId,
+                )
+            except Exception as exc:
+                logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True)
+                result.errors.append(f"folder({folderId}): {exc}")
+
+    finalResult = _finalizeResult(connectionId, result, startMs)
+    if cancelled:
+        finalResult["cancelled"] = True
+    return finalResult
 
 
 async def _resolveDependencies(connectionId: str):
@@ -266,8 +292,12 @@ async def _ingestFolder(
     folderId: str,
     limits: OutlookBootstrapLimits,
     result: OutlookBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
+    if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+        return
+
     remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate)
     if remaining <= 0:
         return
@@ -307,6 +337,8 @@ async def _ingestFolder(
         for message in page.get("value", []) or []:
             if result.indexed + result.skippedDuplicate >= limits.maxMessages:
                 break
+            if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
+                return
             await _ingestMessage(
                 adapter=adapter,
                 knowledgeService=knowledgeService,
@@ -317,6 +349,7 @@ async def _ingestFolder(
                 limits=limits,
                 result=result,
                 progressCb=progressCb,
+                dataSourceId=dataSourceId,
             )
 
         nextLink = page.get("@odata.nextLink")
@@ -338,7 +371,8 @@ async def _ingestMessage(
     message: Dict[str, Any],
     limits: OutlookBootstrapLimits,
     result: OutlookBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
 
@@ -369,6 +403,7 @@ async def _ingestMessage(
                 neutralize=limits.neutralize,
                 provenance={
                     "connectionId": connectionId,
+                    "dataSourceId": dataSourceId,
                     "authority": "msft",
                     "service": "outlook",
                     "externalItemId": messageId,
@@ -402,6 +437,7 @@ async def _ingestMessage(
                 parentSyntheticId=syntheticId,
                 limits=limits,
                 result=result,
+                dataSourceId=dataSourceId,
             )
         except Exception as exc:
             logger.warning("outlook attachments %s failed: %s", messageId, exc)
@@ -443,6 +479,7 @@ async def _ingestAttachments(
     parentSyntheticId: str,
     limits: OutlookBootstrapLimits,
     result: OutlookBootstrapResult,
+    dataSourceId: str = "",
 ) -> None:
     """Child ingestion jobs for file attachments (skip inline & oversized)."""
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
@@ -531,6 +568,7 @@ async def _ingestAttachments(
                     neutralize=limits.neutralize,
                     provenance={
                         "connectionId": connectionId,
+                        "dataSourceId": dataSourceId,
                         "authority": "msft",
                         "service": "outlook",
                         "parentId": parentSyntheticId,
diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py
index 07fef7a8..f664f1a8 100644
--- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py
+++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py
@@ -94,35 +94,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]:
 async def bootstrapSharepoint(
     connectionId: str,
     *,
-    progressCb: Optional[Callable[[int, Optional[str]], None]] = None,
+    dataSources: Optional[List[Dict[str, Any]]] = None,
+    progressCb: Optional[Any] = None,
     adapter: Any = None,
     connection: Any = None,
     knowledgeService: Any = None,
     limits: Optional[SharepointBootstrapLimits] = None,
     runExtractionFn: Optional[Callable[..., Any]] = None,
 ) -> Dict[str, Any]:
-    """Enumerate SharePoint drives and ingest every reachable file via the façade.
+    """Enumerate SharePoint drives and ingest files via the facade.
 
-    Parameters allow injection for tests; production callers pass only
-    `connectionId` (and optionally a progressCb) and everything else is
-    resolved against the registered services.
+    Iterates only over explicitly provided dataSources (ragIndexEnabled=true).
+    Each DataSource defines the root path + neutralize policy for its subtree.
     """
-    from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs
-    prefs = loadConnectionPrefs(connectionId)
+    if not dataSources:
+        return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"}
 
     if not limits:
-        limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed)
+        limits = SharepointBootstrapLimits()
 
     startMs = time.time()
     result = SharepointBootstrapResult(connectionId=connectionId)
 
     logger.info(
-        "ingestion.connection.bootstrap.started part=sharepoint connectionId=%s",
-        connectionId,
+        "ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d",
+        connectionId, len(dataSources),
         extra={
             "event": "ingestion.connection.bootstrap.started",
             "part": "sharepoint",
             "connectionId": connectionId,
+            "dataSourceCount": len(dataSources),
         },
     )
 
@@ -142,17 +143,27 @@ async def bootstrapSharepoint(
     mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else ""
     userId = str(getattr(connection, "userId", "") or "") if connection is not None else ""
 
-    try:
-        sites = await adapter.browse("/", limit=limits.maxSites)
-    except Exception as exc:
-        logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True)
-        result.errors.append(f"site_discovery: {exc}")
-        return _finalizeResult(connectionId, result, startMs)
-
-    for site in sites[: limits.maxSites]:
+    cancelled = False
+    for ds in dataSources:
         if result.indexed + result.skippedDuplicate >= limits.maxItems:
             break
-        sitePath = getattr(site, "path", "") or ""
+        if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+            cancelled = True
+            break
+
+        dsPath = ds.get("path", "")
+        dsId = ds.get("id", "")
+        dsNeutralize = ds.get("neutralize", False)
+        dsLimits = SharepointBootstrapLimits(
+            maxItems=limits.maxItems,
+            maxBytes=limits.maxBytes,
+            maxFileSize=limits.maxFileSize,
+            skipMimePrefixes=limits.skipMimePrefixes,
+            maxDepth=limits.maxDepth,
+            maxSites=limits.maxSites,
+            neutralize=dsNeutralize,
+        )
+
         try:
             await _walkFolder(
                 adapter=adapter,
@@ -161,17 +172,21 @@ async def bootstrapSharepoint(
                 connectionId=connectionId,
                 mandateId=mandateId,
                 userId=userId,
-                folderPath=sitePath,
+                folderPath=dsPath,
                 depth=0,
-                limits=limits,
+                limits=dsLimits,
                 result=result,
                 progressCb=progressCb,
+                dataSourceId=dsId,
             )
         except Exception as exc:
-            logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True)
-            result.errors.append(f"walk({sitePath}): {exc}")
+            logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True)
+            result.errors.append(f"walk({dsPath}): {exc}")
 
-    return _finalizeResult(connectionId, result, startMs)
+    finalResult = _finalizeResult(connectionId, result, startMs)
+    if cancelled:
+        finalResult["cancelled"] = True
+    return finalResult
 
 
 async def _resolveDependencies(connectionId: str):
@@ -221,10 +236,13 @@ async def _walkFolder(
     depth: int,
     limits: SharepointBootstrapLimits,
     result: SharepointBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     if depth > limits.maxDepth:
         return
+    if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled():
+        return
     try:
         entries = await adapter.browse(folderPath)
     except Exception as exc:
@@ -237,6 +255,8 @@ async def _walkFolder(
             return
         if result.bytesProcessed >= limits.maxBytes:
             return
+        if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled():
+            return
 
         entryPath = getattr(entry, "path", "") or ""
         if getattr(entry, "isFolder", False):
@@ -252,6 +272,7 @@ async def _walkFolder(
                 limits=limits,
                 result=result,
                 progressCb=progressCb,
+                dataSourceId=dataSourceId,
             )
             continue
 
@@ -283,6 +304,7 @@ async def _walkFolder(
             limits=limits,
             result=result,
             progressCb=progressCb,
+            dataSourceId=dataSourceId,
         )
 
 
@@ -301,7 +323,8 @@ async def _ingestOne(
     revision: Optional[str],
     limits: SharepointBootstrapLimits,
     result: SharepointBootstrapResult,
-    progressCb: Optional[Callable[[int, Optional[str]], None]],
+    progressCb: Optional[Any],
+    dataSourceId: str = "",
 ) -> None:
     from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob
 
@@ -339,6 +362,7 @@ async def _ingestOne(
 
     provenance: Dict[str, Any] = {
         "connectionId": connectionId,
+        "dataSourceId": dataSourceId,
         "authority": "msft",
         "service": "sharepoint",
         "externalItemId": externalItemId,
diff --git a/modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py b/modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py
new file mode 100644
index 00000000..10be150d
--- /dev/null
+++ b/modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies.
+
+Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins.
+If no ancestor has a value, the default (False) is used.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+def resolveEffectiveNeutralize(
+    ds: Dict[str, Any],
+    allDataSources: List[Dict[str, Any]],
+) -> bool:
+    """Resolve the effective neutralize flag for a DataSource.
+
+    An explicit own value wins: False disables, and every other non-None value
+    (not only True) enables. A missing/None value is inherited from the nearest
+    ancestor DataSource, e.g. /sites/HR/Documents inherits from /sites/HR.
+    """
+    explicit = ds.get("neutralize")
+    if explicit is None:
+        return _findAncestorPolicy(ds, allDataSources, "neutralize")
+    # Identity test: values such as 0 or "" are not the False singleton, so they enable.
+    return explicit is not False
+
+
+def resolveEffectiveRagIndexEnabled(
+    ds: Dict[str, Any],
+    allDataSources: List[Dict[str, Any]],
+) -> bool:
+    """Resolve ragIndexEnabled for a DataSource: an explicit True/False wins, anything else is inherited."""
+    explicit = ds.get("ragIndexEnabled")
+    # Only the bool singletons count as explicit; None, 0, 1, "" and the like
+    # fall through to the ancestor lookup.
+    if explicit is True or explicit is False:
+        return explicit
+    return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled")
+
+
+def _findAncestorPolicy(
+    ds: Dict[str, Any],
+    allDataSources: List[Dict[str, Any]],
+    field: str,
+) -> bool:
+    """Return `field` from the nearest same-connection ancestor (by "/"-separated path) that sets True/False, else False."""
+    dsPath = ds.get("path", "")
+    connectionId = ds.get("connectionId", "")
+    if not dsPath:
+        return False
+
+    ancestors = []
+    for candidate in allDataSources:
+        if candidate.get("id") == ds.get("id"):
+            continue
+        if candidate.get("connectionId") != connectionId:
+            continue
+        candidatePath = candidate.get("path", "")
+        if not candidatePath:
+            continue
+        # Require a "/" segment boundary so /sites/HR is not taken as an ancestor of /sites/HR2.
+        if dsPath.startswith(candidatePath.rstrip("/") + "/") and len(candidatePath) < len(dsPath):
+            ancestors.append(candidate)
+    # Longest path first, i.e. the nearest ancestor is inspected first.
+    ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True)
+    for ancestor in ancestors:
+        val = ancestor.get(field)
+        if val is True:
+            return True
+        if val is False:
+            return False
+    return False
diff --git a/modules/system/mainSystem.py b/modules/system/mainSystem.py
index b7e45006..21d0cbee 100644
--- a/modules/system/mainSystem.py
+++ b/modules/system/mainSystem.py
@@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [
                         "path": "/automations",
                         "order": 30,
                     },
+                    {
+                        "id": "rag-inventory",
+                        "objectKey": "ui.system.ragInventory",
+                        "label": t("RAG-Inventar"),
+                        "icon": "FaDatabase",
+                        "path": "/rag-inventory",
+                        "order": 35,
+                    },
                     {
                         "id": "store",
                         "objectKey": "ui.system.store",
diff --git a/scripts/script_db_migrate_datasource_rag.py b/scripts/script_db_migrate_datasource_rag.py
new file mode 100644
index 00000000..95c2ae35
--- /dev/null
+++ b/scripts/script_db_migrate_datasource_rag.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed.
+
+This is a one-off migration for the RAG consent & control unification.
+Safe to run multiple times (checks column existence before acting).
+
+Usage:
+    python script_db_migrate_datasource_rag.py [--dry-run]
+"""
+
+import os
+import sys
+import argparse
+import logging
+from pathlib import Path
+
+scriptPath = Path(__file__).resolve()
+gatewayPath = scriptPath.parent.parent
+sys.path.insert(0, str(gatewayPath))
+os.chdir(str(gatewayPath))
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True)
+logger = logging.getLogger(__name__)
+
+import psycopg2
+from modules.shared.configuration import APP_CONFIG
+
+
+def _getConnection():
+    """Open a psycopg2 connection from the DB_* entries of APP_CONFIG."""
+    cfg = APP_CONFIG.get
+    return psycopg2.connect(
+        host=cfg("DB_HOST", "localhost"), port=int(cfg("DB_PORT", "5432")),
+        database=cfg("DB_DATABASE", "poweron_app"),
+        user=cfg("DB_USER"), password=cfg("DB_PASSWORD_SECRET"),
+    )
+
+
+def _columnExists(cur, table: str, column: str) -> bool:
+    """Return True when information_schema lists `column` for `table` in the public schema."""
+    query = """SELECT 1 FROM information_schema.columns
+           WHERE table_schema = 'public' AND table_name = %s AND column_name = %s"""
+    cur.execute(query, (table, column))
+    row = cur.fetchone()
+    return row is not None
+
+
+def migrate(dryRun: bool = False):
+    """Rename the legacy DataSource columns in one transaction; with dryRun the SQL is only logged, never executed."""
+    renames = [
+        ("DataSource", "autoSync", "ragIndexEnabled"),
+        ("DataSource", "lastSynced", "lastIndexed"),
+    ]
+    conn = _getConnection()
+    try:
+        conn.autocommit = False
+        cur = conn.cursor()
+        executed = []
+        for table, oldCol, newCol in renames:
+            if _columnExists(cur, table, oldCol) and not _columnExists(cur, table, newCol):
+                sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";'
+                logger.info("EXEC: %s", sql)
+                if not dryRun:
+                    cur.execute(sql)
+                executed.append(sql)
+            elif _columnExists(cur, table, newCol):
+                logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol)
+            elif not _columnExists(cur, table, oldCol):
+                logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol)
+
+        if not dryRun and executed:
+            conn.commit()
+            logger.info("Migration committed (%d statements)", len(executed))
+        elif dryRun and executed:
+            conn.rollback()
+            logger.info("DRY RUN — would execute %d statements", len(executed))
+        else:
+            logger.info("Nothing to do — schema already up to date")
+        cur.close()
+    finally:
+        conn.close()  # always release the connection; closing without commit discards a half-applied transaction
+
+
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("--dry-run", action="store_true", help="Print SQL without executing")
+    # argparse exposes the --dry-run flag as the `dry_run` attribute.
+    migrate(dryRun=cli.parse_args().dry_run)
diff --git a/tests/unit/services/test_bootstrap_clickup.py b/tests/unit/services/test_bootstrap_clickup.py
index 87c08c3d..4ed0c4f1 100644
--- a/tests/unit/services/test_bootstrap_clickup.py
+++ b/tests/unit/services/test_bootstrap_clickup.py
@@ -100,6 +100,9 @@ def _adapter(svc):
     return SimpleNamespace(_svc=svc)
 
 
+_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}]
+
+
 def test_bootstrap_walks_team_space_lists_and_tasks():
     svc = _FakeClickupService(taskCount=2)
     knowledge = _FakeKnowledgeService()
@@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
     async def _run():
         return await bootstrapClickup(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=_adapter(svc),
             connection=connection,
             knowledgeService=knowledge,
@@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks():
         assert job.mimeType == "application/vnd.clickup.task+json"
         assert job.mandateId == "m1"
         assert job.provenance["connectionId"] == "c1"
+        assert job.provenance["dataSourceId"] == "ds-1"
         assert job.provenance["authority"] == "clickup"
         assert job.provenance["teamId"] == "team-1"
         assert job.contentVersion  # numeric millisecond string
-        # At least the header content-object is present.
         ids = [co["contentObjectId"] for co in job.contentObjects]
         assert "header" in ids
 
@@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
     async def _run():
         return await bootstrapClickup(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=_adapter(svc),
             connection=connection,
             knowledgeService=knowledge,
@@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays():
     async def _run():
         return await bootstrapClickup(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=_adapter(svc),
             connection=connection,
             knowledgeService=knowledge,
@@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion():
     async def _run():
         return await bootstrapClickup(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=_adapter(svc),
             connection=connection,
             knowledgeService=knowledge,
@@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion():
     assert result["indexed"] == 3
 
 
+def test_bootstrap_skips_when_no_datasources():
+    # No dataSources and no injected adapter/connection/knowledgeService:
+    # the call is expected to come back with a skip marker.
+    result = asyncio.run(bootstrapClickup(connectionId="c1"))
+
+    assert result["skipped"] is True
+    assert result["reason"] == "no_datasources"
+
+
+def test_bootstrap_honours_datasource_neutralize():
+    svc = _FakeClickupService(taskCount=1)
+    knowledge = _FakeKnowledgeService()
+    connection = SimpleNamespace(mandateId="m1", userId="u1")
+
+    async def _run():
+        return await bootstrapClickup(
+            connectionId="c1",
+            dataSources=[{"id": "ds-n", "neutralize": True}],
+            adapter=_adapter(svc),
+            connection=connection,
+            knowledgeService=knowledge,
+            limits=ClickupBootstrapLimits(maxAgeDays=None),
+        )
+
+    asyncio.run(_run())
+    assert knowledge.calls, "expected at least one ingestion job"  # otherwise the loop below passes vacuously
+    for job in knowledge.calls:
+        assert job.neutralize is True and job.provenance["dataSourceId"] == "ds-n"
+
+
 if __name__ == "__main__":
     test_bootstrap_walks_team_space_lists_and_tasks()
     test_bootstrap_reports_duplicates_on_second_run()
     test_bootstrap_skips_tasks_older_than_maxAgeDays()
     test_bootstrap_maxTasks_caps_ingestion()
+    test_bootstrap_skips_when_no_datasources()
+    test_bootstrap_honours_datasource_neutralize()
     print("OK — bootstrapClickup tests passed")
diff --git a/tests/unit/services/test_bootstrap_gdrive.py b/tests/unit/services/test_bootstrap_gdrive.py
index 1b88677e..2741332f 100644
--- a/tests/unit/services/test_bootstrap_gdrive.py
+++ b/tests/unit/services/test_bootstrap_gdrive.py
@@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options):
     )
 
 
+_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}]
+
+
 def test_bootstrap_walks_drive_and_subfolders():
     adapter = _FakeDriveAdapter()
     knowledge = _FakeKnowledgeService()
@@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders():
     async def _run():
         return await bootstrapGdrive(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,
@@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run():
     async def _run():
         return await bootstrapGdrive(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,
@@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays():
     async def _run():
         return await bootstrapGdrive(
             connectionId="c1",
+            dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}],
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,
             runExtractionFn=_fakeRunExtraction,
-            limits=GdriveBootstrapLimits(maxAgeDays=180),
         )
 
     result = asyncio.run(_run())
@@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance():
     async def _run():
         return await bootstrapGdrive(
             connectionId="c1",
+            dataSources=_DEFAULT_DS,
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,
@@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance():
         assert job.sourceKind == "gdrive_item"
         assert job.mandateId == "m1"
         assert job.provenance["connectionId"] == "c1"
+        assert job.provenance["dataSourceId"] == "ds1"
         assert job.provenance["authority"] == "google"
         assert job.provenance["service"] == "drive"
         assert job.contentVersion  # modifiedTime ISO string
 
 
+def test_bootstrap_skips_when_no_datasources():
+    # No dataSources and no injected adapter/connection/knowledgeService:
+    # the call is expected to come back with a skip marker.
+    result = asyncio.run(bootstrapGdrive(connectionId="c1"))
+
+    assert result["skipped"] is True
+    assert result["reason"] == "no_datasources"
+
+
 if __name__ == "__main__":
     test_bootstrap_walks_drive_and_subfolders()
     test_bootstrap_reports_duplicates_on_second_run()
     test_bootstrap_skips_files_older_than_maxAgeDays()
     test_bootstrap_passes_connection_provenance()
+    test_bootstrap_skips_when_no_datasources()
     print("OK — bootstrapGdrive tests passed")
diff --git a/tests/unit/services/test_bootstrap_outlook.py b/tests/unit/services/test_bootstrap_outlook.py
index 26664eaa..c5fea524 100644
--- a/tests/unit/services/test_bootstrap_outlook.py
+++ b/tests/unit/services/test_bootstrap_outlook.py
@@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
     async def _run():
         return await bootstrapOutlook(
             connectionId="c1",
+            dataSources=[{"id": "ds1", "neutralize": False}],
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,
@@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent():
         assert job.sourceKind == "outlook_message"
         assert job.mimeType == "message/rfc822"
         assert job.provenance["connectionId"] == "c1"
+        assert job.provenance["dataSourceId"] == "ds1"
         assert job.provenance["service"] == "outlook"
         assert job.contentVersion == "ck1"
         assert any(co["contentObjectId"] == "header" for co in job.contentObjects)
@@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination():
     async def _run():
         return await bootstrapOutlook(
             connectionId="c1",
+            dataSources=[{"id": "ds1", "neutralize": False}],
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,
@@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates():
     async def _run():
         return await bootstrapOutlook(
             connectionId="c1",
+            dataSources=[{"id": "ds1", "neutralize": False}],
             adapter=adapter,
             connection=connection,
             knowledgeService=knowledge,

From c130f49cf902ec700f4945314f1af29a15ac7938 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 12 May 2026 17:49:48 +0200
Subject: [PATCH 4/8] fixed teams

---
 modules/routes/routeRagInventory.py              | 16 +++++++++++++---
 .../serviceExtraction/mainServiceExtraction.py   |  1 +
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/modules/routes/routeRagInventory.py b/modules/routes/routeRagInventory.py
index 08d2a245..37fb330b 100644
--- a/modules/routes/routeRagInventory.py
+++ b/modules/routes/routeRagInventory.py
@@ -36,6 +36,16 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
         connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId})
         connChunkTotal = len(connIndexRows)
 
+        chunksByDs: Dict[str, int] = {}
+        unassigned = 0
+        for idx in connIndexRows:
+            prov = (idx.get("provenance") if isinstance(idx, dict) else getattr(idx, "provenance", None)) or {}
+            dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else ""
+            if dsIdRef:
+                chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1
+            else:
+                unassigned += 1
+
         dsItems = []
         for ds in dataSources:
             dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")
@@ -47,11 +57,11 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L
                 "ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False),
                 "neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False),
                 "lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None),
-                "chunkCount": 0,
+                "chunkCount": chunksByDs.get(dsId, 0),
             })
 
-        if dsItems and connChunkTotal > 0 and len(dsItems) == 1:
-            dsItems[0]["chunkCount"] = connChunkTotal
+        if unassigned > 0 and len(dsItems) == 1:
+            dsItems[0]["chunkCount"] += unassigned
 
         jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5)
         connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId]
diff --git a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
index 13f4a1d3..1ffb8557 100644
--- a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
@@ -33,6 +33,7 @@ class ExtractionService:
         self._interfaceDbComponent = getComponentInterface(
             context.user,
             mandateId=context.mandate_id,
+            featureInstanceId=context.feature_instance_id,
         )
         self._extractorRegistry = getExtractorRegistry()
         if ExtractionService._sharedChunkerRegistry is None:

From 6380f14ebe0b5736db4b9be0503d7f277001ddda Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 12 May 2026 19:16:34 +0200
Subject: [PATCH 5/8] teamsbot anonymous bot working

---
 .../features/teamsbot/browserBotConnector.py  |  8 +++-
 .../features/teamsbot/datamodelTeamsbot.py    |  9 ++++
 .../teamsbot/interfaceFeatureTeamsbot.py      |  3 ++
 .../features/teamsbot/routeFeatureTeamsbot.py | 10 ++--
 modules/features/teamsbot/service.py          | 46 ++++++++++++++++++-
 5 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/modules/features/teamsbot/browserBotConnector.py b/modules/features/teamsbot/browserBotConnector.py
index 2e76d039..d99fe829 100644
--- a/modules/features/teamsbot/browserBotConnector.py
+++ b/modules/features/teamsbot/browserBotConnector.py
@@ -40,6 +40,8 @@ class BrowserBotConnector:
         botAccountPassword: Optional[str] = None,
         transferMode: str = "auto",
         debugMode: bool = False,
+        avatarMediaData: Optional[str] = None,
+        avatarMediaType: Optional[str] = None,
     ) -> Dict[str, Any]:
         """
         Send join command to the Browser Bot service.
@@ -79,12 +81,16 @@ class BrowserBotConnector:
             "debugMode": debugMode,
         }
 
-        # Add authenticated join credentials if configured
         if botAccountEmail and botAccountPassword:
             payload["botAccountEmail"] = botAccountEmail
             payload["botAccountPassword"] = botAccountPassword
             logger.info(f"Bot will join authenticated as {botAccountEmail}")
 
+        if avatarMediaData and avatarMediaType:
+            payload["avatarMediaData"] = avatarMediaData
+            payload["avatarMediaType"] = avatarMediaType
+            logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars")
+
         try:
             async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session:
                 async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp:
diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py
index 076b0eda..18904525 100644
--- a/modules/features/teamsbot/datamodelTeamsbot.py
+++ b/modules/features/teamsbot/datamodelTeamsbot.py
@@ -119,6 +119,10 @@ class TeamsbotMeetingModule(PowerOnModel):
         default=None,
         description="Default display name for the bot when starting a session from this module",
     )
+    defaultAvatarFileId: Optional[str] = Field(
+        default=None,
+        description="FileItem ID for the default avatar image/video shown in the meeting",
+    )
     status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE)
 
 
@@ -225,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel):
     triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override")
     contextWindowSegments: Optional[int] = Field(default=None, description="Context window override")
     debugMode: Optional[bool] = Field(default=None, description="Debug mode override")
+    avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override")
 
 
 # ============================================================================
@@ -248,6 +253,7 @@ class TeamsbotConfig(BaseModel):
     triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, description="Minimum seconds between AI calls")
     contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context")
     debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics")
+    avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting")
 
     def _getEffectiveBrowserBotUrl(self) -> Optional[str]:
         """Resolve the effective browser bot URL: per-instance config takes priority, then env variable."""
@@ -288,6 +294,7 @@ class CreateMeetingModuleRequest(BaseModel):
     kpiTargets: Optional[str] = None
     defaultMeetingLink: Optional[str] = None
     defaultBotName: Optional[str] = None
+    defaultAvatarFileId: Optional[str] = None
 
 
 class UpdateMeetingModuleRequest(BaseModel):
@@ -300,6 +307,7 @@ class UpdateMeetingModuleRequest(BaseModel):
     kpiTargets: Optional[str] = None
     defaultMeetingLink: Optional[str] = None
     defaultBotName: Optional[str] = None
+    defaultAvatarFileId: Optional[str] = None
     status: Optional[TeamsbotModuleStatus] = None
 
 
@@ -317,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel):
     triggerCooldownSeconds: Optional[int] = None
     contextWindowSegments: Optional[int] = None
     debugMode: Optional[bool] = None
+    avatarFileId: Optional[str] = None
 
 
 # ============================================================================
diff --git a/modules/features/teamsbot/interfaceFeatureTeamsbot.py b/modules/features/teamsbot/interfaceFeatureTeamsbot.py
index 8491b3b9..2bfe77ff 100644
--- a/modules/features/teamsbot/interfaceFeatureTeamsbot.py
+++ b/modules/features/teamsbot/interfaceFeatureTeamsbot.py
@@ -25,6 +25,7 @@ from .datamodelTeamsbot import (
     TeamsbotDirectorPromptStatus,
     TeamsbotDirectorPromptMode,
     TeamsbotMeetingModule,
+    TeamsbotModuleStatus,
 )
 
 logger = logging.getLogger(__name__)
@@ -338,6 +339,8 @@ class TeamsbotObjects:
     def getModules(self, instanceId: str) -> List[Dict[str, Any]]:
         """Get all meeting modules for a feature instance."""
         records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId})
+        for r in records:
+            r.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
         records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True)
         return records
 
diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py
index b3088f8e..f07c98c5 100644
--- a/modules/features/teamsbot/routeFeatureTeamsbot.py
+++ b/modules/features/teamsbot/routeFeatureTeamsbot.py
@@ -40,6 +40,7 @@ from .datamodelTeamsbot import (
     TeamsbotDirectorPromptMode,
     TeamsbotDirectorPromptStatus,
     TeamsbotMeetingModule,
+    TeamsbotModuleStatus,
     CreateMeetingModuleRequest,
     UpdateMeetingModuleRequest,
     DIRECTOR_PROMPT_FILE_LIMIT,
@@ -203,6 +204,7 @@ async def createModule(
     data["instanceId"] = instanceId
     data["mandateId"] = mandateId
     data["ownerUserId"] = str(context.user.id)
+    data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value)
     module = interface.createModule(data)
     return {"module": module}
 
@@ -688,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf
     if not userSettings:
         return baseConfig
     
-    # Merge: user settings override instance defaults (only non-None values)
+    # Merge: user settings override instance defaults (only non-None values).
+    # Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically.
     overrides = {}
-    for field in ["botName", "aiSystemPrompt", "responseMode",
-                  "responseChannel", "transferMode", "language", "voiceId",
-                  "triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments",
-                  "debugMode"]:
+    for field in TeamsbotConfig.model_fields:
         value = userSettings.get(field)
         if value is not None:
             overrides[field] = value
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 8017e6dc..2136d8e0 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -732,6 +732,12 @@ class TeamsbotService:
             hasAuth = bool(botAccountEmail and botAccountPassword)
             logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}")
 
+            avatarMediaData = None
+            avatarMediaType = None
+            avatarFileId = self._resolveAvatarFileId(session, interface)
+            if avatarFileId:
+                avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface)
+
             result = await self.browserBotConnector.joinMeeting(
                 sessionId=sessionId,
                 meetingUrl=meetingLink,
@@ -743,6 +749,8 @@ class TeamsbotService:
                 botAccountPassword=botAccountPassword,
                 transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto",
                 debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False,
+                avatarMediaData=avatarMediaData,
+                avatarMediaType=avatarMediaType,
             )
 
             if result.get("success"):
@@ -767,6 +775,37 @@ class TeamsbotService:
             })
             await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)})
 
+    def _resolveAvatarFileId(self, session, interface):
+        """Resolve avatarFileId: module override > config default."""
+        moduleId = session.get("moduleId")
+        if moduleId:
+            module = interface.getModule(moduleId)
+            if module and module.get("defaultAvatarFileId"):
+                return module["defaultAvatarFileId"]
+        return getattr(self.config, "avatarFileId", None)
+
+    def _loadAvatarFileData(self, fileId, _teamsbotInterface):
+        """Load avatar file as base64 data + mime type. Returns (data, mimeType) or (None, None)."""
+        import base64
+        from modules.interfaces import interfaceDbManagement
+        try:
+            mgmt = interfaceDbManagement.getInterface(self.currentUser, self.mandateId)
+            fileRecord = mgmt.getFile(fileId)
+            if not fileRecord:
+                logger.warning(f"Avatar file {fileId} not found")
+                return None, None
+            mimeType = getattr(fileRecord, "mimeType", None) or "image/png"
+            rawBytes = mgmt.getFileData(fileId)
+            if not rawBytes:
+                logger.warning(f"Avatar file {fileId} has no data")
+                return None, None
+            b64 = base64.b64encode(rawBytes).decode("ascii")
+            logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64")
+            return b64, mimeType
+        except Exception as e:
+            logger.error(f"Failed to load avatar file {fileId}: {e}")
+            return None, None
+
     async def leaveMeeting(self, sessionId: str):
         """Send leave command to the Browser Bot service."""
         from . import interfaceFeatureTeamsbot as interfaceDb
@@ -1217,6 +1256,12 @@ class TeamsbotService:
             if self.config.botName:
                 phraseHints.append(self.config.botName)
 
+            # The language comes exclusively from the session/instance config
+            # (TeamsbotUserSettings.language overrides
+            # TeamsbotConfig.language; the schema falls back to de-DE).
+            # NO hard-coded pool of alternative languages: it caused
+            # Google STT to jump to en-US on noisy audio and return
+            # English gibberish.
             sttResult = await voiceInterface.speechToText(
                 audioContent=audioBytes,
                 language=self.config.language or "de-DE",
@@ -1224,7 +1269,6 @@ class TeamsbotService:
                 channels=1,
                 skipFallbacks=True,
                 phraseHints=phraseHints if phraseHints else None,
-                alternativeLanguages=["en-US"],
                 audioFormat="linear16",
             )
 

From 37187459312b85b781a23cd55fa4790bd6921bb3 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 12 May 2026 21:31:28 +0200
Subject: [PATCH 6/8] teamsbot auth fixes

---
 modules/interfaces/interfaceDbManagement.py | 38 ++++++++++++---------
 modules/routes/routeDataFiles.py            |  2 +-
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/modules/interfaces/interfaceDbManagement.py b/modules/interfaces/interfaceDbManagement.py
index f74de871..6a3c27b5 100644
--- a/modules/interfaces/interfaceDbManagement.py
+++ b/modules/interfaces/interfaceDbManagement.py
@@ -1274,17 +1274,20 @@ class ComponentObjects:
             if getattr(permissions, "update", None) != AccessLevel.ALL:
                 raise PermissionError("Setting global scope requires ALL permission")
 
-        self.db.recordModify(FileFolder, folderId, {"scope": scope})
+        allFolderIds = self._collectChildFolderIds(folderId)
+        for fid in allFolderIds:
+            self.db.recordModify(FileFolder, fid, {"scope": scope})
 
         filesUpdated = 0
         if cascadeToFiles:
-            items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
-            for item in items:
-                owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
-                if owner == self.userId:
-                    iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
-                    self.db.recordModify(FileItem, iid, {"scope": scope})
-                    filesUpdated += 1
+            for fid in allFolderIds:
+                items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
+                for item in items:
+                    owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
+                    if owner == self.userId:
+                        iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
+                        self.db.recordModify(FileItem, iid, {"scope": scope})
+                        filesUpdated += 1
 
         return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated}
 
@@ -1294,16 +1297,19 @@ class ComponentObjects:
             raise FileNotFoundError(f"Folder {folderId} not found")
         self._requireFolderWriteAccess(folder, folderId, "update")
 
-        self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize})
+        allFolderIds = self._collectChildFolderIds(folderId)
+        for fid in allFolderIds:
+            self.db.recordModify(FileFolder, fid, {"neutralize": neutralize})
 
-        items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId})
         filesUpdated = 0
-        for item in items:
-            owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
-            if owner == self.userId:
-                iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
-                self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
-                filesUpdated += 1
+        for fid in allFolderIds:
+            items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid})
+            for item in items:
+                owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None)
+                if owner == self.userId:
+                    iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
+                    self.db.recordModify(FileItem, iid, {"neutralize": neutralize})
+                    filesUpdated += 1
 
         return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated}
 
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index 3a951f3e..b22dacae 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -413,7 +413,7 @@ def patch_folder_scope(
         scope = body.get("scope")
         if not scope:
             raise HTTPException(status_code=400, detail="scope is required")
-        cascadeToFiles = body.get("cascadeToFiles", False)
+        cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False))
         managementInterface = interfaceDbManagement.getInterface(
             currentUser,
             mandateId=str(context.mandateId) if context.mandateId else None,

From 16ab816c65a3a85b82b5e68e25ecd847cf438b3f Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 12 May 2026 22:39:42 +0200
Subject: [PATCH 7/8] teamsbot ux fixes

---
 modules/features/teamsbot/service.py          |  62 ++++++-
 .../serviceAgent/coreTools/_workspaceTools.py | 156 +++++++++++++++++-
 .../extractors/extractorContainer.py          |  10 +-
 .../extractors/extractorEmail.py              |  46 ++++--
 requirements.txt                              |   3 +
 5 files changed, 249 insertions(+), 28 deletions(-)

diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 2136d8e0..93cc27a2 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -3290,16 +3290,50 @@ class TeamsbotService:
         self, lastToolLabel: Optional[str] = None
     ) -> Optional[str]:
         """Per-round progress notice for long agent runs (meeting voice /
-        chat, ephemeral). Phrasing is AI-localised once per session;
-        ``{activity}`` placeholder is substituted with the tool's
-        ``displayLabel`` from the ToolDefinition. Returns ``None`` if
-        generation failed."""
-        activity = lastToolLabel or "processing your request"
-        return await self._pickEphemeralPhrase(
-            "agentRound",
-            substitutions={"activity": activity},
+        chat, ephemeral). Generates a single short phrase in the bot's
+        configured language that describes the current activity. Unlike
+        the cached ephemeral phrases, this is a per-call AI generation
+        to avoid mixing English displayLabels into non-English speech."""
+        targetLang = (self.config.language or "").strip() or "en-US"
+        botName = (self.config.botName or "the assistant").strip()
+        activityHint = lastToolLabel or "working on the task"
+
+        prompt = (
+            f"You are a meeting assistant named '{botName}'.\n"
+            f"Target spoken language (BCP-47): {targetLang}\n\n"
+            f"The assistant is currently busy with: {activityHint}\n\n"
+            f"Generate ONE short sentence (max 12 words) in {targetLang} "
+            f"that tells the audience what the assistant is doing right now. "
+            f"Natural, spoken style. No step numbers. No quotes around the output.\n"
+            f"Output ONLY the sentence, nothing else."
         )
 
+        try:
+            aiService = createAiService(
+                self.currentUser, self.mandateId, self.instanceId
+            )
+            await aiService.ensureAiObjectsInitialized()
+            request = AiCallRequest(
+                prompt=prompt,
+                context="",
+                options=AiCallOptions(
+                    operationType=OperationTypeEnum.DATA_ANALYSE,
+                    priority=PriorityEnum.SPEED,
+                ),
+            )
+            response = await aiService.callAi(request)
+        except Exception as aiErr:
+            logger.debug(f"Agent round phrase generation failed: {aiErr}")
+            return None
+
+        if not response or response.errorCount != 0 or not response.content:
+            return None
+
+        result = response.content.strip().strip('"').strip("'")
+        if len(result) > 200:
+            result = result[:200]
+        return result
+
     async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None:
         """Deliver a short line to the meeting (TTS + chat per config) without
         persisting botResponses/transcripts, so the main agent answer stays the
@@ -3455,6 +3489,18 @@ class TeamsbotService:
                         "promptId": promptId,
                         "status": "toolCall",
                         "toolName": toolName,
+                        "displayLabel": lastToolLabel,
+                    })
+                elif event.type == AgentEventTypeEnum.TOOL_RESULT:
+                    evtData = event.data or {}
+                    resultSnippet = (evtData.get("data") or "")[:200]
+                    await _emitSessionEvent(sessionId, "agentRun", {
+                        "source": sourceLabel,
+                        "promptId": promptId,
+                        "status": "toolResult",
+                        "toolName": evtData.get("toolName", ""),
+                        "success": evtData.get("success", True),
+                        "summary": resultSnippet,
                     })
                 elif event.type == AgentEventTypeEnum.FILE_CREATED:
                     await _emitSessionEvent(sessionId, "documentCreated", event.data or {})
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py
index c6584735..ed30538a 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py
@@ -310,11 +310,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
                 return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create")
             fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name)
             fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "")
+            updateFields: Dict[str, Any] = {}
             if fiId:
-                dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId})
-            # File group tree removed — groupId arg and instance-group assignment no longer apply
+                updateFields["featureInstanceId"] = fiId
+            if args.get("folderId"):
+                updateFields["folderId"] = args["folderId"]
             if args.get("tags"):
-                dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
+                updateFields["tags"] = args["tags"]
+            if updateFields:
+                dbMgmt.updateFile(fileItem.id, updateFields)
 
             chatDocId = _attachFileAsChatDocument(
                 services, fileItem,
@@ -429,7 +433,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
         "writeFile", _writeFile,
         description=(
             "Create, append, or overwrite a file. Modes:\n"
-            "- create (default): create a new file (name required).\n"
+            "- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n"
             "- append: append content to an existing file (fileId required). "
             "Use for large content that exceeds a single tool call (~8000 chars per call).\n"
             "- overwrite: replace entire file content (fileId required).\n"
@@ -445,7 +449,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
                 "content": {"type": "string", "description": "Content to write/append"},
                 "mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"},
                 "fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"},
-                "groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."},
+                "folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."},
                 "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"},
             },
             "required": ["content"]
@@ -704,7 +708,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
         readOnly=False
     )
 
-    # Group tree tools removed — file grouping now uses view-based display grouping (TableListView)
+    # ---- Folder management tools ----
+
+    async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]):
+        name = args.get("name", "")
+        parentId = args.get("parentId") or None
+        if not name:
+            return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required")
+        try:
+            chatService = services.chat
+            dbMgmt = chatService.interfaceDbComponent
+            folder = dbMgmt.createFolder(name, parentId=parentId)
+            folderId = folder.get("id") if isinstance(folder, dict) else getattr(folder, "id", None)
+            folderName = folder.get("name") if isinstance(folder, dict) else getattr(folder, "name", name)
+            return ToolResult(
+                toolCallId="", toolName="createFolder", success=True,
+                data=f"Folder '{folderName}' created (id: {folderId})" + (f" inside parent {parentId}" if parentId else ""),
+                sideEvents=[{"type": "folderCreated", "data": {"folderId": folderId, "folderName": folderName, "parentId": parentId}}],
+            )
+        except Exception as e:
+            return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(e))
+
+    async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]):
+        try:
+            chatService = services.chat
+            dbMgmt = chatService.interfaceDbComponent
+            folders = dbMgmt.getOwnFolderTree()
+            if not folders:
+                return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.")
+            lines = []
+            folderMap: Dict[Optional[str], List] = {}
+            for f in folders:
+                pid = f.get("parentId") if isinstance(f, dict) else getattr(f, "parentId", None)
+                folderMap.setdefault(pid, []).append(f)
+
+            def _walk(parentId: Optional[str], indent: int):
+                for f in sorted(folderMap.get(parentId, []), key=lambda x: (x.get("name") if isinstance(x, dict) else getattr(x, "name", "")).lower()):
+                    fId = f.get("id") if isinstance(f, dict) else getattr(f, "id", "")
+                    fName = f.get("name") if isinstance(f, dict) else getattr(f, "name", "")
+                    prefix = "  " * indent
+                    lines.append(f"{prefix}- {fName} (id: {fId})")
+                    _walk(fId, indent + 1)
+
+            _walk(None, 0)
+            return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines))
+        except Exception as e:
+            return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e))
+
+    async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]):
+        fileId = args.get("fileId", "")
+        folderId = args.get("folderId")
+        if not fileId:
+            return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required")
+        try:
+            chatService = services.chat
+            dbMgmt = chatService.interfaceDbComponent
+            file = dbMgmt.getFile(fileId)
+            if not file:
+                return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found")
+            dbMgmt.updateFile(fileId, {"folderId": folderId or None})
+            targetLabel = f"folder {folderId}" if folderId else "root"
+            return ToolResult(
+                toolCallId="", toolName="moveFile", success=True,
+                data=f"File '{file.fileName}' (id: {fileId}) moved to {targetLabel}",
+                sideEvents=[{"type": "fileUpdated", "data": {"fileId": fileId, "fileName": file.fileName}}],
+            )
+        except Exception as e:
+            return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(e))
+
+    registry.register(
+        "createFolder", _createFolder,
+        description=(
+            "Create a new folder in the workspace file tree. "
+            "Use parentId to create nested folders. Returns the new folder ID."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "name": {"type": "string", "description": "Folder name"},
+                "parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."},
+            },
+            "required": ["name"]
+        },
+        readOnly=False
+    )
+
+    registry.register(
+        "listFolders", _listFolders,
+        description=(
+            "List all folders in the workspace as an indented tree. "
+            "Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile."
+        ),
+        parameters={"type": "object", "properties": {}},
+        readOnly=True
+    )
+
+    async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]):
+        """Agent tool: rename the folder args['folderId'] to args['newName']; returns a ToolResult (errors reported via success=False)."""
+        folderId = args.get("folderId", "")
+        newName = args.get("newName", "")
+        if not folderId or not newName:
+            return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required")
+        try:
+            # The value returned by renameFolder is not needed: the result message is built from the arguments.
+            services.chat.interfaceDbComponent.renameFolder(folderId, newName)
+            return ToolResult(
+                toolCallId="", toolName="renameFolder", success=True,
+                data=f"Folder {folderId} renamed to '{newName}'",
+                sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}],
+            )
+        except Exception as e:
+            return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e))
+
+    registry.register(
+        "renameFolder", _renameFolder,
+        description="Rename an existing folder in the workspace file tree.",
+        parameters={
+            "type": "object",
+            "properties": {
+                "folderId": {"type": "string", "description": "The folder ID to rename"},
+                "newName": {"type": "string", "description": "New folder name"},
+            },
+            "required": ["folderId", "newName"]
+        },
+        readOnly=False
+    )
+
+    registry.register(
+        "moveFile", _moveFile,
+        description=(
+            "Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "fileId": {"type": "string", "description": "The file ID to move"},
+                "folderId": {"type": "string", "description": "Target folder ID. Omit or null to move to root."},
+            },
+            "required": ["fileId"]
+        },
+        readOnly=False
+    )
 
     registry.register(
         "replaceInFile", _replaceInFile,
diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py
index 941168d5..a7b06266 100644
--- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py
+++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py
@@ -77,6 +77,7 @@ class ContainerExtractor(Extractor):
         """Extract by recursively unpacking the container."""
         fileName = context.get("fileName", "archive")
         mimeType = context.get("mimeType", "application/octet-stream")
+        cascadeDepth = context.get("_cascadeDepth", 0)
 
         rootId = makeId()
         parts: List[ContentPart] = [
@@ -97,7 +98,7 @@ class ContainerExtractor(Extractor):
                 parts.extend(lazy)
                 return parts
 
-        state = {"totalSize": 0, "fileCount": 0}
+        state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth}
         try:
             childParts = _resolveContainerRecursive(
                 fileBytes, mimeType, fileName, rootId, "", 0, state
@@ -209,7 +210,12 @@ def _addFilePart(
 
     if extractor and not isinstance(extractor, ContainerExtractor):
         try:
-            childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime})
+            cascadeDepth = state.get("cascadeDepth", 0)
+            childParts = extractor.extract(data, {
+                "fileName": fileName,
+                "mimeType": detectedMime,
+                "_cascadeDepth": cascadeDepth + 1,
+            })
             for part in childParts:
                 part.parentId = parentId
                 if not part.metadata:
diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py
index 2c4295ab..7f750835 100644
--- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py
+++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py
@@ -53,12 +53,13 @@ class EmailExtractor(Extractor):
     def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
         fileName = context.get("fileName", "email")
         lower = (fileName or "").lower()
+        depth = context.get("_cascadeDepth", 0)
 
         if lower.endswith(".msg"):
-            return self._extractMsg(fileBytes, fileName)
-        return self._extractEml(fileBytes, fileName)
+            return self._extractMsg(fileBytes, fileName, depth)
+        return self._extractEml(fileBytes, fileName, depth)
 
-    def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
+    def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
         """Parse standard EML (RFC 822) using stdlib email."""
         rootId = makeId()
         parts: List[ContentPart] = []
@@ -91,7 +92,7 @@ class EmailExtractor(Extractor):
                 attachName = part.get_filename() or "attachment"
                 attachData = part.get_payload(decode=True)
                 if attachData:
-                    parts.extend(_delegateAttachment(attachData, attachName, rootId))
+                    parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
                 continue
 
             if contentType == "text/plain":
@@ -113,7 +114,7 @@ class EmailExtractor(Extractor):
 
         return parts
 
-    def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]:
+    def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]:
         """Parse Outlook MSG files using extract-msg (optional)."""
         rootId = makeId()
         parts: List[ContentPart] = []
@@ -179,7 +180,7 @@ class EmailExtractor(Extractor):
             attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment"
             attachData = getattr(attachment, "data", None)
             if attachData:
-                parts.extend(_delegateAttachment(attachData, attachName, rootId))
+                parts.extend(_delegateAttachment(attachData, attachName, rootId, depth))
 
         try:
             msgFile.close()
@@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str:
     return "\n".join(lines)
 
 
-def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]:
-    """Delegate an attachment to the appropriate type-specific extractor."""
+_MAX_CASCADE_DEPTH = 10
+
+def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]:
+    """Delegate an attachment to the appropriate type-specific extractor.
+
+    Passes ``_cascadeDepth`` through the context so nested Email→Container→Email
+    chains share a global depth counter and don't recurse infinitely.
+    """
+    if depth >= _MAX_CASCADE_DEPTH:
+        logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction")
+        import base64
+        encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else ""
+        return [ContentPart(
+            id=makeId(), parentId=parentId, label=attachName,
+            typeGroup="binary", mimeType="application/octet-stream",
+            data=encodedData,
+            metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True},
+        )]
+
     guessedMime, _ = mimetypes.guess_type(attachName)
     detectedMime = guessedMime or "application/octet-stream"
 
-    from ..subRegistry import ExtractorRegistry
-    registry = ExtractorRegistry()
+    from ..subRegistry import getExtractorRegistry
+    registry = getExtractorRegistry()
     extractor = registry.resolve(detectedMime, attachName)
 
-    if extractor and not isinstance(extractor, EmailExtractor):
+    if extractor:
         try:
-            childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime})
+            childParts = extractor.extract(attachData, {
+                "fileName": attachName,
+                "mimeType": detectedMime,
+                "_cascadeDepth": depth + 1,
+            })
             for part in childParts:
                 part.parentId = parentId
                 if not part.metadata:
diff --git a/requirements.txt b/requirements.txt
index f5ffb715..2d2f5ee5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -110,6 +110,9 @@ asyncpg==0.30.0
 ## Stripe payments
 stripe>=11.0.0
 
+## Outlook MSG file extraction
+extract-msg>=0.55.0
+
 ## Geospatial libraries for STAC connector
 pyproj>=3.6.0  # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
 shapely>=2.0.0  # For geometric operations (intersections, area calculations)

From e3284994d0ec90de43e9d5ff74fd853f830bba61 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 12 May 2026 23:33:43 +0200
Subject: [PATCH 8/8] fixes stt paras

---
 modules/features/teamsbot/service.py | 76 ++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 16 deletions(-)

diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py
index 93cc27a2..fcce44bd 100644
--- a/modules/features/teamsbot/service.py
+++ b/modules/features/teamsbot/service.py
@@ -602,6 +602,13 @@ class TeamsbotService:
         self._lastTranscriptText: Optional[str] = None
         self._lastTranscriptId: Optional[str] = None
         self._lastSttTime: float = 0.0
+
+        # Audio chunk aggregation: short chunks are accumulated and sent to STT
+        # once the buffer reaches a minimum duration or has become stale.
+        self._audioBuffer: bytes = b""
+        self._audioBufferStartTime: float = 0.0
+        self._audioBufferLastChunkTime: float = 0.0
+        self._audioBufferSampleRate: int = 16000
         self._lastBotResponseText: Optional[str] = None
         self._lastBotResponseTs: float = 0.0
 
@@ -1203,6 +1210,14 @@ class TeamsbotService:
         interface.updateSession(sessionId, updates)
         await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage})
 
+        # Discard any leftover buffered audio on session end/error (it is dropped, not sent to STT)
+        if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]:
+            if self._audioBuffer:
+                logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)")
+                self._audioBuffer = b""
+                self._audioBufferStartTime = 0.0
+                self._audioBufferLastChunkTime = 0.0
+
         # Generate summary when session ends
         if dbStatus == TeamsbotSessionStatus.ENDED.value:
             asyncio.create_task(self._generateMeetingSummary(sessionId))
@@ -1217,11 +1232,18 @@ class TeamsbotService:
         voiceInterface,
         websocket: WebSocket,
     ):
-        """Process an audio chunk from WebRTC capture — run STT and feed into transcript pipeline."""
+        """Process an audio chunk from WebRTC capture. The bot-side VAD
+        (AudioWorklet / ScriptProcessor) already segments speech into 1-8s
+        voiced chunks. Here we apply a minimum-duration safety net: very short
+        chunks (<1s) are buffered until they reach 1s; everything else goes
+        straight to STT. A wall-clock timeout flushes stale buffers."""
         import base64
+        _MIN_CHUNK_SEC = 1.0
+        _STALE_TIMEOUT_SEC = 3.0
+
         try:
             audioBytes = base64.b64decode(audioBase64)
-            if len(audioBytes) < 1000:
+            if len(audioBytes) < 500:
                 return
 
             if captureDiagnostics:
@@ -1234,14 +1256,12 @@ class TeamsbotService:
                     f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}"
                 )
 
-            # Use RMS from capture diagnostics to skip real silence.
-            # Byte-variation heuristics produced false positives and dropped valid speech.
+            isSilent = False
             if captureDiagnostics and captureDiagnostics.get("rms") is not None:
                 try:
                     rmsVal = float(captureDiagnostics.get("rms"))
                     if rmsVal < 0.0003:
-                        logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})")
-                        return
+                        isSilent = True
                 except Exception:
                     pass
 
@@ -1249,23 +1269,47 @@ class TeamsbotService:
                 logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
                 return
 
-            # Treat sampleRate=0 as unknown (triggers auto-detection)
-            effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
+            now = time.time()
+            effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000
+
+            if not isSilent:
+                if not self._audioBuffer:
+                    self._audioBufferStartTime = now
+                self._audioBuffer += audioBytes
+                self._audioBufferLastChunkTime = now
+                self._audioBufferSampleRate = effectiveRate
+
+            bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0
+            bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0
+
+            shouldFlush = (
+                self._audioBuffer
+                and (
+                    bufferDuration >= _MIN_CHUNK_SEC
+                    or (bufferAge >= _STALE_TIMEOUT_SEC and bufferDuration > 0.3)
+                )
+            )
+
+            if not shouldFlush:
+                return
+
+            flushBytes = self._audioBuffer
+            flushRate = self._audioBufferSampleRate
+            self._audioBuffer = b""
+            self._audioBufferStartTime = 0.0
+            self._audioBufferLastChunkTime = 0.0
+
+            flushDuration = len(flushBytes) / (flushRate * 2)
+            logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz")
 
             phraseHints = list(self._knownSpeakers)
             if self.config.botName:
                 phraseHints.append(self.config.botName)
 
-            # Sprache kommt ausschliesslich aus der Session/Instance-Konfig
-            # (TeamsbotUserSettings.language ueberschreibt
-            # TeamsbotConfig.language, Fallback de-DE im Schema).
-            # KEIN hardcodierter Alternative-Sprachen-Pool — der hat dafuer
-            # gesorgt, dass Google STT bei verrauschter Audio auf en-US
-            # gesprungen ist und englisches Kauderwelsch geliefert hat.
             sttResult = await voiceInterface.speechToText(
-                audioContent=audioBytes,
+                audioContent=flushBytes,
                 language=self.config.language or "de-DE",
-                sampleRate=effectiveSampleRate,
+                sampleRate=flushRate,
                 channels=1,
                 skipFallbacks=True,
                 phraseHints=phraseHints if phraseHints else None,