From 75e07743a6743123f8bd214d992e1583e3267868 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 11 May 2026 21:26:20 +0200 Subject: [PATCH 01/13] google keys transferred to account poweron.center.ai --- env-gateway-dev.env | 10 +- env-gateway-int.env | 10 +- env-gateway-prod-forgejo.env | 8 +- env-gateway-prod.env | 10 +- modules/connectors/connectorVoiceGoogle.py | 103 +++++++++++++++--- .../features/commcoach/serviceCommcoach.py | 2 + .../features/teamsbot/datamodelTeamsbot.py | 13 +++ modules/features/teamsbot/mainTeamsbot.py | 13 +++ .../features/teamsbot/routeFeatureTeamsbot.py | 54 +++++++++ modules/features/teamsbot/service.py | 1 + modules/interfaces/interfaceVoiceObjects.py | 23 +++- modules/routes/routeVoiceGoogle.py | 30 ++++- .../test_connectorVoiceGoogle_sttHelpers.py | 23 ++++ 13 files changed, 257 insertions(+), 43 deletions(-) create mode 100644 tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py diff --git a/env-gateway-dev.env b/env-gateway-dev.env index 3709b0d8..158e00aa 100644 --- a/env-gateway-dev.env +++ b/env-gateway-dev.env @@ -19,7 +19,7 @@ APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2Z APP_TOKEN_EXPIRY=300 # CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG @@ -39,11 +39,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 
354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
diff --git a/env-gateway-int.env b/env-gateway-int.env index d22b7d2a..33b21f1f 100644 --- a/env-gateway-int.env +++ b/env-gateway-int.env @@ -21,7 +21,7 @@ APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZ APP_TOKEN_EXPIRY=300 # CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG @@ -41,11 +41,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env index e0ab455b..cc35f9c1 100644 --- a/env-gateway-prod-forgejo.env +++ b/env-gateway-prod-forgejo.env @@ -39,11 +39,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = 
PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. diff --git a/env-gateway-prod.env b/env-gateway-prod.env index 0183ae1f..6c840977 100644 --- a/env-gateway-prod.env +++ b/env-gateway-prod.env @@ -20,7 +20,7 @@ APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUl APP_TOKEN_EXPIRY=300 # CORS Configuration -APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://playground.poweron.swiss,https://playground-int.poweron.swiss,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG @@ -40,11 +40,11 @@ Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback -Service_GOOGLE_AUTH_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 
PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_DATA_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3eWFwSEZ4YnRJcjU1OW5kcXZKdkt1Z3gzWDFhVW5Eelh3VnpnNlppcWxweHY5UUQzeDIyVk83cW1XNVE4bllVWnR2MjlSQzFrV1UyUVV6OUt5b3Vqa3QzMUIwNFBqc2FVSXRxTlQ1OHVJZVFibnhBQ2puXzBwSXp5NUZhZjM1d1o= +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = 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 Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index f875c72c..3dd3221d 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -19,6 +19,30 @@ from modules.shared.voiceCatalog import getDefaultVoice as _catalogDefaultVoice logger = logging.getLogger(__name__) + +def _buildPrimarySttRecognitionFields( + *, + model: str, + lightweight: bool, +) -> Dict[str, Any]: + """Shared fields for batch + streaming primary RecognitionConfig.""" + base: Dict[str, Any] = { + "enable_automatic_punctuation": True, + "model": model, + } + if lightweight: + base["enable_word_time_offsets"] = False + base["enable_word_confidence"] = False + base["max_alternatives"] = 1 + base["use_enhanced"] = False + else: + base["enable_word_time_offsets"] = True + base["enable_word_confidence"] = True + base["max_alternatives"] = 3 + base["use_enhanced"] = True + return base + + # Gemini-TTS speaker IDs from voices.list use short names (e.g. "Kore") and require # SynthesisInput.prompt + VoiceSelectionParams.model_name (google-cloud-texttospeech >= 2.24.0). _GEMINI_TTS_DEFAULT_MODEL = "gemini-2.5-flash-tts" @@ -73,7 +97,10 @@ class ConnectorGoogleSpeech: sampleRate: int = None, channels: int = None, skipFallbacks: bool = False, phraseHints: Optional[list] = None, - alternativeLanguages: Optional[list] = None) -> Dict: + alternativeLanguages: Optional[list] = None, + model: str = "latest_long", + lightweight: bool = False, + audioFormat: Optional[str] = None) -> Dict: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -82,6 +109,9 @@ class ConnectorGoogleSpeech: language: Language code (e.g., 'de-DE', 'en-US') sample_rate: Audio sample rate (auto-detected if None) channels: Number of audio channels (auto-detected if None) + model: Google recognition model (e.g. 
latest_long, latest_short) + lightweight: If True, omit word timings/confidence, single alternative, no enhanced model + audioFormat: If set (webm_opus, linear16, mp3, flac, wav), skip auto-detection Returns: Dict containing transcribed text, confidence, and metadata @@ -92,8 +122,24 @@ class ConnectorGoogleSpeech: logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection") sampleRate = None - # Auto-detect audio format if not provided - if sampleRate is None or channels is None: + explicitFormat = (audioFormat or "").strip().lower() or None + if explicitFormat: + if channels is None: + channels = 1 + if sampleRate is None: + if explicitFormat == "webm_opus": + sampleRate = 48000 + elif explicitFormat == "linear16": + sampleRate = 16000 + elif explicitFormat in ("mp3", "flac"): + sampleRate = 44100 + elif explicitFormat == "wav": + sampleRate = 16000 + else: + sampleRate = 16000 + audioFormat = explicitFormat + logger.info(f"STT explicit format: {audioFormat}, {sampleRate}Hz, {channels}ch") + elif sampleRate is None or channels is None: validation = self.validateAudioFormat(audioContent) if not validation["valid"]: return { @@ -156,12 +202,7 @@ class ConnectorGoogleSpeech: "encoding": encoding, "audio_channel_count": channels, "language_code": language, - "enable_automatic_punctuation": True, - "model": "latest_long", - "enable_word_time_offsets": True, - "enable_word_confidence": True, - "max_alternatives": 3, - "use_enhanced": True, + **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight), } if phraseHints: @@ -205,8 +246,7 @@ class ConnectorGoogleSpeech: sample_rate_hertz=16000, audio_channel_count=1, language_code=language, - enable_automatic_punctuation=True, - model="latest_long" + **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight), ) try: response = await asyncio.to_thread( @@ -343,7 +383,7 @@ class ConnectorGoogleSpeech: "error": "No recognition results (silence or unclear audio)" } - 
models = ["latest_long", "phone_call", "latest_short"] + models = list(dict.fromkeys([model, "latest_long", "phone_call", "latest_short"])) for fallback_config in fallback_configs: for model in models: @@ -419,6 +459,9 @@ class ConnectorGoogleSpeech: audioQueue: asyncio.Queue, language: str = "de-DE", phraseHints: Optional[list] = None, + model: str = "latest_long", + lightweight: bool = False, + singleUtterance: bool = False, ) -> AsyncGenerator[Dict[str, Any], None]: """ Stream audio chunks to Google Cloud Speech-to-Text Streaming API. @@ -429,9 +472,13 @@ class ConnectorGoogleSpeech: Send (b"", True) to signal end of stream. language: Language code phraseHints: Optional boost phrases + model: Google recognition model (e.g. latest_long, latest_short) + lightweight: If True, use non-enhanced primary config (lower latency) + singleUtterance: If True, end stream after first utterance (client should reconnect) Yields: - Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec + Dicts with keys: isFinal, transcript, confidence, stabilityScore, audioDurationSec; + optionally endOfSingleUtterance, reconnectRequired """ STREAM_LIMIT_SEC = 290 streamStartTs = time.time() @@ -442,9 +489,7 @@ class ConnectorGoogleSpeech: "sample_rate_hertz": 48000, "audio_channel_count": 1, "language_code": language, - "enable_automatic_punctuation": True, - "model": "latest_long", - "use_enhanced": True, + **_buildPrimarySttRecognitionFields(model=model, lightweight=lightweight), } if phraseHints: configParams["speech_contexts"] = [speech.SpeechContext(phrases=phraseHints, boost=15.0)] @@ -453,7 +498,7 @@ class ConnectorGoogleSpeech: streamingConfig = speech.StreamingRecognitionConfig( config=recognitionConfig, interim_results=True, - single_utterance=False, + single_utterance=singleUtterance, ) import queue as threadQueue @@ -490,7 +535,22 @@ class ConnectorGoogleSpeech: ) for response in responseStream: elapsed = time.time() - streamStartTs - 
estimatedDurationSec = totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0 + + durationFromResults = 0.0 + for result in response.results: + rt = getattr(result, "result_end_time", None) + if rt is None: + continue + if hasattr(rt, "total_seconds"): + durationFromResults = max(durationFromResults, float(rt.total_seconds())) + else: + durationFromResults = max( + durationFromResults, + float(getattr(rt, "seconds", 0)) + float(getattr(rt, "nanos", 0)) * 1e-9, + ) + estimatedDurationSec = durationFromResults if durationFromResults > 0 else ( + totalAudioBytes / (48000 * 1 * 2) if totalAudioBytes else 0.0 + ) finalTexts = [] interimTexts = [] @@ -524,6 +584,13 @@ class ConnectorGoogleSpeech: "stabilityScore": 0.0, "audioDurationSec": estimatedDurationSec, }), loop) + + speechEvt = getattr(response, "speech_event_type", None) + if speechEvt and "END_OF_SINGLE_UTTERANCE" in str(speechEvt): + asyncio.run_coroutine_threadsafe(resultOutQ.put({ + "endOfSingleUtterance": True, + "audioDurationSec": estimatedDurationSec, + }), loop) if elapsed >= STREAM_LIMIT_SEC: logger.info("Streaming STT approaching 5-min limit, client should reconnect") asyncio.run_coroutine_threadsafe(resultOutQ.put({ diff --git a/modules/features/commcoach/serviceCommcoach.py b/modules/features/commcoach/serviceCommcoach.py index 39b96b55..5ac3af23 100644 --- a/modules/features/commcoach/serviceCommcoach.py +++ b/modules/features/commcoach/serviceCommcoach.py @@ -1080,6 +1080,8 @@ class CommcoachService: audioContent=audioContent, language=language, skipFallbacks=True, + model="latest_short", + lightweight=True, ) transcribedText = "" diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py index a7a22c9b..076b0eda 100644 --- a/modules/features/teamsbot/datamodelTeamsbot.py +++ b/modules/features/teamsbot/datamodelTeamsbot.py @@ -111,6 +111,14 @@ class TeamsbotMeetingModule(PowerOnModel): defaultDirectorPrompts: Optional[str] = 
Field(default=None, description="JSON list of default director prompts") goals: Optional[str] = Field(default=None, description="Free-text goals") kpiTargets: Optional[str] = Field(default=None, description="JSON object with structured KPI targets") + defaultMeetingLink: Optional[str] = Field( + default=None, + description="Default Teams meeting URL for new sessions in this module (user can override)", + ) + defaultBotName: Optional[str] = Field( + default=None, + description="Default display name for the bot when starting a session from this module", + ) status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE) @@ -257,6 +265,7 @@ class TeamsbotStartSessionRequest(BaseModel): """Request to start a new Teams Bot session.""" meetingLink: str = Field(description="Teams meeting join link (e.g., https://teams.microsoft.com/l/meetup-join/...)") botName: Optional[str] = Field(default=None, description="Override bot name for this session") + moduleId: Optional[str] = Field(default=None, description="Optional MeetingModule to attach this session to") connectionId: Optional[str] = Field(default=None, description="Microsoft connection ID for Graph API access") joinMode: Optional[TeamsbotJoinMode] = Field(default=None, description="How the bot joins: systemBot, anonymous, or userAccount. Defaults to systemBot if credentials configured, else anonymous.") sessionContext: Optional[str] = Field(default=None, description="Custom context/knowledge to provide to the bot for this session (e.g. 
meeting agenda, documents, background info)") @@ -277,6 +286,8 @@ class CreateMeetingModuleRequest(BaseModel): defaultDirectorPrompts: Optional[str] = None goals: Optional[str] = None kpiTargets: Optional[str] = None + defaultMeetingLink: Optional[str] = None + defaultBotName: Optional[str] = None class UpdateMeetingModuleRequest(BaseModel): @@ -287,6 +298,8 @@ class UpdateMeetingModuleRequest(BaseModel): defaultDirectorPrompts: Optional[str] = None goals: Optional[str] = None kpiTargets: Optional[str] = None + defaultMeetingLink: Optional[str] = None + defaultBotName: Optional[str] = None status: Optional[TeamsbotModuleStatus] = None diff --git a/modules/features/teamsbot/mainTeamsbot.py b/modules/features/teamsbot/mainTeamsbot.py index 66bc9247..850135d6 100644 --- a/modules/features/teamsbot/mainTeamsbot.py +++ b/modules/features/teamsbot/mainTeamsbot.py @@ -290,6 +290,19 @@ def _runMigrations(): migrated = False + # M2: MeetingModule default meeting link / bot name (additive columns) + if _tableExists("TeamsbotMeetingModule"): + for col, sqlType in ( + ("defaultMeetingLink", "TEXT"), + ("defaultBotName", "TEXT"), + ): + if not _columnExists("TeamsbotMeetingModule", col): + cur.execute( + f'ALTER TABLE "TeamsbotMeetingModule" ADD COLUMN "{col}" {sqlType} NULL', + ) + logger.info(f"Migration M2: Added TeamsbotMeetingModule.{col}") + migrated = True + # M1: Create default Adhoc modules for orphaned sessions # (only runs if TeamsbotSession table exists with moduleId column # and there are sessions without a moduleId) diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index ab42db22..b3088f8e 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -280,6 +280,11 @@ async def startSession( mandateId = _validateInstanceAccess(instanceId, context) interface = _getInterface(context, instanceId) config = _getInstanceConfig(instanceId) + + if 
body.moduleId: + mod = interface.getModule(body.moduleId) + if not mod or str(mod.get("instanceId") or "") != str(instanceId): + raise HTTPException(status_code=400, detail="Invalid moduleId for this instance") # Extract and validate meeting URL from user input (handles SafeLinks, invitation text, etc.) cleanMeetingUrl = _extractTeamsMeetingUrl(body.meetingLink) @@ -288,6 +293,7 @@ async def startSession( sessionData = TeamsbotSession( instanceId=instanceId, mandateId=mandateId, + moduleId=body.moduleId, meetingLink=cleanMeetingUrl, botName=body.botName or config.botName, sessionContext=body.sessionContext, @@ -426,6 +432,54 @@ async def listSessions( return {"sessions": sessions} +@router.get("/{instanceId}/dashboard/stream") +@limiter.limit("60/minute") +async def streamDashboard( + request: Request, + instanceId: str, + context: RequestContext = Depends(getRequestContext), +): + """ + SSE channel for the Teamsbot dashboard: repeated snapshots of sessions and meeting modules. + Push interval: 3s while any own session is pending/joining/active, otherwise 20s. + Same session visibility rules as GET /sessions (own sessions unless platform admin). 
+ """ + _validateInstanceAccess(instanceId, context) + interface = _getInterface(context, instanceId) + userId = None if context.isPlatformAdmin else str(context.user.id) + activeStatuses = { + TeamsbotSessionStatus.PENDING.value, + TeamsbotSessionStatus.JOINING.value, + TeamsbotSessionStatus.ACTIVE.value, + } + + async def eventGenerator(): + while True: + sessionRows = [] + try: + sessionRows = interface.getSessions(instanceId, includeEnded=True, userId=userId) + moduleRows = interface.getModules(instanceId) + payload = {"type": "dashboardState", "sessions": sessionRows, "modules": moduleRows} + yield f"data: {json.dumps(payload, default=str)}\n\n" + except asyncio.CancelledError: + raise + except Exception as ex: + logger.warning("dashboard stream tick failed: %s", ex) + yield f"data: {json.dumps({'type': 'error', 'message': 'dashboard_tick_failed'})}\n\n" + hasActive = any((s.get("status") in activeStatuses) for s in sessionRows) + await asyncio.sleep(3.0 if hasActive else 20.0) + + return StreamingResponse( + eventGenerator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + @router.get("/{instanceId}/sessions/{sessionId}") @limiter.limit("30/minute") async def getSession( diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index fe0d6c34..d520bf49 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -1225,6 +1225,7 @@ class TeamsbotService: skipFallbacks=True, phraseHints=phraseHints if phraseHints else None, alternativeLanguages=["en-US"], + audioFormat="linear16", ) if sttResult and sttResult.get("success") and sttResult.get("text"): diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py index d0b6f461..03729f86 100644 --- a/modules/interfaces/interfaceVoiceObjects.py +++ b/modules/interfaces/interfaceVoiceObjects.py @@ -69,7 +69,10 
@@ class VoiceObjects: sampleRate: int = None, channels: int = None, skipFallbacks: bool = False, phraseHints: list = None, - alternativeLanguages: list = None) -> Dict[str, Any]: + alternativeLanguages: list = None, + model: str = "latest_long", + lightweight: bool = False, + audioFormat: Optional[str] = None) -> Dict[str, Any]: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -81,6 +84,9 @@ class VoiceObjects: skipFallbacks: If True, skip fallback attempts (use when audio format is known) phraseHints: Optional list of phrases to boost recognition (names, terms) alternativeLanguages: Optional list of additional language codes for multi-language + model: Google STT model (e.g. latest_long, latest_short) + lightweight: If True, omit word-level features and enhanced model + audioFormat: If set (webm_opus, linear16, ...), skip format auto-detection Returns: Dict containing transcribed text, confidence, and metadata @@ -97,6 +103,9 @@ class VoiceObjects: skipFallbacks=skipFallbacks, phraseHints=phraseHints, alternativeLanguages=alternativeLanguages, + model=model, + lightweight=lightweight, + audioFormat=audioFormat, ) if result["success"]: @@ -120,13 +129,23 @@ class VoiceObjects: audioQueue: asyncio.Queue, language: str = "de-DE", phraseHints: Optional[list] = None, + model: str = "latest_long", + lightweight: bool = False, + singleUtterance: bool = False, ) -> AsyncGenerator[Dict[str, Any], None]: """ Stream audio to Google Streaming STT and yield interim/final results. Billing is recorded for each final result. 
""" connector = self._getGoogleSpeechConnector() - async for event in connector.streamingRecognize(audioQueue, language, phraseHints): + async for event in connector.streamingRecognize( + audioQueue, + language, + phraseHints, + model=model, + lightweight=lightweight, + singleUtterance=singleUtterance, + ): if event.get("isFinal") and self.billingCallback: durationSec = event.get("audioDurationSec", 0) priceCHF = connector.calculateSttCostCHF(durationSec) diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py index 8987e73f..10185cc2 100644 --- a/modules/routes/routeVoiceGoogle.py +++ b/modules/routes/routeVoiceGoogle.py @@ -155,12 +155,13 @@ async def sttStream( Protocol: Client sends JSON: - {"type": "open", "language": "de-DE"} + {"type": "open", "language": "de-DE", "model": "latest_short", "lightweight": true, "singleUtterance": true} {"type": "audio", "chunk": ""} {"type": "close"} Server sends JSON: {"type": "interim", "text": "..."} {"type": "final", "text": "...", "confidence": 0.95} + {"type": "end_of_single_utterance", "audioDurationSec": 0.0} {"type": "error", "message": "..."} {"type": "closed"} """ @@ -205,7 +206,12 @@ async def sttStream( logger.warning(f"STT billing pre-flight skipped: {e}") audioQueue: asyncio.Queue = asyncio.Queue() - language = "de-DE" + sttOpenOptions: Dict[str, Any] = { + "language": "de-DE", + "model": "latest_long", + "lightweight": False, + "singleUtterance": False, + } streamingTask: Optional[asyncio.Task] = None voiceInterface: Optional[VoiceObjects] = None @@ -233,10 +239,23 @@ async def sttStream( voiceInterface.billingCallback = _billingCb try: - async for event in voiceInterface.streamingSpeechToText(audioQueue, language): + async for event in voiceInterface.streamingSpeechToText( + audioQueue, + sttOpenOptions["language"], + phraseHints=None, + model=sttOpenOptions["model"], + lightweight=sttOpenOptions["lightweight"], + singleUtterance=sttOpenOptions["singleUtterance"], + ): if 
event.get("reconnectRequired"): await _sendJson({"type": "reconnect_required"}) return + if event.get("endOfSingleUtterance"): + await _sendJson({ + "type": "end_of_single_utterance", + "audioDurationSec": event.get("audioDurationSec", 0.0), + }) + continue if event.get("isFinal"): if event.get("transcript"): await _sendJson({"type": "final", "text": event["transcript"], "confidence": event.get("confidence", 0.0)}) @@ -258,7 +277,10 @@ async def sttStream( msgType = (msg.get("type") or "").strip() if msgType == "open": - language = msg.get("language") or "de-DE" + sttOpenOptions["language"] = msg.get("language") or "de-DE" + sttOpenOptions["model"] = msg.get("model") or "latest_long" + sttOpenOptions["lightweight"] = bool(msg.get("lightweight")) + sttOpenOptions["singleUtterance"] = bool(msg.get("singleUtterance")) if streamingTask and not streamingTask.done(): await audioQueue.put((b"", True)) streamingTask.cancel() diff --git a/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py b/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py new file mode 100644 index 00000000..258dc0db --- /dev/null +++ b/tests/unit/connectors/test_connectorVoiceGoogle_sttHelpers.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025 Patrick Motsch +"""Unit tests for Google STT helper config (no API calls).""" + +from modules.connectors.connectorVoiceGoogle import _buildPrimarySttRecognitionFields + + +def test_buildPrimaryStt_lightweight_stripsHeavyFeatures(): + d = _buildPrimarySttRecognitionFields(model="latest_short", lightweight=True) + assert d["model"] == "latest_short" + assert d["enable_word_time_offsets"] is False + assert d["enable_word_confidence"] is False + assert d["max_alternatives"] == 1 + assert d["use_enhanced"] is False + assert d["enable_automatic_punctuation"] is True + + +def test_buildPrimaryStt_full_matchesLegacyDefaults(): + d = _buildPrimarySttRecognitionFields(model="latest_long", lightweight=False) + assert d["model"] == "latest_long" + assert 
d["enable_word_time_offsets"] is True + assert d["enable_word_confidence"] is True + assert d["max_alternatives"] == 3 + assert d["use_enhanced"] is True From 03a6d3248bf3c449c3026f58e4cd2ca226f87027 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 11 May 2026 23:59:31 +0200 Subject: [PATCH 02/13] fixed teamsbot issues --- env-gateway-dev.env | 12 +++---- env-gateway-int.env | 14 ++++---- env-gateway-prod-forgejo.env | 14 ++++---- env-gateway-prod.env | 14 ++++---- modules/features/teamsbot/service.py | 34 ++++++++++--------- .../services/serviceAgent/agentLoop.py | 7 +++- .../coreTools/_connectionTools.py | 1 + .../serviceAgent/coreTools/_mediaTools.py | 6 +++- .../serviceAgent/coreTools/_workspaceTools.py | 9 +++-- .../services/serviceAgent/datamodelAgent.py | 6 ++++ .../services/serviceAgent/toolRegistry.py | 3 +- .../services/serviceAi/mainServiceAi.py | 13 ++++--- .../services/serviceWeb/mainServiceWeb.py | 9 +++-- 13 files changed, 83 insertions(+), 59 deletions(-) diff --git a/env-gateway-dev.env b/env-gateway-dev.env index 158e00aa..7802b33d 100644 --- a/env-gateway-dev.env +++ b/env-gateway-dev.env @@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760 APP_LOGGING_BACKUP_COUNT = 5 # OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP. 
-Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= +Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo= Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4= Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU= Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com 
-Service_GOOGLE_DATA_CLIENT_SECRET = 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 +Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg= Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. diff --git a/env-gateway-int.env b/env-gateway-int.env index 33b21f1f..a1924fff 100644 --- a/env-gateway-int.env +++ b/env-gateway-int.env @@ -34,18 +34,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760 APP_LOGGING_BACKUP_COUNT = 5 # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) -Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= +Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E= Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = 
INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY= Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE= Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY= Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env index cc35f9c1..b9c9e686 100644 --- a/env-gateway-prod-forgejo.env +++ b/env-gateway-prod-forgejo.env @@ -32,18 +32,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760 APP_LOGGING_BACKUP_COUNT = 5 # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) -Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= +Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyeUZORDYxOFdlNHk1N25kV3pSQVJMUVFwLUFlMzlzQjQ1eVljOTlzX184RndsTmtTV1FjdWkyQlBiUkdCbGt5S2ltZjJxa2I2dHBMdnJqZnhFSnBCampHYjB3RG5URDM1YzZSLVd6TGdaRXRVcEdadE5zM2thNV9SZy1KZDdLSHY= Service_MSFT_AUTH_REDIRECT_URI=https://api.poweron.swiss/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySk5uMmlWczBWTE00MHBIcWlBbVJmVmc3MlBWbDA1YTFaS3psZjVLd3d1X2FvRHV0X0c5blpLV0FpY05aMTJMMzUtcG8wakF2TlM3SGQ2VjFZM3JLT1MwTlZ0bm9BRlpkbHVPQTFNaXJvazlQRzN4M2ZZNEVhV1JHV190dWluSUk= Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect/callback Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_AUTH_CLIENT_SECRET = 
PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20= Service_GOOGLE_AUTH_REDIRECT_URI = -Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI= Service_GOOGLE_DATA_REDIRECT_URI = # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. diff --git a/env-gateway-prod.env b/env-gateway-prod.env index 6c840977..d42bb0f9 100644 --- a/env-gateway-prod.env +++ b/env-gateway-prod.env @@ -33,18 +33,18 @@ APP_LOGGING_ROTATION_SIZE = 10485760 APP_LOGGING_BACKUP_COUNT = 5 # OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) -Service_MSFT_AUTH_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= +Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU= Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback -Service_MSFT_DATA_CLIENT_ID = 
c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnBESkk2T25scFU1T1pNd2FENTFRM3kzcEpSXy1HT0trQkR2Wnl3U3RYbExzRy1YUTkxd3lPZE84U2lhX3FZanp5TjhYRGluLXVjU3hjaWRBUnZLbVhtRDItZ3FxNXJ3MUxicUZTXzJWZVNrR0VKN3ZlNEtET1ppOFk0MzNmbkwyRmROUk4= +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0= Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E= Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback -Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com -Service_GOOGLE_AUTH_CLIENT_SECRET = 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 +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY= Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index d520bf49..8017e6dc 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -83,10 +83,10 @@ _EPHEMERAL_PHRASE_INTENTS: Dict[str, str] = { ), "agentRound": ( "One short sentence (max ~14 words) the assistant says BETWEEN rounds " - "of a longer agent task to signal that work is still in progress. " - "Include the placeholder tokens '{round}' and '{maxRounds}' so the " - "caller can substitute the actual numbers — e.g. 'Step {round} of " - "{maxRounds}, still working.'" + "of a longer agent task to update the audience on what it is doing. " + "Include the placeholder token '{activity}' which will be filled with " + "the current activity — e.g. 'I am {activity}, one moment...' or " + "'Currently {activity}, almost there...'. Do NOT include step numbers." ), } @@ -1253,19 +1253,18 @@ class TeamsbotService: def _registerSpeakerHint(self, speaker: str, text: str, sessionId: str = ""): """Track current speaker from captions for STT attribution. - When the first non-bot caption arrives, retroactively attributes - any STT segments that were created before a speaker was known.""" + Retroactively attributes any unattributed STT segments whenever a + new non-bot caption speaker arrives (not just the first time).""" if not speaker: return normalizedSpeaker = speaker.strip() if not normalizedSpeaker or self._isBotSpeaker(normalizedSpeaker): return - prevSpeaker = self._lastCaptionSpeaker self._lastCaptionSpeaker = normalizedSpeaker self._knownSpeakers.add(normalizedSpeaker) - if prevSpeaker is None and self._unattributedTranscriptIds: + if self._unattributedTranscriptIds: from . 
import interfaceFeatureTeamsbot as interfaceDb interface = interfaceDb.getInterface(self.currentUser, self.mandateId, self.instanceId) for tid in self._unattributedTranscriptIds: @@ -3244,15 +3243,17 @@ class TeamsbotService: return await self._pickEphemeralPhrase("agentBusy") async def _interimAgentRoundMessage( - self, roundNum: int, maxRounds: int + self, lastToolLabel: Optional[str] = None ) -> Optional[str]: """Per-round progress notice for long agent runs (meeting voice / chat, ephemeral). Phrasing is AI-localised once per session; - ``{round}`` and ``{maxRounds}`` placeholders are substituted at - render time. Returns ``None`` if generation failed.""" + ``{activity}`` placeholder is substituted with the tool's + ``displayLabel`` from the ToolDefinition. Returns ``None`` if + generation failed.""" + activity = lastToolLabel or "processing your request" return await self._pickEphemeralPhrase( "agentRound", - substitutions={"round": roundNum, "maxRounds": maxRounds}, + substitutions={"activity": activity}, ) async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None: @@ -3371,6 +3372,7 @@ class TeamsbotService: finalText: str = "" rounds = 0 + lastToolLabel: Optional[str] = None try: async for event in agentService.runAgent( prompt=taskText, @@ -3391,11 +3393,9 @@ class TeamsbotService: "round": roundNum, "maxRounds": maxR, }) - # Runde 1: schon allgemeiner Start-Hinweis; ab Runde 2 ins Meeting melden. - # Director prompts bleiben still — keine Zwischen-Updates ins Meeting. 
if roundNum >= 2 and not directorPromptMode: try: - roundText = await self._interimAgentRoundMessage(roundNum, maxR) + roundText = await self._interimAgentRoundMessage(lastToolLabel) if roundText: await self._notifyMeetingEphemeral(sessionId, roundText) except Exception as roundNoticeErr: @@ -3403,7 +3403,9 @@ class TeamsbotService: f"Session {sessionId}: Per-round agent notice failed: {roundNoticeErr}" ) elif event.type == AgentEventTypeEnum.TOOL_CALL: - toolName = (event.data or {}).get("toolName") if event.data else None + evtData = event.data or {} + toolName = evtData.get("toolName") + lastToolLabel = evtData.get("displayLabel") await _emitSessionEvent(sessionId, "agentRun", { "source": sourceLabel, "promptId": promptId, diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py index b51ffb85..c1571994 100644 --- a/modules/serviceCenter/services/serviceAgent/agentLoop.py +++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py @@ -335,9 +335,14 @@ async def runAgentLoop( # Execute tool calls for tc in toolCalls: + toolDef = toolRegistry.getTool(tc.name) yield AgentEvent( type=AgentEventTypeEnum.TOOL_CALL, - data={"toolName": tc.name, "args": tc.args} + data={ + "toolName": tc.name, + "displayLabel": toolDef.displayLabel if toolDef else None, + "args": tc.args, + } ) results = await _executeToolCalls(toolCalls, toolRegistry, { diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py index b0381da2..0f3e4582 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_connectionTools.py @@ -184,4 +184,5 @@ def _registerConnectionTools(registry: ToolRegistry, services): "required": ["connectionId", "to", "subject", "body"], }, readOnly=False, + displayLabel="composing an email", ) diff --git 
a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py index a3fbb3ed..79e53092 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py @@ -297,6 +297,7 @@ def _registerMediaTools(registry: ToolRegistry, services): }, }, readOnly=False, + displayLabel="creating a document", ) # ── textToSpeech tool ────────────────────────────────────────────── @@ -573,6 +574,7 @@ def _registerMediaTools(registry: ToolRegistry, services): "required": ["prompt"], }, readOnly=False, + displayLabel="generating an image", ) # ── createChart tool ───────────────────────────────────────────────── @@ -770,6 +772,7 @@ def _registerMediaTools(registry: ToolRegistry, services): "required": ["datasets"], }, readOnly=False, + displayLabel="creating a chart", ) # ── Phase 3: speechToText, detectLanguage, neutralizeData, executeCode ── @@ -917,5 +920,6 @@ def _registerMediaTools(registry: ToolRegistry, services): }, "required": ["code"] }, - readOnly=True + readOnly=True, + displayLabel="running calculations", ) diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py index 2ffc808e..c6584735 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py @@ -359,7 +359,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): }, "required": ["fileId"] }, - readOnly=True + readOnly=True, + displayLabel="reviewing a document", ) registry.register( @@ -406,7 +407,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"] }, - readOnly=True + readOnly=True, + displayLabel="researching on the web", 
) registry.register( @@ -581,7 +583,8 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): }, "required": ["url"] }, - readOnly=True + readOnly=True, + displayLabel="reading a webpage", ) registry.register( diff --git a/modules/serviceCenter/services/serviceAgent/datamodelAgent.py b/modules/serviceCenter/services/serviceAgent/datamodelAgent.py index 9428af49..889f31e8 100644 --- a/modules/serviceCenter/services/serviceAgent/datamodelAgent.py +++ b/modules/serviceCenter/services/serviceAgent/datamodelAgent.py @@ -41,6 +41,12 @@ class ToolDefinition(BaseModel): """Schema for a tool available to the agent.""" name: str = Field(description="Unique tool name") description: str = Field(description="What this tool does") + displayLabel: Optional[str] = Field( + default=None, + description="Short human-readable activity phrase (e.g. 'researching on the web'). " + "Used for live progress messages in meetings. English gerund phrase; " + "localised by the caller." + ) parameters: Dict[str, Any] = Field( default_factory=dict, description="JSON Schema for tool parameters" diff --git a/modules/serviceCenter/services/serviceAgent/toolRegistry.py b/modules/serviceCenter/services/serviceAgent/toolRegistry.py index b4b5cd86..b2ba67a0 100644 --- a/modules/serviceCenter/services/serviceAgent/toolRegistry.py +++ b/modules/serviceCenter/services/serviceAgent/toolRegistry.py @@ -23,7 +23,7 @@ class ToolRegistry: def register(self, name: str, handler: Callable[..., Awaitable[ToolResult]], description: str = "", parameters: Dict[str, Any] = None, readOnly: bool = False, featureType: str = None, - toolSet: str = None): + toolSet: str = None, displayLabel: str = None): """Register a tool with its handler function.""" if name in self._tools: logger.warning(f"Tool '{name}' already registered, overwriting") @@ -31,6 +31,7 @@ class ToolRegistry: self._tools[name] = ToolDefinition( name=name, description=description, + displayLabel=displayLabel, parameters=parameters or {}, 
readOnly=readOnly, featureType=featureType, diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py index bcdb9552..d4e5ccdb 100644 --- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py +++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py @@ -567,11 +567,14 @@ mit Web-Recherche, E-Mail-Versand, Dokumenten-Erzeugung und Datenquellen-Zugriff Setze "needsAgent": true und "agentReason": "" WENN die Aufgabe eines oder mehrere dieser Merkmale hat: -- Recherche im Internet noetig (z.B. "recherchier was im Internet ueber XY", "schau mal nach", "google das") -- E-Mail an Teilnehmer/Kontakte versenden -- Dokument (PDF, Word, Excel) generieren oder im SharePoint/Drive ablegen -- Mehrere Schritte oder Tool-Aufrufe noetig (Zusammenfassung + Versand, Recherche + Empfehlung etc.) -- Daten aus externen Quellen abrufen (Outlook-Kontakte, SharePoint-Dateien, Kalender etc.) +- Recherche im Internet oder aktuelle Informationen noetig +- Informationen beschaffen die du NICHT im Transkript oder in deinem Vorwissen hast +- E-Mail versenden +- Dokument generieren oder in einer Datenquelle ablegen +- Mehrere Schritte oder Tool-Aufrufe noetig +- Daten aus externen Quellen abrufen + +Wenn du den gewuenschten Inhalt nicht selbst liefern kannst, setze needsAgent=true. Wenn needsAgent=true: - Setze shouldRespond=false (der Agent uebernimmt; du sprichst NICHT eigenstaendig). 
diff --git a/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py b/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py index 4ffc15aa..c4e24947 100644 --- a/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py +++ b/modules/serviceCenter/services/serviceWeb/mainServiceWeb.py @@ -98,7 +98,8 @@ class WebService: searchUrls = [] searchResultsWithContent = [] if needsSearch and (not allUrls or len(allUrls) < maxNumberPages): - self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content") + if operationId: + self._get_service("chat").progressLogUpdate(operationId, 0.3, "Searching for URLs and content") try: searchUrls, searchResultsWithContent = await self._performWebSearch( @@ -113,16 +114,14 @@ class WebService: searchUrls = [] searchResultsWithContent = [] - # Prioritize Tavily search URLs over AI-extracted URLs (they're more relevant) if searchUrls: - # Prepend Tavily URLs to the list (they're more relevant) allUrls = searchUrls + allUrls logger.info(f"Using {len(searchUrls)} Tavily URLs + {len(allUrls) - len(searchUrls)} other URLs = {len(allUrls)} total") else: - # If Tavily search failed, use AI-extracted URLs logger.warning("Tavily search returned no URLs, using AI-extracted URLs only") - self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs") + if operationId: + self._get_service("chat").progressLogUpdate(operationId, 0.5, f"Found {len(allUrls)} total URLs") # If we have search results (even without content), use them directly instead of crawling # Tavily search results are more relevant than generic AI-extracted URLs From 48c0f900af160716ffdecd9e3293b15ac289835a Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 15:19:01 +0200 Subject: [PATCH 03/13] rag --- app.py | 3 + env-gateway-dev.env | 7 - env-gateway-int.env | 7 - env-gateway-prod-forgejo.env | 11 +- env-gateway-prod.env | 7 - modules/datamodels/datamodelDataSource.py | 12 +- 
modules/datamodels/datamodelUam.py | 8 +- modules/features/workspace/mainWorkspace.py | 7 - .../workspace/routeFeatureWorkspace.py | 45 --- modules/interfaces/interfaceDbKnowledge.py | 54 ++++ modules/routes/routeBilling.py | 25 +- modules/routes/routeDataConnections.py | 178 +++++++++++- modules/routes/routeDataFiles.py | 2 +- modules/routes/routeDataPrompts.py | 2 +- modules/routes/routeDataSources.py | 74 ++++- modules/routes/routeHelpers.py | 113 ++++++-- modules/routes/routeRagInventory.py | 267 ++++++++++++++++++ .../serviceBackgroundJobs/__init__.py | 6 + .../mainBackgroundJobService.py | 120 +++++++- .../subConnectorIngestConsumer.py | 79 +++++- .../serviceKnowledge/subConnectorPrefs.py | 26 +- .../subConnectorSyncClickup.py | 126 ++++++--- .../subConnectorSyncGdrive.py | 131 +++++---- .../serviceKnowledge/subConnectorSyncGmail.py | 97 +++++-- .../subConnectorSyncOutlook.py | 104 ++++--- .../subConnectorSyncSharepoint.py | 76 +++-- .../serviceKnowledge/subPolicyResolver.py | 78 +++++ modules/system/mainSystem.py | 8 + scripts/script_db_migrate_datasource_rag.py | 88 ++++++ tests/unit/services/test_bootstrap_clickup.py | 41 ++- tests/unit/services/test_bootstrap_gdrive.py | 19 +- tests/unit/services/test_bootstrap_outlook.py | 4 + 32 files changed, 1450 insertions(+), 375 deletions(-) create mode 100644 modules/routes/routeRagInventory.py create mode 100644 modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py create mode 100644 scripts/script_db_migrate_datasource_rag.py diff --git a/app.py b/app.py index f5adb3d7..73a64064 100644 --- a/app.py +++ b/app.py @@ -604,6 +604,9 @@ app.include_router(promptRouter) from modules.routes.routeDataConnections import router as connectionsRouter app.include_router(connectionsRouter) +from modules.routes.routeRagInventory import router as ragInventoryRouter +app.include_router(ragInventoryRouter) + from modules.routes.routeTableViews import router as tableViewsRouter app.include_router(tableViewsRouter) 
diff --git a/env-gateway-dev.env b/env-gateway-dev.env index 7802b33d..f4e7e244 100644 --- a/env-gateway-dev.env +++ b/env-gateway-dev.env @@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync -# Manadate Pre-Processing Servers -PREPROCESS_ALTHAUS_CHAT_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGbEphQ3ZUMlFMQ2EwSGpoSE9NNzRJNTJtaGk1N0RGakdIYnVVeVFHZmF5OXB3QTVWLVNaZk9wNkhfQkZWRnVwRGRxem9iRzJIWXdpX1NIN2FwSExfT3c9PQ== - -# Preprocessor API Configuration -PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990 -PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query - # Azure Communication Services Email Configuration MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/env-gateway-int.env b/env-gateway-int.env index a1924fff..0898a985 100644 --- a/env-gateway-int.env +++ b/env-gateway-int.env @@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync -# Manadate Pre-Processing Servers -PREPROCESS_ALTHAUS_CHAT_SECRET = INT_ENC:Z0FBQUFBQnBaSnM4UkNBelhvckxCQUVjZm94N3BZUDcxaEMyckE2dm1lRVhqODhrWU1SUjNXZ3dQZlVJOWhveXFkZXpobW5xT0NneGZ2SkNUblFmYXd0WTBYNTl3UmRnSWc9PQ== - -# Preprocessor API Configuration -PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990 -PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query - # Azure Communication Services Email Configuration MESSAGING_ACS_CONNECTION_STRING = 
endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env index b9c9e686..80a175e6 100644 --- a/env-gateway-prod-forgejo.env +++ b/env-gateway-prod-forgejo.env @@ -41,10 +41,10 @@ Service_MSFT_DATA_REDIRECT_URI = https://api.poweron.swiss/api/msft/auth/connect Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kybjVVZ0FldUE1NTJiY2U1N0I0aVU0Z2hfeWlYc2tTdmlxTS1NdGxsRnFHdjZVcW5RRHZkUFhzUTVyX2RaZHlrQThRdTdCRmVBelBOcDlsbFQyd19SZExuWEM5aTcwQ0FvY3ctMUlWU1pndDE0MkdzeTZZRHkwLWU3aW56LW1jS20= -Service_GOOGLE_AUTH_REDIRECT_URI = +Service_GOOGLE_AUTH_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/login/callback Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyMnFma3VPOVJtTFFrNDRLN0NkWHY2dUZDWlJzdDVMd3p3N19IY0tWdURRRzExOGZCMjJOYmpKT1E0cTVwYlgtcVJINTY0anZPc1VoTW00cHl6NVh3ZHVTek1oT1RqWUhtamRkZ1dENWlwNTlZSU1oNWczeGdEOC1Gbk5XU2RBcmI= -Service_GOOGLE_DATA_REDIRECT_URI = +Service_GOOGLE_DATA_REDIRECT_URI = https://api.poweron.swiss/api/google/auth/connect/callback # ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 @@ -86,13 +86,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync -# Manadate Pre-Processing Servers -PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo= - -# Preprocessor API Configuration -PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990 -PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query - # Azure Communication Services Email Configuration MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/env-gateway-prod.env b/env-gateway-prod.env index d42bb0f9..6f4cfab0 100644 --- a/env-gateway-prod.env +++ b/env-gateway-prod.env @@ -87,13 +87,6 @@ APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync -# Manadate Pre-Processing Servers -PREPROCESS_ALTHAUS_CHAT_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4RVRmYW5IelNIbklTUDZIMEoycEN4ZFF0YUJoWWlUTUh2M0dhSXpYRXcwVkRGd1VieDNsYkdCRlpxMUR5Rjk1RDhPRkE5bmVtc2VDMURfLW9QNkxMVHN0M1JhbU9sa3JHWmdDZnlHS3BQRVBGTERVMHhXOVdDOWVqNkhfSUQyOHo= - -# Preprocessor API Configuration -PP_QUERY_API_KEY=ouho02j0rj2oijroi3rj2oijro23jr0990 -PP_QUERY_BASE_URL=https://poweron-althaus-preprocess-prod-e3fegaatc7faency.switzerlandnorth-01.azurewebsites.net/api/v1/dataquery/query - # Azure Communication Services Email Configuration MESSAGING_ACS_CONNECTION_STRING = 
endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/modules/datamodels/datamodelDataSource.py b/modules/datamodels/datamodelDataSource.py index d9e40bde..fe3f0442 100644 --- a/modules/datamodels/datamodelDataSource.py +++ b/modules/datamodels/datamodelDataSource.py @@ -62,15 +62,15 @@ class DataSource(PowerOnModel): description="Owner user ID", json_schema_extra={"label": "Benutzer-ID", "fk_target": {"db": "poweron_app", "table": "UserInDB", "labelField": "username"}}, ) - autoSync: bool = Field( + ragIndexEnabled: bool = Field( default=False, - description="Automatically sync on schedule", - json_schema_extra={"label": "Auto-Sync"}, + description="When true this tree element is indexed into the RAG knowledge store", + json_schema_extra={"label": "Im RAG indexieren", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}, ) - lastSynced: Optional[float] = Field( + lastIndexed: Optional[float] = Field( default=None, - description="Last sync timestamp", - json_schema_extra={"label": "Letzter Sync", "frontend_type": "timestamp"}, + description="Timestamp of last successful RAG indexing run", + json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"}, ) scope: str = Field( default="personal", diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py index 6aba24eb..f6cbd8fa 100644 --- a/modules/datamodels/datamodelUam.py +++ b/modules/datamodels/datamodelUam.py @@ -484,10 +484,10 @@ class UserConnection(PowerOnModel): default=None, description=( "Per-connection knowledge ingestion preferences. 
schemaVersion=1 keys: " - "neutralizeBeforeEmbed (bool), mailContentDepth (metadata|snippet|full), " - "mailIndexAttachments (bool), filesIndexBinaries (bool), mimeAllowlist (list[str]), " - "clickupScope (titles|title_description|with_comments), " - "surfaceToggles (dict per authority), maxAgeDays (int)." + "mailContentDepth (metadata|snippet|full), mailIndexAttachments (bool), " + "filesIndexBinaries (bool), clickupScope (titles|title_description|with_comments), " + "clickupIndexAttachments (bool), maxAgeDays (int). " + "Neutralization is controlled per DataSource.neutralize (not here)." ), json_schema_extra={"frontend_type": "json", "frontend_readonly": False, "frontend_required": False, "label": "Wissenspräferenzen"}, ) diff --git a/modules/features/workspace/mainWorkspace.py b/modules/features/workspace/mainWorkspace.py index 24307b45..77f5b290 100644 --- a/modules/features/workspace/mainWorkspace.py +++ b/modules/features/workspace/mainWorkspace.py @@ -33,11 +33,6 @@ UI_OBJECTS = [ "label": t("Einstellungen", context="UI"), "meta": {"area": "settings"} }, - { - "objectKey": "ui.feature.workspace.rag-insights", - "label": t("Wissens-Insights", context="UI"), - "meta": {"area": "rag-insights"}, - }, ] RESOURCE_OBJECTS = [ @@ -86,7 +81,6 @@ TEMPLATE_ROLES = [ {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True}, {"context": "UI", "item": "ui.feature.workspace.editor", "view": True}, {"context": "UI", "item": "ui.feature.workspace.settings", "view": True}, - {"context": "UI", "item": "ui.feature.workspace.rag-insights", "view": True}, {"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"}, ] }, @@ -97,7 +91,6 @@ TEMPLATE_ROLES = [ {"context": "UI", "item": "ui.feature.workspace.dashboard", "view": True}, {"context": "UI", "item": "ui.feature.workspace.editor", "view": True}, {"context": "UI", "item": "ui.feature.workspace.settings", "view": True}, - {"context": "UI", "item": 
"ui.feature.workspace.rag-insights", "view": True}, {"context": "RESOURCE", "item": "resource.feature.workspace.start", "view": True}, {"context": "RESOURCE", "item": "resource.feature.workspace.stop", "view": True}, {"context": "RESOURCE", "item": "resource.feature.workspace.files", "view": True}, diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py index 9595fee4..4487e5fe 100644 --- a/modules/features/workspace/routeFeatureWorkspace.py +++ b/modules/features/workspace/routeFeatureWorkspace.py @@ -2192,49 +2192,4 @@ async def putWorkspaceUserSettings( # ========================================================================= # RAG / Knowledge — anonymised instance statistics (presentation / KPIs) -# ========================================================================= -def _collectWorkspaceFileIdsForStats(instanceId: str, mandateId: Optional[str]) -> List[str]: - """All FileItem ids for this feature instance (any user). 
Knowledge rows are often stored - without featureInstanceId; we correlate by file id from the Management DB.""" - from modules.datamodels.datamodelFiles import FileItem - from modules.interfaces.interfaceDbManagement import ComponentObjects - - co = ComponentObjects() - rows = co.db.getRecordset(FileItem, recordFilter={"featureInstanceId": instanceId}) - out: List[str] = [] - m = str(mandateId) if mandateId else "" - for r in rows or []: - rid = r.get("id") if isinstance(r, dict) else getattr(r, "id", None) - if not rid: - continue - if m: - mid = r.get("mandateId") if isinstance(r, dict) else getattr(r, "mandateId", "") or "" - if mid and mid != m: - continue - out.append(str(rid)) - return out - - -@router.get("/{instanceId}/rag-statistics") -@limiter.limit("60/minute") -async def getRagStatistics( - request: Request, - instanceId: str = Path(...), - days: int = Query(90, ge=7, le=365, description="Timeline window in days"), - context: RequestContext = Depends(getRequestContext), -): - """Aggregated, non-identifying knowledge-store metrics for this workspace instance.""" - mandateId, _instanceConfig = _validateInstanceAccess(instanceId, context) - workspaceFileIds = _collectWorkspaceFileIdsForStats(instanceId, mandateId) - kdb = getKnowledgeInterface(context.user) - stats = kdb.getRagStatisticsForInstance( - featureInstanceId=instanceId, - mandateId=str(mandateId) if mandateId else "", - timelineDays=days, - workspaceFileIds=workspaceFileIds, - ) - if isinstance(stats, dict): - stats.setdefault("scope", {}) - stats["scope"]["workspaceFileIdsResolved"] = len(workspaceFileIds) - return JSONResponse(stats) diff --git a/modules/interfaces/interfaceDbKnowledge.py b/modules/interfaces/interfaceDbKnowledge.py index c2f79b67..31a5af61 100644 --- a/modules/interfaces/interfaceDbKnowledge.py +++ b/modules/interfaces/interfaceDbKnowledge.py @@ -133,6 +133,60 @@ class KnowledgeObjects: return {"indexRows": indexCount, "chunks": chunkCount} + def 
deleteFileContentIndexByDataSource(self, dataSourceId: str) -> Dict[str, int]: + """Delete all FileContentIndex rows whose provenance.dataSourceId matches. + + Used when a user disables ragIndexEnabled on a DataSource to purge + only those chunks that were ingested from that specific tree element. + """ + if not dataSourceId: + return {"indexRows": 0, "chunks": 0} + + allRows = self.db.getRecordset(FileContentIndex) + matchedRows = [] + for row in allRows: + prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None) + if isinstance(prov, dict) and prov.get("dataSourceId") == dataSourceId: + matchedRows.append(row) + + mandateIds: set = set() + chunkCount = 0 + indexCount = 0 + for row in matchedRows: + fid = row.get("id") if isinstance(row, dict) else getattr(row, "id", None) + mid = row.get("mandateId") if isinstance(row, dict) else getattr(row, "mandateId", "") + if not fid: + continue + chunks = self.db.getRecordset(ContentChunk, recordFilter={"fileId": fid}) + for chunk in chunks: + if self.db.recordDelete(ContentChunk, chunk["id"]): + chunkCount += 1 + if self.db.recordDelete(FileContentIndex, fid): + indexCount += 1 + if mid: + mandateIds.add(str(mid)) + + for mid in mandateIds: + try: + from modules.interfaces.interfaceDbBilling import _getRootInterface + _getRootInterface().reconcileMandateStorageBilling(mid) + except Exception as ex: + logger.warning("reconcileMandateStorageBilling after datasource purge failed: %s", ex) + + return {"indexRows": indexCount, "chunks": chunkCount} + + def listFileContentIndexByDataSource(self, dataSourceId: str) -> List[Dict[str, Any]]: + """List all FileContentIndex rows whose provenance.dataSourceId matches.""" + if not dataSourceId: + return [] + allRows = self.db.getRecordset(FileContentIndex) + out = [] + for row in allRows: + prov = row.get("provenance") if isinstance(row, dict) else getattr(row, "provenance", None) + if isinstance(prov, dict) and prov.get("dataSourceId") == 
dataSourceId: + out.append(dict(row) if not isinstance(row, dict) else row) + return out + def deleteFileContentIndex(self, fileId: str) -> bool: """Delete a FileContentIndex and all associated ContentChunks.""" existing = self.getFileContentIndex(fileId) diff --git a/modules/routes/routeBilling.py b/modules/routes/routeBilling.py index b7fcdeca..04251e09 100644 --- a/modules/routes/routeBilling.py +++ b/modules/routes/routeBilling.py @@ -1986,10 +1986,10 @@ def getUserViewTransactions( if not pagination: raise HTTPException(status_code=400, detail="pagination required for groupSummary") import json as _json - from collections import defaultdict from modules.interfaces.interfaceDbApp import getInterface as getAppInterface from modules.routes.routeHelpers import ( applyViewToParams, + build_group_summary_groups, effective_group_by_levels, resolveView, ) @@ -2018,28 +2018,7 @@ def getUserViewTransactions( summary_params, ctx.user, ) - counts: Dict[str, int] = defaultdict(int) - labels: Dict[str, str] = {} - null_key = "\x00NULL" - for item in all_rows: - raw = item.get(field) - if raw is None or raw == "": - nk = null_key - labels[nk] = null_label - else: - nk = str(raw) - if nk not in labels: - labels[nk] = nk - counts[nk] += 1 - groups_out: List[Dict[str, Any]] = [] - for nk in sorted(counts.keys(), key=lambda x: (x == null_key, labels.get(x, x).lower())): - groups_out.append( - { - "value": None if nk == null_key else nk, - "label": labels.get(nk, nk), - "totalCount": counts[nk], - } - ) + groups_out = build_group_summary_groups(all_rows, field, null_label, groupByLevels=levels) return JSONResponse(content={"groups": groups_out}) paginationParams = None diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index 58d36b91..04f652fb 100644 --- a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -130,7 +130,7 @@ def get_auth_authority_options( # 
============================================================================ @router.get("/") -@limiter.limit("30/minute") +@limiter.limit("60/minute") async def get_connections( request: Request, pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object"), @@ -197,7 +197,9 @@ async def get_connections( "lastChecked": connection.lastChecked, "expiresAt": connection.expiresAt, "tokenStatus": tokenStatus, - "tokenExpiresAt": tokenExpiresAt + "tokenExpiresAt": tokenExpiresAt, + "knowledgeIngestionEnabled": getattr(connection, "knowledgeIngestionEnabled", False), + "knowledgePreferences": getattr(connection, "knowledgePreferences", None) or {}, }) return items @@ -264,7 +266,7 @@ async def get_connections( }) enrichRowsWithFkLabels(enhanced_connections_dict, UserConnection) filtered = apply_strategy_b_filters_and_sort(enhanced_connections_dict, paginationParams, currentUser) - groups_out = build_group_summary_groups(filtered, field, null_label) + groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels) return JSONResponse(content={"groups": groups_out}) try: @@ -724,4 +726,172 @@ def delete_connection( raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to delete connection: {str(e)}" - ) \ No newline at end of file + ) + + +# ========================================================================= +# Knowledge Consent & Control Endpoints +# ========================================================================= + + +def _findOwnConnection(interface, userId: str, connectionId: str): + """Find a connection owned by the user. 
Returns None if not found.""" + connections = interface.getUserConnections(userId) + for conn in connections: + if conn.id == connectionId: + return conn + return None + + +@router.patch("/{connectionId}/knowledge-consent") +@limiter.limit("10/minute") +def _updateKnowledgeConsent( + request: Request, + connectionId: str = Path(..., description="Connection ID"), + enabled: bool = Body(..., embed=True), + currentUser: User = Depends(getCurrentUser), +) -> Dict[str, Any]: + """Master switch: can PowerOn ingest data from this connection into the RAG knowledge store? + + enabled=False: purge ALL chunks for this connection + cancel running jobs. + enabled=True: set flag; enqueue bootstrap only if rag-enabled DataSources exist. + """ + try: + interface = getInterface(currentUser) + connection = _findOwnConnection(interface, currentUser.id, connectionId) + if not connection: + raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found")) + + from modules.interfaces.interfaceDbApp import getRootInterface + rootIf = getRootInterface() + rootIf.db.recordModify(UserConnection, connectionId, {"knowledgeIngestionEnabled": enabled}) + + purged = None + cancelled = 0 + bootstrapEnqueued = False + + if not enabled: + from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface + purged = getKnowledgeInterface(None).deleteFileContentIndexByConnectionId(connectionId) + + from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection + cancelled = cancelJobsByConnection(connectionId) + else: + from modules.datamodels.datamodelDataSource import DataSource + dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True}) + if dataSources: + import asyncio + from modules.serviceCenter.services.serviceBackgroundJobs import startJob + authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "") + + async 
def _enqueue(): + await startJob( + "connection.bootstrap", + {"connectionId": connectionId, "authority": authority.lower()}, + triggeredBy=str(currentUser.id), + ) + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + loop.create_task(_enqueue()) + else: + loop.run_until_complete(_enqueue()) + except RuntimeError: + asyncio.run(_enqueue()) + bootstrapEnqueued = True + + import json as _json + from modules.shared.auditLogger import audit_logger + from modules.datamodels.datamodelAudit import AuditCategory + audit_logger.logEvent( + userId=str(currentUser.id), + mandateId=str(getattr(connection, "mandateId", "") or ""), + category=AuditCategory.PERMISSION.value, + action="knowledge_consent_changed", + details=_json.dumps({"connectionId": connectionId, "enabled": enabled}), + ) + + logger.info("Knowledge consent %s for connection %s by user %s", + "enabled" if enabled else "disabled", connectionId, currentUser.id) + return { + "connectionId": connectionId, + "knowledgeIngestionEnabled": enabled, + "purged": purged, + "cancelledJobs": cancelled, + "bootstrapEnqueued": bootstrapEnqueued, + } + except HTTPException: + raise + except Exception as e: + logger.error("Error updating knowledge consent: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.patch("/{connectionId}/knowledge-preferences") +@limiter.limit("20/minute") +def _updateKnowledgePreferences( + request: Request, + connectionId: str = Path(..., description="Connection ID"), + preferences: Dict[str, Any] = Body(..., embed=True), + currentUser: User = Depends(getCurrentUser), +) -> Dict[str, Any]: + """Update per-connection knowledge ingestion preferences (mail depth, attachments, etc.).""" + _ALLOWED_KEYS = {"mailContentDepth", "mailIndexAttachments", "filesIndexBinaries", + "clickupScope", "clickupIndexAttachments", "maxAgeDays"} + try: + interface = getInterface(currentUser) + connection = _findOwnConnection(interface, currentUser.id, connectionId) + if 
not connection: + raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found")) + + existing = getattr(connection, "knowledgePreferences", None) or {} + cleaned = {k: v for k, v in preferences.items() if k in _ALLOWED_KEYS} + merged = {**existing, **cleaned, "schemaVersion": 1} + + from modules.interfaces.interfaceDbApp import getRootInterface + getRootInterface().db.recordModify(UserConnection, connectionId, {"knowledgePreferences": merged}) + + logger.info("Knowledge preferences updated for connection %s", connectionId) + return {"connectionId": connectionId, "knowledgePreferences": merged, "updated": True} + except HTTPException: + raise + except Exception as e: + logger.error("Error updating knowledge preferences: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/{connectionId}/knowledge-stop") +@limiter.limit("10/minute") +def _stopKnowledgeJobs( + request: Request, + connectionId: str = Path(..., description="Connection ID"), + currentUser: User = Depends(getCurrentUser), +) -> Dict[str, Any]: + """Cancel all running/pending bootstrap jobs for this connection.""" + try: + interface = getInterface(currentUser) + connection = _findOwnConnection(interface, currentUser.id, connectionId) + if not connection: + raise HTTPException(status_code=404, detail=routeApiMsg("Connection not found")) + + from modules.serviceCenter.services.serviceBackgroundJobs import cancelJobsByConnection + cancelled = cancelJobsByConnection(connectionId) + + import json as _json + from modules.shared.auditLogger import audit_logger + from modules.datamodels.datamodelAudit import AuditCategory + audit_logger.logEvent( + userId=str(currentUser.id), + mandateId=str(getattr(connection, "mandateId", "") or ""), + category=AuditCategory.PERMISSION.value, + action="knowledge_jobs_stopped", + details=_json.dumps({"connectionId": connectionId, "cancelledCount": cancelled}), + ) + + logger.info("Stopped %d knowledge jobs for 
connection %s", cancelled, connectionId) + return {"connectionId": connectionId, "cancelled": cancelled} + except HTTPException: + raise + except Exception as e: + logger.error("Error stopping knowledge jobs: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py index 244b77b0..3a951f3e 100644 --- a/modules/routes/routeDataFiles.py +++ b/modules/routes/routeDataFiles.py @@ -543,7 +543,7 @@ def get_files( FileItem, ) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) - groups_out = build_group_summary_groups(filtered, field, null_label) + groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels) return JSONResponse(content={"groups": groups_out}) if mode == "filterValues": diff --git a/modules/routes/routeDataPrompts.py b/modules/routes/routeDataPrompts.py index c410d26a..331267b5 100644 --- a/modules/routes/routeDataPrompts.py +++ b/modules/routes/routeDataPrompts.py @@ -100,7 +100,7 @@ def get_prompts( result if isinstance(result, list) else (result.items if hasattr(result, "items") else []) ) filtered = apply_strategy_b_filters_and_sort(allItems, paginationParams, currentUser) - groups_out = build_group_summary_groups(filtered, field, null_label) + groups_out = build_group_summary_groups(filtered, field, null_label, groupByLevels=groupByLevels) return JSONResponse(content={"groups": groups_out}) if mode == "filterValues": diff --git a/modules/routes/routeDataSources.py b/modules/routes/routeDataSources.py index 5df8a18b..f7e5425d 100644 --- a/modules/routes/routeDataSources.py +++ b/modules/routes/routeDataSources.py @@ -1,6 +1,6 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. 
-"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize tagging.""" +"""PATCH endpoints for DataSource and FeatureDataSource scope/neutralize/rag-index tagging.""" import logging from typing import Any, Dict, List, Optional @@ -125,3 +125,75 @@ def _updateNeutralizeFields( except Exception as e: logger.error("Error updating neutralizeFields: %s", e) raise HTTPException(status_code=500, detail=str(e)) + + +@router.patch("/{sourceId}/rag-index") +@limiter.limit("30/minute") +def _updateDataSourceRagIndex( + request: Request, + sourceId: str = Path(..., description="ID of the DataSource"), + ragIndexEnabled: bool = Body(..., embed=True), + context: RequestContext = Depends(getRequestContext), +) -> Dict[str, Any]: + """Toggle RAG indexing for a DataSource. + + true: sets flag + enqueues mini-bootstrap for this DataSource only. + false: sets flag + synchronously purges all chunks from this DataSource. + """ + try: + from modules.interfaces.interfaceDbApp import getRootInterface + rootIf = getRootInterface() + rec = rootIf.db.getRecord(DataSource, sourceId) + if not rec: + raise HTTPException(status_code=404, detail=f"DataSource {sourceId} not found") + + rootIf.db.recordModify(DataSource, sourceId, {"ragIndexEnabled": ragIndexEnabled}) + logger.info("Updated ragIndexEnabled=%s for DataSource %s", ragIndexEnabled, sourceId) + + if ragIndexEnabled: + from modules.serviceCenter.services.serviceBackgroundJobs import startJob + import asyncio + + connectionId = rec.get("connectionId") or rec.get("connection_id") or "" + conn = rootIf.getUserConnectionById(connectionId) if connectionId else None + authority = "" + if conn: + authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "") + + async def _enqueue(): + await startJob( + "connection.bootstrap", + {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]}, + triggeredBy=str(context.user.id), + ) + try: + loop = 
asyncio.get_event_loop() + if loop.is_running(): + loop.create_task(_enqueue()) + else: + loop.run_until_complete(_enqueue()) + except RuntimeError: + asyncio.run(_enqueue()) + else: + from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface + purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId) + logger.info("Purged %d index rows / %d chunks for DataSource %s", + purgeResult.get("indexRows", 0), purgeResult.get("chunks", 0), sourceId) + + import json + from modules.shared.auditLogger import audit_logger + from modules.datamodels.datamodelAudit import AuditCategory + audit_logger.logEvent( + userId=str(context.user.id), + mandateId=context.mandateId, + category=AuditCategory.PERMISSION.value, + action="rag_index_toggled", + details=json.dumps({"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled}), + ) + + return {"sourceId": sourceId, "ragIndexEnabled": ragIndexEnabled, "updated": True} + except HTTPException: + raise + except Exception as e: + logger.error("Error updating datasource ragIndexEnabled: %s", e) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/modules/routes/routeHelpers.py b/modules/routes/routeHelpers.py index f5af7d06..f1d88e31 100644 --- a/modules/routes/routeHelpers.py +++ b/modules/routes/routeHelpers.py @@ -825,45 +825,106 @@ def build_group_summary_groups( items: List[Dict[str, Any]], field: str, null_label: str = "—", + groupByLevels: List[Dict[str, Any]] | None = None, ) -> List[Dict[str, Any]]: """ - Build {"value", "label", "totalCount"} for mode=groupSummary (single grouping level). + Build {"value", "label", "totalCount"} summaries for mode=groupSummary. + + When *groupByLevels* contains more than one level the function produces one + entry per unique combination of all level values (flat permutations). + ``value`` becomes a ``///``-joined composite key and ``label`` the ``/``-joined + human-readable label so the frontend can split them back. 
""" from collections import defaultdict - counts: Dict[str, int] = defaultdict(int) - display_by_key: Dict[str, str] = {} - null_key = "\x00NULL" - label_attr = f"{field}Label" + fields: list[dict] = [] + if groupByLevels and len(groupByLevels) > 1: + for lvl in groupByLevels: + f = lvl.get("field", "") + nl = str(lvl.get("nullLabel") or null_label) + if f: + fields.append({"field": f, "nullLabel": nl}) + if not fields: + fields = [{"field": field, "nullLabel": null_label}] + nullKey = "\x00NULL" + + if len(fields) == 1: + f = fields[0]["field"] + nl = fields[0]["nullLabel"] + counts: Dict[str, int] = defaultdict(int) + displayByKey: Dict[str, str] = {} + labelAttr = f"{f}Label" + for item in items: + raw = item.get(f) + if raw is None or raw == "": + nk = nullKey + display = nl + else: + nk = str(raw) + display = None + lbl = item.get(labelAttr) + if lbl is not None and lbl != "": + display = str(lbl) + if display is None: + display = nk + counts[nk] += 1 + if nk not in displayByKey: + displayByKey[nk] = display + orderedKeys = sorted( + counts.keys(), + key=lambda x: (x == nullKey, str(displayByKey.get(x, x)).lower()), + ) + return [ + { + "value": None if nk == nullKey else nk, + "label": displayByKey.get(nk, nk), + "totalCount": counts[nk], + } + for nk in orderedKeys + ] + + counts = defaultdict(int) + displayByComposite: Dict[str, list] = {} + filtersByComposite: Dict[str, dict] = {} for item in items: - raw = item.get(field) - if raw is None or raw == "": - nk = null_key - display = null_label - else: - nk = str(raw) - display = None - lbl = item.get(label_attr) - if lbl is not None and lbl != "": - display = str(lbl) - if display is None: - display = nk - counts[nk] += 1 - if nk not in display_by_key: - display_by_key[nk] = display + parts: list[str] = [] + labels: list[str] = [] + filterMap: dict = {} + for fd in fields: + f = fd["field"] + nl = fd["nullLabel"] + labelAttr = f"{f}Label" + raw = item.get(f) + if raw is None or raw == "": + 
parts.append(nullKey) + labels.append(nl) + filterMap[f] = None + else: + parts.append(str(raw)) + lbl = item.get(labelAttr) + labels.append(str(lbl) if lbl not in (None, "") else str(raw)) + filterMap[f] = str(raw) + compositeKey = "///".join(parts) + counts[compositeKey] += 1 + if compositeKey not in displayByComposite: + displayByComposite[compositeKey] = labels + filtersByComposite[compositeKey] = filterMap - ordered_keys = sorted( + orderedKeys = sorted( counts.keys(), - key=lambda x: (x == null_key, str(display_by_key.get(x, x)).lower()), + key=lambda x: tuple( + (seg == nullKey, seg.lower()) for seg in x.split("///") + ), ) return [ { - "value": None if nk == null_key else nk, - "label": display_by_key.get(nk, nk), - "totalCount": counts[nk], + "value": ck.replace(nullKey, "__null__") if nullKey in ck else ck, + "label": " / ".join(displayByComposite[ck]), + "totalCount": counts[ck], + "filters": filtersByComposite[ck], } - for nk in ordered_keys + for ck in orderedKeys ] diff --git a/modules/routes/routeRagInventory.py b/modules/routes/routeRagInventory.py new file mode 100644 index 00000000..08d2a245 --- /dev/null +++ b/modules/routes/routeRagInventory.py @@ -0,0 +1,267 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+"""RAG Inventory API — global knowledge-store visibility for users, admins, platform.""" + +import logging +from typing import Any, Dict, List, Optional + +from fastapi import APIRouter, HTTPException, Depends, Request +from modules.auth import limiter, getCurrentUser, getRequestContext, RequestContext +from modules.datamodels.datamodelUam import User +from modules.shared.i18nRegistry import apiRouteContext + +routeApiMsg = apiRouteContext("routeRagInventory") +logger = logging.getLogger(__name__) + +router = APIRouter( + prefix="/api/rag/inventory", + tags=["RAG Inventory"], + responses={ + 401: {"description": "Unauthorized"}, + 403: {"description": "Forbidden"}, + 500: {"description": "Internal server error"}, + }, +) + + +def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> List[Dict[str, Any]]: + from modules.datamodels.datamodelDataSource import DataSource + from modules.datamodels.datamodelKnowledge import FileContentIndex + + out = [] + for conn in connections: + connectionId = str(conn.id) + dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId}) + + connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId}) + connChunkTotal = len(connIndexRows) + + dsItems = [] + for ds in dataSources: + dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "") + dsItems.append({ + "id": dsId, + "label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""), + "path": ds.get("path") if isinstance(ds, dict) else getattr(ds, "path", ""), + "sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""), + "ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False), + "neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False), + "lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, 
"lastIndexed", None), + "chunkCount": 0, + }) + + if dsItems and connChunkTotal > 0 and len(dsItems) == 1: + dsItems[0]["chunkCount"] = connChunkTotal + + jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5) + connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId] + runningJobs = [ + {"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")} + for j in connJobs + if j.get("status") in ("PENDING", "RUNNING") + ] + lastError = None + for j in connJobs: + if j.get("status") == "ERROR": + lastError = {"jobId": j["id"], "errorMessage": j.get("errorMessage", "")} + break + + out.append({ + "id": connectionId, + "authority": conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority), + "externalEmail": getattr(conn, "externalEmail", ""), + "knowledgeIngestionEnabled": getattr(conn, "knowledgeIngestionEnabled", False), + "preferences": getattr(conn, "knowledgePreferences", None) or {}, + "dataSources": dsItems, + "totalChunks": connChunkTotal, + "runningJobs": runningJobs, + "lastError": lastError, + }) + return out + + +@router.get("/me") +@limiter.limit("30/minute") +def _getInventoryMe( + request: Request, + currentUser: User = Depends(getCurrentUser), +) -> Dict[str, Any]: + """Personal RAG inventory: own connections + DataSources + chunk counts.""" + try: + from modules.interfaces.interfaceDbApp import getRootInterface + from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface + from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService + + rootIf = getRootInterface() + knowledgeIf = getKnowledgeInterface(None) + connections = rootIf.getUserConnections(currentUser.id) + + items = _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) + totalChunks = sum(c.get("totalChunks", 0) for c in items) + + return {"connections": items, "totals": {"chunks": 
totalChunks}} + except Exception as e: + logger.error("Error in RAG inventory /me: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/mandate") +@limiter.limit("20/minute") +def _getInventoryMandate( + request: Request, + context: RequestContext = Depends(getRequestContext), +) -> Dict[str, Any]: + """Mandate-level RAG aggregation (requires mandate membership).""" + if not context.mandateId: + raise HTTPException(status_code=403, detail=routeApiMsg("Mandate context required")) + try: + from modules.interfaces.interfaceDbApp import getRootInterface + from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface, aggregateMandateRagTotalBytes + from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService + + rootIf = getRootInterface() + knowledgeIf = getKnowledgeInterface(None) + mandateId = str(context.mandateId) if context.mandateId else "" + + from modules.datamodels.datamodelUam import UserConnection + allConnections = rootIf.db.getRecordset(UserConnection, recordFilter={"mandateId": mandateId}) + connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections] + + items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService) + totalChunks = sum(c.get("totalChunks", 0) for c in items) + totalBytes = aggregateMandateRagTotalBytes(mandateId) + + return {"connections": items, "totals": {"chunks": totalChunks, "bytes": totalBytes}} + except HTTPException: + raise + except Exception as e: + logger.error("Error in RAG inventory /mandate: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/platform") +@limiter.limit("10/minute") +def _getInventoryPlatform( + request: Request, + context: RequestContext = Depends(getRequestContext), +) -> Dict[str, Any]: + """Platform-wide RAG statistics (sysadmin only).""" + if not context.isSysAdmin: + raise 
HTTPException(status_code=403, detail=routeApiMsg("Platform admin required")) + try: + from modules.interfaces.interfaceDbApp import getRootInterface + from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface + from modules.serviceCenter.services.serviceBackgroundJobs import mainBackgroundJobService as jobService + from modules.datamodels.datamodelUam import UserConnection + + rootIf = getRootInterface() + knowledgeIf = getKnowledgeInterface(None) + allConnections = rootIf.db.getRecordset(UserConnection) + connectionObjects = [type("C", (), row)() if isinstance(row, dict) else row for row in allConnections] + + items = _buildConnectionInventory(connectionObjects, rootIf, knowledgeIf, jobService) + totalChunks = sum(c.get("totalChunks", 0) for c in items) + + return {"connections": items, "totals": {"chunks": totalChunks}} + except HTTPException: + raise + except Exception as e: + logger.error("Error in RAG inventory /platform: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/reindex/{connectionId}") +@limiter.limit("10/minute") +def _reindexConnection( + request: Request, + connectionId: str, + currentUser: User = Depends(getCurrentUser), +) -> Dict[str, Any]: + """Re-trigger bootstrap for a connection (re-index all ragIndexEnabled DataSources). + + Submits a new connection.bootstrap job, regardless of previous failures. 
+ """ + try: + from modules.interfaces.interfaceDbApp import getRootInterface + from modules.serviceCenter.services.serviceBackgroundJobs import startJob + from modules.datamodels.datamodelDataSource import DataSource + import asyncio + + rootIf = getRootInterface() + conn = rootIf.getUserConnectionById(connectionId) + if conn is None: + raise HTTPException(status_code=404, detail="Connection not found") + + if str(conn.userId) != str(currentUser.id): + raise HTTPException(status_code=403, detail="Not your connection") + + dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId}) + ragDs = [ds for ds in dataSources if (ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False))] + if not ragDs: + return {"status": "skipped", "reason": "no_rag_enabled_datasources"} + + authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "") + dsIds = [(ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")) for ds in ragDs] + + async def _enqueue(): + return await startJob( + "connection.bootstrap", + {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds}, + triggeredBy=str(currentUser.id), + ) + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + future = asyncio.ensure_future(_enqueue()) + jobId = None + else: + jobId = loop.run_until_complete(_enqueue()) + except RuntimeError: + jobId = asyncio.run(_enqueue()) + + logger.info("Reindex triggered for connection %s (%d DataSources)", connectionId, len(dsIds)) + return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(dsIds), "jobId": jobId} + except HTTPException: + raise + except Exception as e: + logger.error("Error triggering reindex: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/jobs") +@limiter.limit("60/minute") +def _getActiveJobs( + request: Request, + currentUser: User = 
Depends(getCurrentUser), +) -> List[Dict[str, Any]]: + """Active RAG jobs for the current user (used by header badge).""" + try: + from modules.serviceCenter.services.serviceBackgroundJobs import listJobs + from modules.interfaces.interfaceDbApp import getRootInterface + + rootIf = getRootInterface() + connections = rootIf.getUserConnections(currentUser.id) + connectionMap = {str(c.id): c for c in connections} + connectionIds = set(connectionMap.keys()) + + jobs = listJobs(jobType="connection.bootstrap", limit=50) + active = [] + for j in jobs: + if j.get("status") not in ("PENDING", "RUNNING"): + continue + payload = j.get("payload") or {} + connId = payload.get("connectionId") + if connId in connectionIds: + conn = connectionMap[connId] + active.append({ + "jobId": j["id"], + "connectionId": connId, + "connectionLabel": getattr(conn, "displayLabel", None) or getattr(conn, "authority", connId), + "jobType": j.get("jobType", "connection.bootstrap"), + "progress": j.get("progress", 0), + "progressMessage": j.get("progressMessage", ""), + }) + return active + except Exception as e: + logger.error("Error in RAG inventory /jobs: %s", e, exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py b/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py index e9d4c94c..ce67dc4a 100644 --- a/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py +++ b/modules/serviceCenter/services/serviceBackgroundJobs/__init__.py @@ -7,6 +7,9 @@ from .mainBackgroundJobService import ( startJob, getJobStatus, listJobs, + cancelJob, + cancelJobsByConnection, + isTerminalStatus, JobProgressCallback, ) @@ -15,5 +18,8 @@ __all__ = [ "startJob", "getJobStatus", "listJobs", + "cancelJob", + "cancelJobsByConnection", + "isTerminalStatus", "JobProgressCallback", ] diff --git a/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py 
b/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py index b8a55e28..66ca4708 100644 --- a/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py +++ b/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py @@ -30,6 +30,7 @@ clear message. No silent zombies. import asyncio import logging +import time from datetime import datetime, timezone from typing import Any, Awaitable, Callable, Dict, List, Optional @@ -49,7 +50,46 @@ JOBS_DATABASE = APP_CONFIG.get("DB_DATABASE", "poweron_app") registerDatabase(JOBS_DATABASE) -JobProgressCallback = Callable[[int, Optional[str]], None] +_CANCEL_CHECK_INTERVAL_S = 3.0 + + +class JobProgressCallback: + """Callable progress reporter with cooperative cancel-check for long-running walkers.""" + + def __init__(self, jobId: str): + self._jobId = jobId + self._cancelledCache: Optional[bool] = None + self._lastCheckedAt: float = 0.0 + + def __call__(self, progress: int, message: Optional[str] = None) -> None: + try: + clamped = max(0, min(100, int(progress))) + fields: Dict[str, Any] = {"progress": clamped} + if message is not None: + fields["progressMessage"] = message[:500] + _updateJob(self._jobId, fields) + except Exception as ex: + logger.warning("Progress update failed for job %s: %s", self._jobId, ex) + + def isCancelled(self) -> bool: + """Check if this job was cancelled. 
Reads DB at most every 3s to limit load.""" + now = time.time() + if self._cancelledCache is True: + return True + if now - self._lastCheckedAt < _CANCEL_CHECK_INTERVAL_S: + return self._cancelledCache or False + self._lastCheckedAt = now + try: + job = _loadJob(self._jobId) + if job and job.get("status") == BackgroundJobStatusEnum.CANCELLED.value: + self._cancelledCache = True + return True + except Exception: + pass + self._cancelledCache = False + return False + + JobHandler = Callable[[Dict[str, Any], JobProgressCallback], Awaitable[Optional[Dict[str, Any]]]] @@ -155,16 +195,7 @@ def _markError(jobId: str, errorMessage: str) -> None: def _makeProgressCallback(jobId: str) -> JobProgressCallback: - def _cb(progress: int, message: Optional[str] = None) -> None: - try: - clamped = max(0, min(100, int(progress))) - fields: Dict[str, Any] = {"progress": clamped} - if message is not None: - fields["progressMessage"] = message[:500] - _updateJob(jobId, fields) - except Exception as ex: - logger.warning("Progress update failed for job %s: %s", jobId, ex) - return _cb + return JobProgressCallback(jobId) async def _runJob(jobId: str) -> None: @@ -220,12 +251,51 @@ def isTerminalStatus(status: str) -> bool: return status in {s.value for s in TERMINAL_JOB_STATUSES} +def cancelJob(jobId: str, *, reason: str = "user_requested") -> bool: + """Mark a job as CANCELLED. Walkers detect this via JobProgressCallback.isCancelled(). + + Returns False if the job is already in a terminal state or does not exist. 
+ """ + job = _loadJob(jobId) + if not job: + return False + if isTerminalStatus(job.get("status", "")): + return False + _updateJob(jobId, { + "status": BackgroundJobStatusEnum.CANCELLED.value, + "errorMessage": f"cancelled: {reason}"[:1000], + "finishedAt": datetime.now(timezone.utc).timestamp(), + }) + logger.info("BackgroundJob %s cancelled (reason=%s)", jobId, reason) + return True + + +def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.bootstrap") -> int: + """Cancel all RUNNING/PENDING jobs whose payload.connectionId matches. + + Returns count of jobs marked as cancelled. + """ + db = _getDb() + rows = db.getRecordset(BackgroundJob, recordFilter={"jobType": jobType}) + count = 0 + for row in rows: + status = row.get("status", "") + if status not in (BackgroundJobStatusEnum.PENDING.value, BackgroundJobStatusEnum.RUNNING.value): + continue + payload = row.get("payload") or {} + if payload.get("connectionId") == connectionId: + if cancelJob(row["id"], reason=f"connection_stop:{connectionId[:8]}"): + count += 1 + return count + + def recoverInterruptedJobs() -> int: - """Flip any RUNNING jobs to ERROR (called at worker boot). + """Flip any RUNNING jobs to ERROR and re-queue bootstrap jobs (called at worker boot). A RUNNING job in the DB after process restart means the previous worker died mid-execution; the asyncio task is gone and the job will never - finish on its own. + finish on its own. For connection.bootstrap jobs, a fresh job is + automatically re-queued so the user doesn't have to manually retry. 
""" db = _getDb() try: @@ -234,12 +304,34 @@ def recoverInterruptedJobs() -> int: logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex) return 0 count = 0 + requeued = 0 for row in rows: try: _markError(row["id"], "Interrupted by worker restart") count += 1 except Exception as ex: logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex) + continue + + if row.get("jobType") == "connection.bootstrap": + payload = row.get("payload") or {} + if payload.get("connectionId"): + try: + newJob = BackgroundJob( + jobType="connection.bootstrap", + payload=payload, + triggeredBy="recovery.requeue", + ) + record = db.recordCreate(BackgroundJob, _serialiseDatetimes(newJob.model_dump())) + asyncio.create_task(_runJob(record["id"])) + requeued += 1 + logger.info( + "recoverInterruptedJobs: re-queued bootstrap for connectionId=%s (new jobId=%s)", + payload["connectionId"], record["id"], + ) + except Exception as reqEx: + logger.warning("recoverInterruptedJobs: re-queue failed for %s: %s", row.get("id"), reqEx) + if count: - logger.warning("Recovered %d interrupted background job(s) after restart", count) + logger.warning("Recovered %d interrupted background job(s) after restart (re-queued %d)", count, requeued) return count diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py index 97ac61d5..0e2d251f 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py @@ -122,21 +122,54 @@ def _onConnectionRevoked( ) +_SOURCE_TYPE_MAP = { + "msft": { + "sharepoint": ("sharepointFolder", "onedriveFolder"), + "outlook": ("outlookFolder", "calendarFolder", "contactFolder"), + }, + "google": { + "drive": ("googleDriveFolder",), + "gmail": ("gmailFolder",), + }, + "clickup": { + "clickup": ("clickupList",), + 
}, + "infomaniak": { + "kdrive": ("kdriveFolder",), + }, +} + + +def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list] = None): + """Load DataSource rows with ragIndexEnabled=true for a connection. + + If dataSourceIds is provided (mini-bootstrap), filter to only those IDs. + """ + from modules.interfaces.interfaceDbApp import getRootInterface + from modules.datamodels.datamodelDataSource import DataSource + + rootIf = getRootInterface() + allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId}) + if dataSourceIds: + return [ds for ds in allDs if ds.get("id") in dataSourceIds and ds.get("ragIndexEnabled")] + return [ds for ds in allDs if ds.get("ragIndexEnabled")] + + async def _bootstrapJobHandler( job: Dict[str, Any], progressCb, ) -> Dict[str, Any]: - """Dispatch bootstrap by authority. Each authority runs its own sub-bootstraps.""" + """Dispatch bootstrap by authority, iterating only over ragIndexEnabled DataSources.""" payload = job.get("payload") or {} connectionId = payload.get("connectionId") authority = (payload.get("authority") or "").lower() + dataSourceIds = payload.get("dataSourceIds") if not connectionId: raise ValueError("connection.bootstrap requires payload.connectionId") progressCb(5, f"resolving {authority} connection") - # Defensive consent check: if the connection has since disabled knowledge ingestion - # (e.g. user toggled setting after the job was enqueued), skip all walkers. 
+ # Defensive consent check try: from modules.interfaces.interfaceDbApp import getRootInterface _root = getRootInterface() @@ -156,6 +189,21 @@ async def _bootstrapJobHandler( except Exception as _guardErr: logger.debug("Could not load connection for consent guard: %s", _guardErr) + # Load only ragIndexEnabled DataSources for this connection + dataSources = _loadRagEnabledDataSources(connectionId, dataSourceIds) + if not dataSources: + logger.info( + "ingestion.connection.bootstrap.skipped — no rag-enabled DataSources connectionId=%s", + connectionId, + extra={ + "event": "ingestion.connection.bootstrap.skipped", + "connectionId": connectionId, + "authority": authority, + "reason": "no_data_sources", + }, + ) + return {"connectionId": connectionId, "authority": authority, "skipped": True, "reason": "no_data_sources"} + def _normalize(res: Any, label: str) -> Dict[str, Any]: if isinstance(res, Exception): logger.error( @@ -165,6 +213,10 @@ async def _bootstrapJobHandler( return {"error": str(res)} return res or {} + def _filterDs(walkerKey: str) -> list: + sourceTypes = _SOURCE_TYPE_MAP.get(authority, {}).get(walkerKey, ()) + return [ds for ds in dataSources if ds.get("sourceType") in sourceTypes] + if authority == "msft": from modules.serviceCenter.services.serviceKnowledge.subConnectorSyncSharepoint import ( bootstrapSharepoint, @@ -174,9 +226,14 @@ async def _bootstrapJobHandler( ) progressCb(10, "sharepoint + outlook") + spDs = _filterDs("sharepoint") + olDs = _filterDs("outlook") + async def _noopResult(): + return {"skipped": True, "reason": "no_datasources"} + spResult, olResult = await asyncio.gather( - bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb), - bootstrapOutlook(connectionId=connectionId, progressCb=progressCb), + bootstrapSharepoint(connectionId=connectionId, progressCb=progressCb, dataSources=spDs) if spDs else _noopResult(), + bootstrapOutlook(connectionId=connectionId, progressCb=progressCb, dataSources=olDs) if olDs else 
_noopResult(), return_exceptions=True, ) return { @@ -195,9 +252,14 @@ async def _bootstrapJobHandler( ) progressCb(10, "drive + gmail") + gdDs = _filterDs("drive") + gmDs = _filterDs("gmail") + async def _noopResult(): + return {"skipped": True, "reason": "no_datasources"} + gdResult, gmResult = await asyncio.gather( - bootstrapGdrive(connectionId=connectionId, progressCb=progressCb), - bootstrapGmail(connectionId=connectionId, progressCb=progressCb), + bootstrapGdrive(connectionId=connectionId, progressCb=progressCb, dataSources=gdDs) if gdDs else _noopResult(), + bootstrapGmail(connectionId=connectionId, progressCb=progressCb, dataSources=gmDs) if gmDs else _noopResult(), return_exceptions=True, ) return { @@ -213,7 +275,8 @@ async def _bootstrapJobHandler( ) progressCb(10, "clickup tasks") - cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb) + cuDs = _filterDs("clickup") + cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"} return { "connectionId": connectionId, "authority": authority, diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py index 950400ce..4aaaa9bf 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorPrefs.py @@ -9,7 +9,7 @@ is None). from __future__ import annotations import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) @@ -21,10 +21,11 @@ _DEFAULT_CLICKUP_SCOPE = "title_description" @dataclass class ConnectionIngestionPrefs: - """Parsed per-connection preferences for knowledge ingestion walkers.""" + """Parsed per-connection preferences for knowledge ingestion walkers. 
- # PII - neutralizeBeforeEmbed: bool = False + Neutralization is now controlled per DataSource.neutralize (not here). + Surface toggles are obsolete — walker iterates only over ragIndexEnabled DataSources. + """ # Mail (Outlook + Gmail) mailContentDepth: str = _DEFAULT_MAIL_DEPTH # "metadata" | "snippet" | "full" @@ -32,18 +33,11 @@ class ConnectionIngestionPrefs: # Files (Drive / SharePoint / OneDrive) filesIndexBinaries: bool = True - mimeAllowlist: List[str] = field(default_factory=list) # empty = all allowed # ClickUp clickupScope: str = _DEFAULT_CLICKUP_SCOPE # "titles" | "title_description" | "with_comments" clickupIndexAttachments: bool = False - # Per-authority surface toggles (default everything on) - gmailEnabled: bool = True - driveEnabled: bool = True - sharepointEnabled: bool = True - outlookEnabled: bool = True - # Time window maxAgeDays: int = _DEFAULT_MAX_AGE_DAYS # 0 = no limit @@ -78,22 +72,12 @@ def loadConnectionPrefs(connectionId: str) -> ConnectionIngestionPrefs: v = raw.get(key) return int(v) if isinstance(v, int) else default - surface = raw.get("surfaceToggles") or {} - google_surf = surface.get("google") or {} - msft_surf = surface.get("msft") or {} - return ConnectionIngestionPrefs( - neutralizeBeforeEmbed=_bool("neutralizeBeforeEmbed", False), mailContentDepth=_str("mailContentDepth", ["metadata", "snippet", "full"], _DEFAULT_MAIL_DEPTH), mailIndexAttachments=_bool("mailIndexAttachments", False), filesIndexBinaries=_bool("filesIndexBinaries", True), - mimeAllowlist=list(raw.get("mimeAllowlist") or []), clickupScope=_str("clickupScope", ["titles", "title_description", "with_comments"], _DEFAULT_CLICKUP_SCOPE), clickupIndexAttachments=_bool("clickupIndexAttachments", False), - gmailEnabled=bool(google_surf.get("gmail", True)), - driveEnabled=bool(google_surf.get("drive", True)), - sharepointEnabled=bool(msft_surf.get("sharepoint", True)), - outlookEnabled=bool(msft_surf.get("outlook", True)), maxAgeDays=_int("maxAgeDays", 
_DEFAULT_MAX_AGE_DAYS), ) except Exception as exc: diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py index 31ac9687..7acbaa19 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py @@ -23,7 +23,7 @@ import logging import time from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) @@ -150,8 +150,6 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) - "data": description, "contextRef": {"part": "description"}, }) - # text_content is ClickUp's rendered-markdown version; include if it adds - # something beyond the plain description (common for bullet lists, checklists). textContent = _truncate(task.get("text_content"), limits.maxDescriptionChars) if textContent and textContent != description: parts.append({ @@ -166,33 +164,35 @@ def _buildContentObjects(task: Dict[str, Any], limits: ClickupBootstrapLimits) - async def bootstrapClickup( connectionId: str, *, - progressCb: Optional[Callable[[int, Optional[str]], None]] = None, + dataSources: Optional[List[Dict[str, Any]]] = None, + progressCb: Optional[Any] = None, adapter: Any = None, connection: Any = None, knowledgeService: Any = None, limits: Optional[ClickupBootstrapLimits] = None, ) -> Dict[str, Any]: - """Walk workspaces → lists → tasks and ingest each task as a virtual doc.""" - from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs - prefs = loadConnectionPrefs(connectionId) + """Walk workspaces → lists → tasks and ingest each task as a virtual doc. + + Iterates only over explicitly provided dataSources (ragIndexEnabled=true). 
+ Each DataSource defines the neutralize policy for its subtree. + """ + if not dataSources: + return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"} if not limits: - limits = ClickupBootstrapLimits( - maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None, - neutralize=prefs.neutralizeBeforeEmbed, - clickupScope=prefs.clickupScope, - ) + limits = ClickupBootstrapLimits() startMs = time.time() result = ClickupBootstrapResult(connectionId=connectionId) logger.info( - "ingestion.connection.bootstrap.started part=clickup connectionId=%s", - connectionId, + "ingestion.connection.bootstrap.started part=clickup connectionId=%s dataSources=%d", + connectionId, len(dataSources), extra={ "event": "ingestion.connection.bootstrap.started", "part": "clickup", "connectionId": connectionId, + "dataSourceCount": len(dataSources), }, ) @@ -215,30 +215,56 @@ async def bootstrapClickup( return _finalizeResult(connectionId, result, startMs) teams = (teamsResp or {}).get("teams") or [] - for team in teams[: limits.maxWorkspaces]: + + cancelled = False + for ds in dataSources: if result.indexed + result.skippedDuplicate >= limits.maxTasks: break - teamId = str(team.get("id", "") or "") - if not teamId: - continue - result.workspaces += 1 - try: - await _walkTeam( - svc=svc, - knowledgeService=knowledgeService, - connectionId=connectionId, - mandateId=mandateId, - userId=userId, - team=team, - limits=limits, - result=result, - progressCb=progressCb, - ) - except Exception as exc: - logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True) - result.errors.append(f"team({teamId}): {exc}") + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + cancelled = True + break - return _finalizeResult(connectionId, result, startMs) + dsId = ds.get("id", "") + dsNeutralize = ds.get("neutralize", False) + dsLimits = ClickupBootstrapLimits( + maxTasks=limits.maxTasks, + maxWorkspaces=limits.maxWorkspaces, + 
maxListsPerWorkspace=limits.maxListsPerWorkspace, + maxDescriptionChars=limits.maxDescriptionChars, + maxAgeDays=limits.maxAgeDays, + includeClosed=limits.includeClosed, + neutralize=dsNeutralize, + clickupScope=limits.clickupScope, + ) + + for team in teams[:dsLimits.maxWorkspaces]: + if result.indexed + result.skippedDuplicate >= dsLimits.maxTasks: + break + teamId = str(team.get("id", "") or "") + if not teamId: + continue + result.workspaces += 1 + try: + await _walkTeam( + svc=svc, + knowledgeService=knowledgeService, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + team=team, + limits=dsLimits, + result=result, + progressCb=progressCb, + dataSourceId=dsId, + ) + except Exception as exc: + logger.error("clickup team %s walk failed: %s", teamId, exc, exc_info=True) + result.errors.append(f"team({teamId}): {exc}") + + finalResult = _finalizeResult(connectionId, result, startMs) + if cancelled: + finalResult["cancelled"] = True + return finalResult async def _resolveDependencies(connectionId: str): @@ -280,8 +306,12 @@ async def _walkTeam( team: Dict[str, Any], limits: ClickupBootstrapLimits, result: ClickupBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return + teamId = str(team.get("id", "") or "") spacesResp = await svc.getSpaces(teamId) spaces = (spacesResp or {}).get("spaces") or [] @@ -294,14 +324,12 @@ async def _walkTeam( if not spaceId: continue - # Folderless lists directly under the space folderless = await svc.getFolderlessLists(spaceId) for lst in (folderless or {}).get("lists") or []: if len(listsCollected) >= limits.maxListsPerWorkspace: break listsCollected.append({**lst, "_space": space}) - # Lists inside folders foldersResp = await svc.getFolders(spaceId) for folder in (foldersResp or {}).get("folders") or []: if len(listsCollected) >= 
limits.maxListsPerWorkspace: @@ -318,6 +346,8 @@ async def _walkTeam( for lst in listsCollected: if result.indexed + result.skippedDuplicate >= limits.maxTasks: return + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return result.lists += 1 await _walkList( svc=svc, @@ -330,6 +360,7 @@ async def _walkTeam( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) @@ -344,13 +375,16 @@ async def _walkList( lst: Dict[str, Any], limits: ClickupBootstrapLimits, result: ClickupBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: listId = str(lst.get("id", "") or "") if not listId: return page = 0 while result.indexed + result.skippedDuplicate < limits.maxTasks: + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return resp = await svc.getTasksInList( listId, page=page, @@ -371,7 +405,6 @@ async def _walkList( if not _isRecent(task.get("date_updated"), limits.maxAgeDays): result.skippedPolicy += 1 continue - # Inject the list/folder/space metadata we already loaded. 
task["list"] = task.get("list") or {"id": listId, "name": lst.get("name")} task["folder"] = task.get("folder") or lst.get("_folder") or {} task["space"] = task.get("space") or lst.get("_space") or {} @@ -385,9 +418,10 @@ async def _walkList( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) - if len(tasks) < 100: # ClickUp page-size hint: fewer than 100 => last page + if len(tasks) < 100: return page += 1 @@ -402,7 +436,8 @@ async def _ingestTask( task: Dict[str, Any], limits: ClickupBootstrapLimits, result: ClickupBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -431,6 +466,7 @@ async def _ingestTask( neutralize=limits.neutralize, provenance={ "connectionId": connectionId, + "dataSourceId": dataSourceId, "authority": "clickup", "service": "clickup", "externalItemId": taskId, @@ -456,8 +492,10 @@ async def _ingestTask( else: result.failed += 1 - if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: - processed = result.indexed + result.skippedDuplicate + processed = result.indexed + result.skippedDuplicate + if progressCb is not None and processed % 50 == 0: + if hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return try: progressCb( min(90, 10 + int(80 * processed / max(1, limits.maxTasks))), diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py index 5e4e659b..398b9af9 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py @@ -12,6 +12,7 @@ via export), runs the standard extraction pipeline and routes results through from __future__ import annotations +import asyncio import 
hashlib import logging import time @@ -30,7 +31,6 @@ SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/") MAX_DEPTH_DEFAULT = 4 MAX_AGE_DAYS_DEFAULT = 365 -# Google Drive uses virtual mime-types for folders and non-downloadable assets. FOLDER_MIME = "application/vnd.google-apps.folder" @@ -41,12 +41,8 @@ class GdriveBootstrapLimits: maxFileSize: int = MAX_FILE_SIZE_DEFAULT skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT maxDepth: int = MAX_DEPTH_DEFAULT - # Only ingest files modified within the last N days. None disables filter. maxAgeDays: Optional[int] = MAX_AGE_DAYS_DEFAULT - # Pass-through to IngestionJob.neutralize neutralize: bool = False - # Whether to skip binary/non-text files - filesIndexBinaries: bool = True @dataclass @@ -95,10 +91,8 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool: if not maxAgeDays: return True if not modifiedIso: - # No timestamp -> be permissive (Drive native docs sometimes omit it on export). return True try: - # Google returns RFC 3339 with `Z` or offset; python 3.11+ parses both. ts = datetime.fromisoformat(modifiedIso.replace("Z", "+00:00")) except Exception: return True @@ -111,34 +105,36 @@ def _isRecent(modifiedIso: Optional[str], maxAgeDays: Optional[int]) -> bool: async def bootstrapGdrive( connectionId: str, *, - progressCb: Optional[Callable[[int, Optional[str]], None]] = None, + dataSources: Optional[List[Dict[str, Any]]] = None, + progressCb: Optional[Any] = None, adapter: Any = None, connection: Any = None, knowledgeService: Any = None, limits: Optional[GdriveBootstrapLimits] = None, runExtractionFn: Optional[Callable[..., Any]] = None, ) -> Dict[str, Any]: - """Walk My Drive starting from the virtual root folder.""" - from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs - prefs = loadConnectionPrefs(connectionId) + """Walk My Drive starting from the virtual root folder. 
+ + Iterates only over explicitly provided dataSources (ragIndexEnabled=true). + Each DataSource defines the root path + neutralize policy for its subtree. + """ + if not dataSources: + return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"} if not limits: - limits = GdriveBootstrapLimits( - maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None, - neutralize=prefs.neutralizeBeforeEmbed, - filesIndexBinaries=prefs.filesIndexBinaries, - ) + limits = GdriveBootstrapLimits() startMs = time.time() result = GdriveBootstrapResult(connectionId=connectionId) logger.info( - "ingestion.connection.bootstrap.started part=gdrive connectionId=%s", - connectionId, + "ingestion.connection.bootstrap.started part=gdrive connectionId=%s dataSources=%d", + connectionId, len(dataSources), extra={ "event": "ingestion.connection.bootstrap.started", "part": "gdrive", "connectionId": connectionId, + "dataSourceCount": len(dataSources), }, ) @@ -158,25 +154,51 @@ async def bootstrapGdrive( mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" - try: - await _walkFolder( - adapter=adapter, - knowledgeService=knowledgeService, - runExtractionFn=runExtractionFn, - connectionId=connectionId, - mandateId=mandateId, - userId=userId, - folderPath="/", # DriveAdapter.browse maps "" / "/" -> "root" - depth=0, - limits=limits, - result=result, - progressCb=progressCb, - ) - except Exception as exc: - logger.error("gdrive walk failed for %s: %s", connectionId, exc, exc_info=True) - result.errors.append(f"walk: {exc}") + cancelled = False + for ds in dataSources: + if result.indexed + result.skippedDuplicate >= limits.maxItems: + break + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + cancelled = True + break - return _finalizeResult(connectionId, result, startMs) + dsPath = ds.get("path", "/") + dsId = 
ds.get("id", "") + dsNeutralize = ds.get("neutralize", False) + dsMaxAgeDays = ds.get("maxAgeDays", limits.maxAgeDays) + dsLimits = GdriveBootstrapLimits( + maxItems=limits.maxItems, + maxBytes=limits.maxBytes, + maxFileSize=limits.maxFileSize, + skipMimePrefixes=limits.skipMimePrefixes, + maxDepth=limits.maxDepth, + maxAgeDays=dsMaxAgeDays, + neutralize=dsNeutralize, + ) + + try: + await _walkFolder( + adapter=adapter, + knowledgeService=knowledgeService, + runExtractionFn=runExtractionFn, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + folderPath=dsPath, + depth=0, + limits=dsLimits, + result=result, + progressCb=progressCb, + dataSourceId=dsId, + ) + except Exception as exc: + logger.error("gdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True) + result.errors.append(f"walk({dsPath}): {exc}") + + finalResult = _finalizeResult(connectionId, result, startMs) + if cancelled: + finalResult["cancelled"] = True + return finalResult async def _resolveDependencies(connectionId: str): @@ -220,10 +242,13 @@ async def _walkFolder( depth: int, limits: GdriveBootstrapLimits, result: GdriveBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: if depth > limits.maxDepth: return + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return try: entries = await adapter.browse(folderPath) except Exception as exc: @@ -236,6 +261,8 @@ async def _walkFolder( return if result.bytesProcessed >= limits.maxBytes: return + if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled(): + return entryPath = getattr(entry, "path", "") or "" metadata = getattr(entry, "metadata", {}) or {} @@ -254,6 +281,7 @@ async def _walkFolder( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) continue @@ -288,6 +316,7 @@ async def 
_walkFolder( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) @@ -306,7 +335,8 @@ async def _ingestOne( revision: Optional[str], limits: GdriveBootstrapLimits, result: GdriveBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -321,14 +351,13 @@ async def _ingestOne( result.errors.append(f"download({entryPath}): {exc}") return - # Adapter.download returns raw bytes today; guard DownloadResult shape too. fileBytes: bytes if isinstance(downloaded, (bytes, bytearray)): fileBytes = bytes(downloaded) else: fileBytes = bytes(getattr(downloaded, "data", b"") or b"") if getattr(downloaded, "mimeType", None): - mimeType = downloaded.mimeType # export may have changed the type + mimeType = downloaded.mimeType if not fileBytes: result.failed += 1 return @@ -354,6 +383,15 @@ async def _ingestOne( result.skippedPolicy += 1 return + provenance: Dict[str, Any] = { + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "google", + "service": "drive", + "externalItemId": externalItemId, + "entryPath": entryPath, + "tier": "body", + } try: handle = await knowledgeService.requestIngestion( IngestionJob( @@ -366,14 +404,7 @@ async def _ingestOne( contentObjects=contentObjects, contentVersion=revision, neutralize=limits.neutralize, - provenance={ - "connectionId": connectionId, - "authority": "google", - "service": "drive", - "externalItemId": externalItemId, - "entryPath": entryPath, - "tier": "body", - }, + provenance=provenance, ) ) except Exception as exc: @@ -388,6 +419,8 @@ async def _ingestOne( result.indexed += 1 else: result.failed += 1 + if handle.error: + result.errors.append(f"ingest({entryPath}): {handle.error}") if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: processed = result.indexed + 
result.skippedDuplicate @@ -411,6 +444,8 @@ async def _ingestOne( }, ) + await asyncio.sleep(0) + def _finalizeResult(connectionId: str, result: GdriveBootstrapResult, startMs: float) -> Dict[str, Any]: durationMs = int((time.time() - startMs) * 1000) diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py index 21fec83d..f5c345c6 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py @@ -175,35 +175,36 @@ def _buildContentObjects( async def bootstrapGmail( connectionId: str, *, - progressCb: Optional[Callable[[int, Optional[str]], None]] = None, + dataSources: Optional[List[Dict[str, Any]]] = None, + progressCb: Optional[Any] = None, adapter: Any = None, connection: Any = None, knowledgeService: Any = None, limits: Optional[GmailBootstrapLimits] = None, googleGetFn: Optional[Callable[..., Any]] = None, ) -> Dict[str, Any]: - """Enumerate Gmail labels (INBOX + SENT default) and ingest messages.""" - from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs - prefs = loadConnectionPrefs(connectionId) + """Enumerate Gmail labels (INBOX + SENT default) and ingest messages. + + Iterates only over explicitly provided dataSources (ragIndexEnabled=true). + Each DataSource defines the neutralize policy for its scope. 
+ """ + if not dataSources: + return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"} if not limits: - limits = GmailBootstrapLimits( - includeAttachments=prefs.mailIndexAttachments, - maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None, - mailContentDepth=prefs.mailContentDepth, - neutralize=prefs.neutralizeBeforeEmbed, - ) + limits = GmailBootstrapLimits() startMs = time.time() result = GmailBootstrapResult(connectionId=connectionId) logger.info( - "ingestion.connection.bootstrap.started part=gmail connectionId=%s", - connectionId, + "ingestion.connection.bootstrap.started part=gmail connectionId=%s dataSources=%d", + connectionId, len(dataSources), extra={ "event": "ingestion.connection.bootstrap.started", "part": "gmail", "connectionId": connectionId, + "dataSourceCount": len(dataSources), }, ) @@ -221,26 +222,51 @@ async def bootstrapGmail( mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" - for labelId in limits.labels: + cancelled = False + for ds in dataSources: if result.indexed + result.skippedDuplicate >= limits.maxMessages: break - try: - await _ingestLabel( - googleGetFn=googleGetFn, - knowledgeService=knowledgeService, - connectionId=connectionId, - mandateId=mandateId, - userId=userId, - labelId=labelId, - limits=limits, - result=result, - progressCb=progressCb, - ) - except Exception as exc: - logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True) - result.errors.append(f"label({labelId}): {exc}") + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + cancelled = True + break - return _finalizeResult(connectionId, result, startMs) + dsId = ds.get("id", "") + dsNeutralize = ds.get("neutralize", False) + dsLimits = GmailBootstrapLimits( + maxMessages=limits.maxMessages, + labels=limits.labels, + maxBodyChars=limits.maxBodyChars, 
+ includeAttachments=limits.includeAttachments, + maxAttachmentBytes=limits.maxAttachmentBytes, + maxAgeDays=limits.maxAgeDays, + mailContentDepth=limits.mailContentDepth, + neutralize=dsNeutralize, + ) + + for labelId in dsLimits.labels: + if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages: + break + try: + await _ingestLabel( + googleGetFn=googleGetFn, + knowledgeService=knowledgeService, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + labelId=labelId, + limits=dsLimits, + result=result, + progressCb=progressCb, + dataSourceId=dsId, + ) + except Exception as exc: + logger.error("gmail ingestion label %s failed: %s", labelId, exc, exc_info=True) + result.errors.append(f"label({labelId}): {exc}") + + finalResult = _finalizeResult(connectionId, result, startMs) + if cancelled: + finalResult["cancelled"] = True + return finalResult async def _resolveDependencies(connectionId: str): @@ -282,7 +308,8 @@ async def _ingestLabel( labelId: str, limits: GmailBootstrapLimits, result: GmailBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate) if remaining <= 0: @@ -316,6 +343,8 @@ async def _ingestLabel( for stub in messageStubs: if result.indexed + result.skippedDuplicate >= limits.maxMessages: break + if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled(): + return msgId = stub.get("id") if not msgId: continue @@ -337,6 +366,7 @@ async def _ingestLabel( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) nextPageToken = page.get("nextPageToken") @@ -355,7 +385,8 @@ async def _ingestMessage( message: Dict[str, Any], limits: GmailBootstrapLimits, result: GmailBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: 
Optional[Any], + dataSourceId: str = "", ) -> None: from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -386,6 +417,7 @@ async def _ingestMessage( neutralize=limits.neutralize, provenance={ "connectionId": connectionId, + "dataSourceId": dataSourceId, "authority": "google", "service": "gmail", "externalItemId": messageId, @@ -420,6 +452,7 @@ async def _ingestMessage( parentSyntheticId=syntheticId, limits=limits, result=result, + dataSourceId=dataSourceId, ) except Exception as exc: logger.warning("gmail attachments %s failed: %s", messageId, exc) @@ -461,6 +494,7 @@ async def _ingestAttachments( parentSyntheticId: str, limits: GmailBootstrapLimits, result: GmailBootstrapResult, + dataSourceId: str = "", ) -> None: """Child ingestion jobs for file attachments. Skips inline images (cid: refs).""" from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -561,6 +595,7 @@ async def _ingestAttachments( contentObjects=contentObjects, provenance={ "connectionId": connectionId, + "dataSourceId": dataSourceId, "authority": "google", "service": "gmail", "parentId": parentSyntheticId, diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py index 64a3545f..3f4a8afb 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py @@ -18,7 +18,7 @@ import hashlib import logging import time from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Dict, List, Optional from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody @@ -139,34 +139,35 @@ def _buildContentObjects( async def bootstrapOutlook( connectionId: str, *, - progressCb: Optional[Callable[[int, Optional[str]], None]] = None, + 
dataSources: Optional[List[Dict[str, Any]]] = None, + progressCb: Optional[Any] = None, adapter: Any = None, connection: Any = None, knowledgeService: Any = None, limits: Optional[OutlookBootstrapLimits] = None, ) -> Dict[str, Any]: - """Enumerate Outlook folders (inbox + sent by default) and ingest messages.""" - from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs - prefs = loadConnectionPrefs(connectionId) + """Enumerate Outlook folders (inbox + sent by default) and ingest messages. + + Iterates only over explicitly provided dataSources (ragIndexEnabled=true). + Each DataSource defines the neutralize policy for its messages. + """ + if not dataSources: + return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"} if not limits: - limits = OutlookBootstrapLimits( - includeAttachments=prefs.mailIndexAttachments, - maxAgeDays=prefs.maxAgeDays if prefs.maxAgeDays > 0 else None, - mailContentDepth=prefs.mailContentDepth, - neutralize=prefs.neutralizeBeforeEmbed, - ) + limits = OutlookBootstrapLimits() startMs = time.time() result = OutlookBootstrapResult(connectionId=connectionId) logger.info( - "ingestion.connection.bootstrap.started part=outlook connectionId=%s", - connectionId, + "ingestion.connection.bootstrap.started part=outlook connectionId=%s dataSources=%d", + connectionId, len(dataSources), extra={ "event": "ingestion.connection.bootstrap.started", "part": "outlook", "connectionId": connectionId, + "dataSourceCount": len(dataSources), }, ) @@ -176,27 +177,52 @@ async def bootstrapOutlook( mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" - folderIds = await _selectFolderIds(adapter, limits) - for folderId in folderIds: + cancelled = False + for ds in dataSources: if result.indexed + result.skippedDuplicate >= limits.maxMessages: break - try: - await 
_ingestFolder( - adapter=adapter, - knowledgeService=knowledgeService, - connectionId=connectionId, - mandateId=mandateId, - userId=userId, - folderId=folderId, - limits=limits, - result=result, - progressCb=progressCb, - ) - except Exception as exc: - logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True) - result.errors.append(f"folder({folderId}): {exc}") + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + cancelled = True + break - return _finalizeResult(connectionId, result, startMs) + dsId = ds.get("id", "") + dsNeutralize = ds.get("neutralize", False) + dsLimits = OutlookBootstrapLimits( + maxMessages=limits.maxMessages, + maxFolders=limits.maxFolders, + maxBodyChars=limits.maxBodyChars, + includeAttachments=limits.includeAttachments, + maxAttachmentBytes=limits.maxAttachmentBytes, + maxAgeDays=limits.maxAgeDays, + mailContentDepth=limits.mailContentDepth, + neutralize=dsNeutralize, + ) + + folderIds = await _selectFolderIds(adapter, dsLimits) + for folderId in folderIds: + if result.indexed + result.skippedDuplicate >= dsLimits.maxMessages: + break + try: + await _ingestFolder( + adapter=adapter, + knowledgeService=knowledgeService, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + folderId=folderId, + limits=dsLimits, + result=result, + progressCb=progressCb, + dataSourceId=dsId, + ) + except Exception as exc: + logger.error("outlook ingestion folder %s failed: %s", folderId, exc, exc_info=True) + result.errors.append(f"folder({folderId}): {exc}") + + finalResult = _finalizeResult(connectionId, result, startMs) + if cancelled: + finalResult["cancelled"] = True + return finalResult async def _resolveDependencies(connectionId: str): @@ -266,8 +292,12 @@ async def _ingestFolder( folderId: str, limits: OutlookBootstrapLimits, result: OutlookBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) 
-> None: + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return + remaining = limits.maxMessages - (result.indexed + result.skippedDuplicate) if remaining <= 0: return @@ -307,6 +337,8 @@ async def _ingestFolder( for message in page.get("value", []) or []: if result.indexed + result.skippedDuplicate >= limits.maxMessages: break + if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled(): + return await _ingestMessage( adapter=adapter, knowledgeService=knowledgeService, @@ -317,6 +349,7 @@ async def _ingestFolder( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) nextLink = page.get("@odata.nextLink") @@ -338,7 +371,8 @@ async def _ingestMessage( message: Dict[str, Any], limits: OutlookBootstrapLimits, result: OutlookBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -369,6 +403,7 @@ async def _ingestMessage( neutralize=limits.neutralize, provenance={ "connectionId": connectionId, + "dataSourceId": dataSourceId, "authority": "msft", "service": "outlook", "externalItemId": messageId, @@ -402,6 +437,7 @@ async def _ingestMessage( parentSyntheticId=syntheticId, limits=limits, result=result, + dataSourceId=dataSourceId, ) except Exception as exc: logger.warning("outlook attachments %s failed: %s", messageId, exc) @@ -443,6 +479,7 @@ async def _ingestAttachments( parentSyntheticId: str, limits: OutlookBootstrapLimits, result: OutlookBootstrapResult, + dataSourceId: str = "", ) -> None: """Child ingestion jobs for file attachments (skip inline & oversized).""" from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -531,6 +568,7 @@ async def _ingestAttachments( neutralize=limits.neutralize, 
provenance={ "connectionId": connectionId, + "dataSourceId": dataSourceId, "authority": "msft", "service": "outlook", "parentId": parentSyntheticId, diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py index 07fef7a8..f664f1a8 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py @@ -94,35 +94,36 @@ def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]: async def bootstrapSharepoint( connectionId: str, *, - progressCb: Optional[Callable[[int, Optional[str]], None]] = None, + dataSources: Optional[List[Dict[str, Any]]] = None, + progressCb: Optional[Any] = None, adapter: Any = None, connection: Any = None, knowledgeService: Any = None, limits: Optional[SharepointBootstrapLimits] = None, runExtractionFn: Optional[Callable[..., Any]] = None, ) -> Dict[str, Any]: - """Enumerate SharePoint drives and ingest every reachable file via the façade. + """Enumerate SharePoint drives and ingest files via the facade. - Parameters allow injection for tests; production callers pass only - `connectionId` (and optionally a progressCb) and everything else is - resolved against the registered services. + Iterates only over explicitly provided dataSources (ragIndexEnabled=true). + Each DataSource defines the root path + neutralize policy for its subtree. 
""" - from modules.serviceCenter.services.serviceKnowledge.subConnectorPrefs import loadConnectionPrefs - prefs = loadConnectionPrefs(connectionId) + if not dataSources: + return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"} if not limits: - limits = SharepointBootstrapLimits(neutralize=prefs.neutralizeBeforeEmbed) + limits = SharepointBootstrapLimits() startMs = time.time() result = SharepointBootstrapResult(connectionId=connectionId) logger.info( - "ingestion.connection.bootstrap.started part=sharepoint connectionId=%s", - connectionId, + "ingestion.connection.bootstrap.started part=sharepoint connectionId=%s dataSources=%d", + connectionId, len(dataSources), extra={ "event": "ingestion.connection.bootstrap.started", "part": "sharepoint", "connectionId": connectionId, + "dataSourceCount": len(dataSources), }, ) @@ -142,17 +143,27 @@ async def bootstrapSharepoint( mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" - try: - sites = await adapter.browse("/", limit=limits.maxSites) - except Exception as exc: - logger.error("sharepoint site discovery failed for %s: %s", connectionId, exc, exc_info=True) - result.errors.append(f"site_discovery: {exc}") - return _finalizeResult(connectionId, result, startMs) - - for site in sites[: limits.maxSites]: + cancelled = False + for ds in dataSources: if result.indexed + result.skippedDuplicate >= limits.maxItems: break - sitePath = getattr(site, "path", "") or "" + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + cancelled = True + break + + dsPath = ds.get("path", "") + dsId = ds.get("id", "") + dsNeutralize = ds.get("neutralize", False) + dsLimits = SharepointBootstrapLimits( + maxItems=limits.maxItems, + maxBytes=limits.maxBytes, + maxFileSize=limits.maxFileSize, + skipMimePrefixes=limits.skipMimePrefixes, + 
maxDepth=limits.maxDepth, + maxSites=limits.maxSites, + neutralize=dsNeutralize, + ) + try: await _walkFolder( adapter=adapter, @@ -161,17 +172,21 @@ async def bootstrapSharepoint( connectionId=connectionId, mandateId=mandateId, userId=userId, - folderPath=sitePath, + folderPath=dsPath, depth=0, - limits=limits, + limits=dsLimits, result=result, progressCb=progressCb, + dataSourceId=dsId, ) except Exception as exc: - logger.error("sharepoint walk failed for site %s: %s", sitePath, exc, exc_info=True) - result.errors.append(f"walk({sitePath}): {exc}") + logger.error("sharepoint walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True) + result.errors.append(f"walk({dsPath}): {exc}") - return _finalizeResult(connectionId, result, startMs) + finalResult = _finalizeResult(connectionId, result, startMs) + if cancelled: + finalResult["cancelled"] = True + return finalResult async def _resolveDependencies(connectionId: str): @@ -221,10 +236,13 @@ async def _walkFolder( depth: int, limits: SharepointBootstrapLimits, result: SharepointBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: if depth > limits.maxDepth: return + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return try: entries = await adapter.browse(folderPath) except Exception as exc: @@ -237,6 +255,8 @@ async def _walkFolder( return if result.bytesProcessed >= limits.maxBytes: return + if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled(): + return entryPath = getattr(entry, "path", "") or "" if getattr(entry, "isFolder", False): @@ -252,6 +272,7 @@ async def _walkFolder( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) continue @@ -283,6 +304,7 @@ async def _walkFolder( limits=limits, result=result, progressCb=progressCb, + dataSourceId=dataSourceId, ) 
@@ -301,7 +323,8 @@ async def _ingestOne( revision: Optional[str], limits: SharepointBootstrapLimits, result: SharepointBootstrapResult, - progressCb: Optional[Callable[[int, Optional[str]], None]], + progressCb: Optional[Any], + dataSourceId: str = "", ) -> None: from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob @@ -339,6 +362,7 @@ async def _ingestOne( provenance: Dict[str, Any] = { "connectionId": connectionId, + "dataSourceId": dataSourceId, "authority": "msft", "service": "sharepoint", "externalItemId": externalItemId, diff --git a/modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py b/modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py new file mode 100644 index 00000000..10be150d --- /dev/null +++ b/modules/serviceCenter/services/serviceKnowledge/subPolicyResolver.py @@ -0,0 +1,78 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Resolve effective policies (neutralize, ragIndexEnabled) for DataSource tree hierarchies. + +Tree-inheritance rule: nearest ancestor DataSource with an explicit value wins. +If no ancestor has a value, the default (False) is used. +""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +def resolveEffectiveNeutralize( + ds: Dict[str, Any], + allDataSources: List[Dict[str, Any]], +) -> bool: + """Compute effective neutralize by walking up the path tree. + + A DataSource at /sites/HR/Documents inherits from /sites/HR if + that ancestor has neutralize=True and the child has no explicit override. 
+ """ + ownValue = ds.get("neutralize") + if ownValue is not None and ownValue is not False: + return True + if ownValue is False: + return False + return _findAncestorPolicy(ds, allDataSources, "neutralize") + + +def resolveEffectiveRagIndexEnabled( + ds: Dict[str, Any], + allDataSources: List[Dict[str, Any]], +) -> bool: + """Compute effective ragIndexEnabled by walking up the path tree.""" + ownValue = ds.get("ragIndexEnabled") + if ownValue is True: + return True + if ownValue is False: + return False + return _findAncestorPolicy(ds, allDataSources, "ragIndexEnabled") + + +def _findAncestorPolicy( + ds: Dict[str, Any], + allDataSources: List[Dict[str, Any]], + field: str, +) -> bool: + """Walk ancestors (longest-prefix match) to find an inherited policy value.""" + dsPath = ds.get("path", "") + connectionId = ds.get("connectionId", "") + if not dsPath: + return False + + ancestors = [] + for candidate in allDataSources: + if candidate.get("id") == ds.get("id"): + continue + if candidate.get("connectionId") != connectionId: + continue + candidatePath = candidate.get("path", "") + if not candidatePath: + continue + if dsPath.startswith(candidatePath) and len(candidatePath) < len(dsPath): + ancestors.append(candidate) + + ancestors.sort(key=lambda a: len(a.get("path", "")), reverse=True) + + for ancestor in ancestors: + val = ancestor.get(field) + if val is True: + return True + if val is False: + return False + return False diff --git a/modules/system/mainSystem.py b/modules/system/mainSystem.py index b7e45006..21d0cbee 100644 --- a/modules/system/mainSystem.py +++ b/modules/system/mainSystem.py @@ -144,6 +144,14 @@ NAVIGATION_SECTIONS = [ "path": "/automations", "order": 30, }, + { + "id": "rag-inventory", + "objectKey": "ui.system.ragInventory", + "label": t("RAG-Inventar"), + "icon": "FaDatabase", + "path": "/rag-inventory", + "order": 35, + }, { "id": "store", "objectKey": "ui.system.store", diff --git a/scripts/script_db_migrate_datasource_rag.py 
b/scripts/script_db_migrate_datasource_rag.py new file mode 100644 index 00000000..95c2ae35 --- /dev/null +++ b/scripts/script_db_migrate_datasource_rag.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +"""Migration: Rename DataSource.autoSync -> ragIndexEnabled, lastSynced -> lastIndexed. + +This is a one-off migration for the RAG consent & control unification. +Safe to run multiple times (checks column existence before acting). + +Usage: + python script_db_migrate_datasource_rag.py [--dry-run] +""" + +import os +import sys +import argparse +import logging +from pathlib import Path + +scriptPath = Path(__file__).resolve() +gatewayPath = scriptPath.parent.parent +sys.path.insert(0, str(gatewayPath)) +os.chdir(str(gatewayPath)) + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True) +logger = logging.getLogger(__name__) + +import psycopg2 +from modules.shared.configuration import APP_CONFIG + + +def _getConnection(): + return psycopg2.connect( + host=APP_CONFIG.get("DB_HOST", "localhost"), + port=int(APP_CONFIG.get("DB_PORT", "5432")), + database=APP_CONFIG.get("DB_DATABASE", "poweron_app"), + user=APP_CONFIG.get("DB_USER"), + password=APP_CONFIG.get("DB_PASSWORD_SECRET"), + ) + + +def _columnExists(cur, table: str, column: str) -> bool: + cur.execute( + """SELECT 1 FROM information_schema.columns + WHERE table_schema = 'public' AND table_name = %s AND column_name = %s""", + (table, column), + ) + return cur.fetchone() is not None + + +def migrate(dryRun: bool = False): + conn = _getConnection() + conn.autocommit = False + cur = conn.cursor() + + renames = [ + ("DataSource", "autoSync", "ragIndexEnabled"), + ("DataSource", "lastSynced", "lastIndexed"), + ] + + executed = [] + for table, oldCol, newCol in renames: + if _columnExists(cur, table, oldCol) and not _columnExists(cur, table, newCol): + sql = f'ALTER TABLE public."{table}" RENAME COLUMN "{oldCol}" TO "{newCol}";' + logger.info("EXEC: %s", sql) + if not dryRun: + 
cur.execute(sql) + executed.append(sql) + elif _columnExists(cur, table, newCol): + logger.info("SKIP: %s.%s already exists (migration already applied)", table, newCol) + elif not _columnExists(cur, table, oldCol): + logger.warning("SKIP: %s.%s does not exist (table schema may differ)", table, oldCol) + + if not dryRun and executed: + conn.commit() + logger.info("Migration committed (%d statements)", len(executed)) + elif dryRun and executed: + conn.rollback() + logger.info("DRY RUN — would execute %d statements", len(executed)) + else: + logger.info("Nothing to do — schema already up to date") + + cur.close() + conn.close() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dry-run", action="store_true", help="Print SQL without executing") + args = parser.parse_args() + migrate(dryRun=args.dry_run) diff --git a/tests/unit/services/test_bootstrap_clickup.py b/tests/unit/services/test_bootstrap_clickup.py index 87c08c3d..4ed0c4f1 100644 --- a/tests/unit/services/test_bootstrap_clickup.py +++ b/tests/unit/services/test_bootstrap_clickup.py @@ -100,6 +100,9 @@ def _adapter(svc): return SimpleNamespace(_svc=svc) +_DEFAULT_DS = [{"id": "ds-1", "neutralize": False}] + + def test_bootstrap_walks_team_space_lists_and_tasks(): svc = _FakeClickupService(taskCount=2) knowledge = _FakeKnowledgeService() @@ -108,6 +111,7 @@ def test_bootstrap_walks_team_space_lists_and_tasks(): async def _run(): return await bootstrapClickup( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=_adapter(svc), connection=connection, knowledgeService=knowledge, @@ -126,10 +130,10 @@ def test_bootstrap_walks_team_space_lists_and_tasks(): assert job.mimeType == "application/vnd.clickup.task+json" assert job.mandateId == "m1" assert job.provenance["connectionId"] == "c1" + assert job.provenance["dataSourceId"] == "ds-1" assert job.provenance["authority"] == "clickup" assert job.provenance["teamId"] == "team-1" assert job.contentVersion # 
numeric millisecond string - # At least the header content-object is present. ids = [co["contentObjectId"] for co in job.contentObjects] assert "header" in ids @@ -146,6 +150,7 @@ def test_bootstrap_reports_duplicates_on_second_run(): async def _run(): return await bootstrapClickup( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=_adapter(svc), connection=connection, knowledgeService=knowledge, @@ -165,6 +170,7 @@ def test_bootstrap_skips_tasks_older_than_maxAgeDays(): async def _run(): return await bootstrapClickup( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=_adapter(svc), connection=connection, knowledgeService=knowledge, @@ -185,6 +191,7 @@ def test_bootstrap_maxTasks_caps_ingestion(): async def _run(): return await bootstrapClickup( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=_adapter(svc), connection=connection, knowledgeService=knowledge, @@ -195,9 +202,41 @@ def test_bootstrap_maxTasks_caps_ingestion(): assert result["indexed"] == 3 +def test_bootstrap_skips_when_no_datasources(): + async def _run(): + return await bootstrapClickup(connectionId="c1") + + result = asyncio.run(_run()) + assert result["skipped"] is True + assert result["reason"] == "no_datasources" + + +def test_bootstrap_honours_datasource_neutralize(): + svc = _FakeClickupService(taskCount=1) + knowledge = _FakeKnowledgeService() + connection = SimpleNamespace(mandateId="m1", userId="u1") + + async def _run(): + return await bootstrapClickup( + connectionId="c1", + dataSources=[{"id": "ds-n", "neutralize": True}], + adapter=_adapter(svc), + connection=connection, + knowledgeService=knowledge, + limits=ClickupBootstrapLimits(maxAgeDays=None), + ) + + asyncio.run(_run()) + for job in knowledge.calls: + assert job.neutralize is True + assert job.provenance["dataSourceId"] == "ds-n" + + if __name__ == "__main__": test_bootstrap_walks_team_space_lists_and_tasks() test_bootstrap_reports_duplicates_on_second_run() test_bootstrap_skips_tasks_older_than_maxAgeDays() 
test_bootstrap_maxTasks_caps_ingestion() + test_bootstrap_skips_when_no_datasources() + test_bootstrap_honours_datasource_neutralize() print("OK — bootstrapClickup tests passed") diff --git a/tests/unit/services/test_bootstrap_gdrive.py b/tests/unit/services/test_bootstrap_gdrive.py index 1b88677e..2741332f 100644 --- a/tests/unit/services/test_bootstrap_gdrive.py +++ b/tests/unit/services/test_bootstrap_gdrive.py @@ -119,6 +119,9 @@ def _fakeRunExtraction(data, name, mime, options): ) +_DEFAULT_DS = [{"id": "ds1", "path": "/", "neutralize": False}] + + def test_bootstrap_walks_drive_and_subfolders(): adapter = _FakeDriveAdapter() knowledge = _FakeKnowledgeService() @@ -127,6 +130,7 @@ def test_bootstrap_walks_drive_and_subfolders(): async def _run(): return await bootstrapGdrive( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=adapter, connection=connection, knowledgeService=knowledge, @@ -160,6 +164,7 @@ def test_bootstrap_reports_duplicates_on_second_run(): async def _run(): return await bootstrapGdrive( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=adapter, connection=connection, knowledgeService=knowledge, @@ -180,11 +185,11 @@ def test_bootstrap_skips_files_older_than_maxAgeDays(): async def _run(): return await bootstrapGdrive( connectionId="c1", + dataSources=[{"id": "ds1", "path": "/", "neutralize": False, "maxAgeDays": 180}], adapter=adapter, connection=connection, knowledgeService=knowledge, runExtractionFn=_fakeRunExtraction, - limits=GdriveBootstrapLimits(maxAgeDays=180), ) result = asyncio.run(_run()) @@ -200,6 +205,7 @@ def test_bootstrap_passes_connection_provenance(): async def _run(): return await bootstrapGdrive( connectionId="c1", + dataSources=_DEFAULT_DS, adapter=adapter, connection=connection, knowledgeService=knowledge, @@ -212,14 +218,25 @@ def test_bootstrap_passes_connection_provenance(): assert job.sourceKind == "gdrive_item" assert job.mandateId == "m1" assert job.provenance["connectionId"] == "c1" + assert 
job.provenance["dataSourceId"] == "ds1" assert job.provenance["authority"] == "google" assert job.provenance["service"] == "drive" assert job.contentVersion # modifiedTime ISO string +def test_bootstrap_skips_when_no_datasources(): + async def _run(): + return await bootstrapGdrive(connectionId="c1") + + result = asyncio.run(_run()) + assert result["skipped"] is True + assert result["reason"] == "no_datasources" + + if __name__ == "__main__": test_bootstrap_walks_drive_and_subfolders() test_bootstrap_reports_duplicates_on_second_run() test_bootstrap_skips_files_older_than_maxAgeDays() test_bootstrap_passes_connection_provenance() + test_bootstrap_skips_when_no_datasources() print("OK — bootstrapGdrive tests passed") diff --git a/tests/unit/services/test_bootstrap_outlook.py b/tests/unit/services/test_bootstrap_outlook.py index 26664eaa..c5fea524 100644 --- a/tests/unit/services/test_bootstrap_outlook.py +++ b/tests/unit/services/test_bootstrap_outlook.py @@ -111,6 +111,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent(): async def _run(): return await bootstrapOutlook( connectionId="c1", + dataSources=[{"id": "ds1", "neutralize": False}], adapter=adapter, connection=connection, knowledgeService=knowledge, @@ -129,6 +130,7 @@ def test_bootstrap_outlook_indexes_messages_from_inbox_and_sent(): assert job.sourceKind == "outlook_message" assert job.mimeType == "message/rfc822" assert job.provenance["connectionId"] == "c1" + assert job.provenance["dataSourceId"] == "ds1" assert job.provenance["service"] == "outlook" assert job.contentVersion == "ck1" assert any(co["contentObjectId"] == "header" for co in job.contentObjects) @@ -146,6 +148,7 @@ def test_bootstrap_outlook_follows_pagination(): async def _run(): return await bootstrapOutlook( connectionId="c1", + dataSources=[{"id": "ds1", "neutralize": False}], adapter=adapter, connection=connection, knowledgeService=knowledge, @@ -171,6 +174,7 @@ def test_bootstrap_outlook_reports_duplicates(): async 
def _run(): return await bootstrapOutlook( connectionId="c1", + dataSources=[{"id": "ds1", "neutralize": False}], adapter=adapter, connection=connection, knowledgeService=knowledge, From c130f49cf902ec700f4945314f1af29a15ac7938 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 17:49:48 +0200 Subject: [PATCH 04/13] fixed teams --- modules/routes/routeRagInventory.py | 16 +++++++++++++--- .../serviceExtraction/mainServiceExtraction.py | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/modules/routes/routeRagInventory.py b/modules/routes/routeRagInventory.py index 08d2a245..37fb330b 100644 --- a/modules/routes/routeRagInventory.py +++ b/modules/routes/routeRagInventory.py @@ -36,6 +36,16 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L connIndexRows = knowledgeIf.db.getRecordset(FileContentIndex, recordFilter={"connectionId": connectionId}) connChunkTotal = len(connIndexRows) + chunksByDs: Dict[str, int] = {} + unassigned = 0 + for idx in connIndexRows: + prov = (idx.get("provenance") if isinstance(idx, dict) else getattr(idx, "provenance", None)) or {} + dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else "" + if dsIdRef: + chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1 + else: + unassigned += 1 + dsItems = [] for ds in dataSources: dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "") @@ -47,11 +57,11 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L "ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False), "neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False), "lastIndexed": ds.get("lastIndexed") if isinstance(ds, dict) else getattr(ds, "lastIndexed", None), - "chunkCount": 0, + "chunkCount": chunksByDs.get(dsId, 0), }) - if dsItems and connChunkTotal > 0 and len(dsItems) == 1: - dsItems[0]["chunkCount"] = connChunkTotal + if 
unassigned > 0 and len(dsItems) == 1: + dsItems[0]["chunkCount"] += unassigned jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5) connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId] diff --git a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py index 13f4a1d3..1ffb8557 100644 --- a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py @@ -33,6 +33,7 @@ class ExtractionService: self._interfaceDbComponent = getComponentInterface( context.user, mandateId=context.mandate_id, + featureInstanceId=context.feature_instance_id, ) self._extractorRegistry = getExtractorRegistry() if ExtractionService._sharedChunkerRegistry is None: From 6380f14ebe0b5736db4b9be0503d7f277001ddda Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 19:16:34 +0200 Subject: [PATCH 05/13] teamsbot anonymous bot working --- .../features/teamsbot/browserBotConnector.py | 8 +++- .../features/teamsbot/datamodelTeamsbot.py | 9 ++++ .../teamsbot/interfaceFeatureTeamsbot.py | 3 ++ .../features/teamsbot/routeFeatureTeamsbot.py | 10 ++-- modules/features/teamsbot/service.py | 46 ++++++++++++++++++- 5 files changed, 69 insertions(+), 7 deletions(-) diff --git a/modules/features/teamsbot/browserBotConnector.py b/modules/features/teamsbot/browserBotConnector.py index 2e76d039..d99fe829 100644 --- a/modules/features/teamsbot/browserBotConnector.py +++ b/modules/features/teamsbot/browserBotConnector.py @@ -40,6 +40,8 @@ class BrowserBotConnector: botAccountPassword: Optional[str] = None, transferMode: str = "auto", debugMode: bool = False, + avatarMediaData: Optional[str] = None, + avatarMediaType: Optional[str] = None, ) -> Dict[str, Any]: """ Send join command to the Browser Bot service. 
@@ -79,12 +81,16 @@ class BrowserBotConnector: "debugMode": debugMode, } - # Add authenticated join credentials if configured if botAccountEmail and botAccountPassword: payload["botAccountEmail"] = botAccountEmail payload["botAccountPassword"] = botAccountPassword logger.info(f"Bot will join authenticated as {botAccountEmail}") + if avatarMediaData and avatarMediaType: + payload["avatarMediaData"] = avatarMediaData + payload["avatarMediaType"] = avatarMediaType + logger.info(f"Avatar media attached: {avatarMediaType}, {len(avatarMediaData)} chars") + try: async with aiohttp.ClientSession(timeout=_BOT_TIMEOUT) as session: async with session.post(f"{self.botUrl}/api/bot", json=payload) as resp: diff --git a/modules/features/teamsbot/datamodelTeamsbot.py b/modules/features/teamsbot/datamodelTeamsbot.py index 076b0eda..18904525 100644 --- a/modules/features/teamsbot/datamodelTeamsbot.py +++ b/modules/features/teamsbot/datamodelTeamsbot.py @@ -119,6 +119,10 @@ class TeamsbotMeetingModule(PowerOnModel): default=None, description="Default display name for the bot when starting a session from this module", ) + defaultAvatarFileId: Optional[str] = Field( + default=None, + description="FileItem ID for the default avatar image/video shown in the meeting", + ) status: TeamsbotModuleStatus = Field(default=TeamsbotModuleStatus.ACTIVE) @@ -225,6 +229,7 @@ class TeamsbotUserSettings(PowerOnModel): triggerCooldownSeconds: Optional[int] = Field(default=None, description="Trigger cooldown override") contextWindowSegments: Optional[int] = Field(default=None, description="Context window override") debugMode: Optional[bool] = Field(default=None, description="Debug mode override") + avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video override") # ============================================================================ @@ -248,6 +253,7 @@ class TeamsbotConfig(BaseModel): triggerCooldownSeconds: int = Field(default=3, ge=1, le=30, 
description="Minimum seconds between AI calls") contextWindowSegments: int = Field(default=20, ge=5, le=100, description="Number of transcript segments to include in AI context") debugMode: bool = Field(default=False, description="Enable debug mode: screenshots at every join step for diagnostics") + avatarFileId: Optional[str] = Field(default=None, description="FileItem ID for bot avatar image/video shown in the meeting") def _getEffectiveBrowserBotUrl(self) -> Optional[str]: """Resolve the effective browser bot URL: per-instance config takes priority, then env variable.""" @@ -288,6 +294,7 @@ class CreateMeetingModuleRequest(BaseModel): kpiTargets: Optional[str] = None defaultMeetingLink: Optional[str] = None defaultBotName: Optional[str] = None + defaultAvatarFileId: Optional[str] = None class UpdateMeetingModuleRequest(BaseModel): @@ -300,6 +307,7 @@ class UpdateMeetingModuleRequest(BaseModel): kpiTargets: Optional[str] = None defaultMeetingLink: Optional[str] = None defaultBotName: Optional[str] = None + defaultAvatarFileId: Optional[str] = None status: Optional[TeamsbotModuleStatus] = None @@ -317,6 +325,7 @@ class TeamsbotConfigUpdateRequest(BaseModel): triggerCooldownSeconds: Optional[int] = None contextWindowSegments: Optional[int] = None debugMode: Optional[bool] = None + avatarFileId: Optional[str] = None # ============================================================================ diff --git a/modules/features/teamsbot/interfaceFeatureTeamsbot.py b/modules/features/teamsbot/interfaceFeatureTeamsbot.py index 8491b3b9..2bfe77ff 100644 --- a/modules/features/teamsbot/interfaceFeatureTeamsbot.py +++ b/modules/features/teamsbot/interfaceFeatureTeamsbot.py @@ -25,6 +25,7 @@ from .datamodelTeamsbot import ( TeamsbotDirectorPromptStatus, TeamsbotDirectorPromptMode, TeamsbotMeetingModule, + TeamsbotModuleStatus, ) logger = logging.getLogger(__name__) @@ -338,6 +339,8 @@ class TeamsbotObjects: def getModules(self, instanceId: str) -> List[Dict[str, Any]]: """Get 
all meeting modules for a feature instance.""" records = self.db.getRecordset(TeamsbotMeetingModule, recordFilter={"instanceId": instanceId}) + for r in records: + r.setdefault("status", TeamsbotModuleStatus.ACTIVE.value) records.sort(key=lambda r: r.get("sysCreatedAt") or "", reverse=True) return records diff --git a/modules/features/teamsbot/routeFeatureTeamsbot.py b/modules/features/teamsbot/routeFeatureTeamsbot.py index b3088f8e..f07c98c5 100644 --- a/modules/features/teamsbot/routeFeatureTeamsbot.py +++ b/modules/features/teamsbot/routeFeatureTeamsbot.py @@ -40,6 +40,7 @@ from .datamodelTeamsbot import ( TeamsbotDirectorPromptMode, TeamsbotDirectorPromptStatus, TeamsbotMeetingModule, + TeamsbotModuleStatus, CreateMeetingModuleRequest, UpdateMeetingModuleRequest, DIRECTOR_PROMPT_FILE_LIMIT, @@ -203,6 +204,7 @@ async def createModule( data["instanceId"] = instanceId data["mandateId"] = mandateId data["ownerUserId"] = str(context.user.id) + data.setdefault("status", TeamsbotModuleStatus.ACTIVE.value) module = interface.createModule(data) return {"module": module} @@ -688,12 +690,10 @@ def _getEffectiveConfig(instanceId: str, userId: str, interface) -> TeamsbotConf if not userSettings: return baseConfig - # Merge: user settings override instance defaults (only non-None values) + # Merge: user settings override instance defaults (only non-None values). + # Derive mergeable fields from TeamsbotConfig so new fields are picked up automatically. 
overrides = {} - for field in ["botName", "aiSystemPrompt", "responseMode", - "responseChannel", "transferMode", "language", "voiceId", - "triggerIntervalSeconds", "triggerCooldownSeconds", "contextWindowSegments", - "debugMode"]: + for field in TeamsbotConfig.model_fields: value = userSettings.get(field) if value is not None: overrides[field] = value diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 8017e6dc..2136d8e0 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -732,6 +732,12 @@ class TeamsbotService: hasAuth = bool(botAccountEmail and botAccountPassword) logger.info(f"Joining meeting for session {sessionId}: auth={hasAuth}, email={botAccountEmail or 'N/A'}, transferMode={self.config.transferMode}") + avatarMediaData = None + avatarMediaType = None + avatarFileId = self._resolveAvatarFileId(session, interface) + if avatarFileId: + avatarMediaData, avatarMediaType = self._loadAvatarFileData(avatarFileId, interface) + result = await self.browserBotConnector.joinMeeting( sessionId=sessionId, meetingUrl=meetingLink, @@ -743,6 +749,8 @@ class TeamsbotService: botAccountPassword=botAccountPassword, transferMode=self.config.transferMode if hasattr(self.config, 'transferMode') else "auto", debugMode=self.config.debugMode if hasattr(self.config, 'debugMode') else False, + avatarMediaData=avatarMediaData, + avatarMediaType=avatarMediaType, ) if result.get("success"): @@ -767,6 +775,37 @@ class TeamsbotService: }) await _emitSessionEvent(sessionId, "statusChange", {"status": "error", "errorMessage": str(e)}) + def _resolveAvatarFileId(self, session, interface): + """Resolve avatarFileId: module override > config default.""" + moduleId = session.get("moduleId") + if moduleId: + module = interface.getModule(moduleId) + if module and module.get("defaultAvatarFileId"): + return module["defaultAvatarFileId"] + return getattr(self.config, "avatarFileId", None) + + def 
_loadAvatarFileData(self, fileId, _teamsbotInterface): + """Load avatar file as base64 data + mime type. Returns (data, mimeType) or (None, None).""" + import base64 + from modules.interfaces import interfaceDbManagement + try: + mgmt = interfaceDbManagement.getInterface(self.currentUser, self.mandateId) + fileRecord = mgmt.getFile(fileId) + if not fileRecord: + logger.warning(f"Avatar file {fileId} not found") + return None, None + mimeType = getattr(fileRecord, "mimeType", None) or "image/png" + rawBytes = mgmt.getFileData(fileId) + if not rawBytes: + logger.warning(f"Avatar file {fileId} has no data") + return None, None + b64 = base64.b64encode(rawBytes).decode("ascii") + logger.info(f"Avatar file loaded: {fileId}, {mimeType}, {len(b64)} chars base64") + return b64, mimeType + except Exception as e: + logger.error(f"Failed to load avatar file {fileId}: {e}") + return None, None + async def leaveMeeting(self, sessionId: str): """Send leave command to the Browser Bot service.""" from . import interfaceFeatureTeamsbot as interfaceDb @@ -1217,6 +1256,12 @@ class TeamsbotService: if self.config.botName: phraseHints.append(self.config.botName) + # Sprache kommt ausschliesslich aus der Session/Instance-Konfig + # (TeamsbotUserSettings.language ueberschreibt + # TeamsbotConfig.language, Fallback de-DE im Schema). + # KEIN hardcodierter Alternative-Sprachen-Pool — der hat dafuer + # gesorgt, dass Google STT bei verrauschter Audio auf en-US + # gesprungen ist und englisches Kauderwelsch geliefert hat. 
sttResult = await voiceInterface.speechToText( audioContent=audioBytes, language=self.config.language or "de-DE", @@ -1224,7 +1269,6 @@ class TeamsbotService: channels=1, skipFallbacks=True, phraseHints=phraseHints if phraseHints else None, - alternativeLanguages=["en-US"], audioFormat="linear16", ) From 37187459312b85b781a23cd55fa4790bd6921bb3 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 21:31:28 +0200 Subject: [PATCH 06/13] teamsbot auth fixes --- modules/interfaces/interfaceDbManagement.py | 38 ++++++++++++--------- modules/routes/routeDataFiles.py | 2 +- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/modules/interfaces/interfaceDbManagement.py b/modules/interfaces/interfaceDbManagement.py index f74de871..6a3c27b5 100644 --- a/modules/interfaces/interfaceDbManagement.py +++ b/modules/interfaces/interfaceDbManagement.py @@ -1274,17 +1274,20 @@ class ComponentObjects: if getattr(permissions, "update", None) != AccessLevel.ALL: raise PermissionError("Setting global scope requires ALL permission") - self.db.recordModify(FileFolder, folderId, {"scope": scope}) + allFolderIds = self._collectChildFolderIds(folderId) + for fid in allFolderIds: + self.db.recordModify(FileFolder, fid, {"scope": scope}) filesUpdated = 0 if cascadeToFiles: - items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId}) - for item in items: - owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None) - if owner == self.userId: - iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) - self.db.recordModify(FileItem, iid, {"scope": scope}) - filesUpdated += 1 + for fid in allFolderIds: + items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid}) + for item in items: + owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None) + if owner == self.userId: + iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", 
None) + self.db.recordModify(FileItem, iid, {"scope": scope}) + filesUpdated += 1 return {"folderId": folderId, "scope": scope, "filesUpdated": filesUpdated} @@ -1294,16 +1297,19 @@ class ComponentObjects: raise FileNotFoundError(f"Folder {folderId} not found") self._requireFolderWriteAccess(folder, folderId, "update") - self.db.recordModify(FileFolder, folderId, {"neutralize": neutralize}) + allFolderIds = self._collectChildFolderIds(folderId) + for fid in allFolderIds: + self.db.recordModify(FileFolder, fid, {"neutralize": neutralize}) - items = self.db.getRecordset(FileItem, recordFilter={"folderId": folderId}) filesUpdated = 0 - for item in items: - owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None) - if owner == self.userId: - iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) - self.db.recordModify(FileItem, iid, {"neutralize": neutralize}) - filesUpdated += 1 + for fid in allFolderIds: + items = self.db.getRecordset(FileItem, recordFilter={"folderId": fid}) + for item in items: + owner = item.get("sysCreatedBy") if isinstance(item, dict) else getattr(item, "sysCreatedBy", None) + if owner == self.userId: + iid = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) + self.db.recordModify(FileItem, iid, {"neutralize": neutralize}) + filesUpdated += 1 return {"folderId": folderId, "neutralize": neutralize, "filesUpdated": filesUpdated} diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py index 3a951f3e..b22dacae 100644 --- a/modules/routes/routeDataFiles.py +++ b/modules/routes/routeDataFiles.py @@ -413,7 +413,7 @@ def patch_folder_scope( scope = body.get("scope") if not scope: raise HTTPException(status_code=400, detail="scope is required") - cascadeToFiles = body.get("cascadeToFiles", False) + cascadeToFiles = body.get("cascadeChildren", body.get("cascadeToFiles", False)) managementInterface = interfaceDbManagement.getInterface( 
currentUser, mandateId=str(context.mandateId) if context.mandateId else None, From 16ab816c65a3a85b82b5e68e25ecd847cf438b3f Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 22:39:42 +0200 Subject: [PATCH 07/13] teamsbot ux fixes --- modules/features/teamsbot/service.py | 62 ++++++- .../serviceAgent/coreTools/_workspaceTools.py | 156 +++++++++++++++++- .../extractors/extractorContainer.py | 10 +- .../extractors/extractorEmail.py | 46 ++++-- requirements.txt | 3 + 5 files changed, 249 insertions(+), 28 deletions(-) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 2136d8e0..93cc27a2 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -3290,16 +3290,50 @@ class TeamsbotService: self, lastToolLabel: Optional[str] = None ) -> Optional[str]: """Per-round progress notice for long agent runs (meeting voice / - chat, ephemeral). Phrasing is AI-localised once per session; - ``{activity}`` placeholder is substituted with the tool's - ``displayLabel`` from the ToolDefinition. Returns ``None`` if - generation failed.""" - activity = lastToolLabel or "processing your request" - return await self._pickEphemeralPhrase( - "agentRound", - substitutions={"activity": activity}, + chat, ephemeral). Generates a single short phrase in the bot's + configured language that describes the current activity. 
Unlike + the cached ephemeral phrases, this is a per-call AI generation + to avoid mixing English displayLabels into non-English speech.""" + targetLang = (self.config.language or "").strip() or "en-US" + botName = (self.config.botName or "the assistant").strip() + activityHint = lastToolLabel or "working on the task" + + prompt = ( + f"You are a meeting assistant named '{botName}'.\n" + f"Target spoken language (BCP-47): {targetLang}\n\n" + f"The assistant is currently busy with: {activityHint}\n\n" + f"Generate ONE short sentence (max 12 words) in {targetLang} " + f"that tells the audience what the assistant is doing right now. " + f"Natural, spoken style. No step numbers. No quotes around the output.\n" + f"Output ONLY the sentence, nothing else." ) + try: + aiService = createAiService( + self.currentUser, self.mandateId, self.instanceId + ) + await aiService.ensureAiObjectsInitialized() + request = AiCallRequest( + prompt=prompt, + context="", + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.SPEED, + ), + ) + response = await aiService.callAi(request) + except Exception as aiErr: + logger.debug(f"Agent round phrase generation failed: {aiErr}") + return None + + if not response or response.errorCount != 0 or not response.content: + return None + + result = response.content.strip().strip('"').strip("'") + if len(result) > 200: + result = result[:200] + return result + async def _notifyMeetingEphemeral(self, sessionId: str, text: str) -> None: """Deliver a short line to the meeting (TTS + chat per config) without persisting botResponses/transcripts, so the main agent answer stays the @@ -3455,6 +3489,18 @@ class TeamsbotService: "promptId": promptId, "status": "toolCall", "toolName": toolName, + "displayLabel": lastToolLabel, + }) + elif event.type == AgentEventTypeEnum.TOOL_RESULT: + evtData = event.data or {} + resultSnippet = (evtData.get("data") or "")[:200] + await _emitSessionEvent(sessionId, "agentRun", { + 
"source": sourceLabel, + "promptId": promptId, + "status": "toolResult", + "toolName": evtData.get("toolName", ""), + "success": evtData.get("success", True), + "summary": resultSnippet, }) elif event.type == AgentEventTypeEnum.FILE_CREATED: await _emitSessionEvent(sessionId, "documentCreated", event.data or {}) diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py index c6584735..ed30538a 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py @@ -310,11 +310,15 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): return ToolResult(toolCallId="", toolName="writeFile", success=False, error="name is required for mode=create") fileItem, _ = dbMgmt.saveUploadedFile(content.encode("utf-8"), name) fiId = context.get("featureInstanceId") or (services.featureInstanceId if services else "") + updateFields: Dict[str, Any] = {} if fiId: - dbMgmt.updateFile(fileItem.id, {"featureInstanceId": fiId}) - # File group tree removed — groupId arg and instance-group assignment no longer apply + updateFields["featureInstanceId"] = fiId + if args.get("folderId"): + updateFields["folderId"] = args["folderId"] if args.get("tags"): - dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]}) + updateFields["tags"] = args["tags"] + if updateFields: + dbMgmt.updateFile(fileItem.id, updateFields) chatDocId = _attachFileAsChatDocument( services, fileItem, @@ -429,7 +433,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): "writeFile", _writeFile, description=( "Create, append, or overwrite a file. Modes:\n" - "- create (default): create a new file (name required).\n" + "- create (default): create a new file (name required). Use folderId to place it in a specific folder.\n" "- append: append content to an existing file (fileId required). 
" "Use for large content that exceeds a single tool call (~8000 chars per call).\n" "- overwrite: replace entire file content (fileId required).\n" @@ -445,7 +449,7 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): "content": {"type": "string", "description": "Content to write/append"}, "mode": {"type": "string", "enum": ["create", "append", "overwrite"], "description": "Write mode (default: create)"}, "fileId": {"type": "string", "description": "File ID (required for mode=append/overwrite)"}, - "groupId": {"type": "string", "description": "Group ID to place the file in (mode=create only). Omit to use the instance default group."}, + "folderId": {"type": "string", "description": "Folder ID to place the file in (mode=create only). Use listFolders to find IDs. Omit for root."}, "tags": {"type": "array", "items": {"type": "string"}, "description": "Tags (mode=create only)"}, }, "required": ["content"] @@ -704,7 +708,147 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): readOnly=False ) - # Group tree tools removed — file grouping now uses view-based display grouping (TableListView) + # ---- Folder management tools ---- + + async def _createFolder(args: Dict[str, Any], context: Dict[str, Any]): + name = args.get("name", "") + parentId = args.get("parentId") or None + if not name: + return ToolResult(toolCallId="", toolName="createFolder", success=False, error="name is required") + try: + chatService = services.chat + dbMgmt = chatService.interfaceDbComponent + folder = dbMgmt.createFolder(name, parentId=parentId) + folderId = folder.get("id") if isinstance(folder, dict) else getattr(folder, "id", None) + folderName = folder.get("name") if isinstance(folder, dict) else getattr(folder, "name", name) + return ToolResult( + toolCallId="", toolName="createFolder", success=True, + data=f"Folder '{folderName}' created (id: {folderId})" + (f" inside parent {parentId}" if parentId else ""), + sideEvents=[{"type": "folderCreated", "data": 
{"folderId": folderId, "folderName": folderName, "parentId": parentId}}], + ) + except Exception as e: + return ToolResult(toolCallId="", toolName="createFolder", success=False, error=str(e)) + + async def _listFolders(args: Dict[str, Any], context: Dict[str, Any]): + try: + chatService = services.chat + dbMgmt = chatService.interfaceDbComponent + folders = dbMgmt.getOwnFolderTree() + if not folders: + return ToolResult(toolCallId="", toolName="listFolders", success=True, data="No folders found.") + lines = [] + folderMap: Dict[Optional[str], List] = {} + for f in folders: + pid = f.get("parentId") if isinstance(f, dict) else getattr(f, "parentId", None) + folderMap.setdefault(pid, []).append(f) + + def _walk(parentId: Optional[str], indent: int): + for f in sorted(folderMap.get(parentId, []), key=lambda x: (x.get("name") if isinstance(x, dict) else getattr(x, "name", "")).lower()): + fId = f.get("id") if isinstance(f, dict) else getattr(f, "id", "") + fName = f.get("name") if isinstance(f, dict) else getattr(f, "name", "") + prefix = " " * indent + lines.append(f"{prefix}- {fName} (id: {fId})") + _walk(fId, indent + 1) + + _walk(None, 0) + return ToolResult(toolCallId="", toolName="listFolders", success=True, data="\n".join(lines)) + except Exception as e: + return ToolResult(toolCallId="", toolName="listFolders", success=False, error=str(e)) + + async def _moveFile(args: Dict[str, Any], context: Dict[str, Any]): + fileId = args.get("fileId", "") + folderId = args.get("folderId") + if not fileId: + return ToolResult(toolCallId="", toolName="moveFile", success=False, error="fileId is required") + try: + chatService = services.chat + dbMgmt = chatService.interfaceDbComponent + file = dbMgmt.getFile(fileId) + if not file: + return ToolResult(toolCallId="", toolName="moveFile", success=False, error=f"File {fileId} not found") + dbMgmt.updateFile(fileId, {"folderId": folderId or None}) + targetLabel = f"folder {folderId}" if folderId else "root" + return ToolResult( + 
toolCallId="", toolName="moveFile", success=True, + data=f"File '{file.fileName}' (id: {fileId}) moved to {targetLabel}", + sideEvents=[{"type": "fileUpdated", "data": {"fileId": fileId, "fileName": file.fileName}}], + ) + except Exception as e: + return ToolResult(toolCallId="", toolName="moveFile", success=False, error=str(e)) + + registry.register( + "createFolder", _createFolder, + description=( + "Create a new folder in the workspace file tree. " + "Use parentId to create nested folders. Returns the new folder ID." + ), + parameters={ + "type": "object", + "properties": { + "name": {"type": "string", "description": "Folder name"}, + "parentId": {"type": "string", "description": "Parent folder ID for nesting. Omit to create at root level."}, + }, + "required": ["name"] + }, + readOnly=False + ) + + registry.register( + "listFolders", _listFolders, + description=( + "List all folders in the workspace as an indented tree. " + "Use to find folder IDs for createFolder (parentId), writeFile (folderId), or moveFile." 
+ ), + parameters={"type": "object", "properties": {}}, + readOnly=True + ) + + async def _renameFolder(args: Dict[str, Any], context: Dict[str, Any]): + folderId = args.get("folderId", "") + newName = args.get("newName", "") + if not folderId or not newName: + return ToolResult(toolCallId="", toolName="renameFolder", success=False, error="folderId and newName are required") + try: + chatService = services.chat + dbMgmt = chatService.interfaceDbComponent + folder = dbMgmt.renameFolder(folderId, newName) + return ToolResult( + toolCallId="", toolName="renameFolder", success=True, + data=f"Folder {folderId} renamed to '{newName}'", + sideEvents=[{"type": "folderUpdated", "data": {"folderId": folderId, "folderName": newName}}], + ) + except Exception as e: + return ToolResult(toolCallId="", toolName="renameFolder", success=False, error=str(e)) + + registry.register( + "renameFolder", _renameFolder, + description="Rename an existing folder in the workspace file tree.", + parameters={ + "type": "object", + "properties": { + "folderId": {"type": "string", "description": "The folder ID to rename"}, + "newName": {"type": "string", "description": "New folder name"}, + }, + "required": ["folderId", "newName"] + }, + readOnly=False + ) + + registry.register( + "moveFile", _moveFile, + description=( + "Move a file into a specific folder. Set folderId to null or omit to move the file back to the root level." + ), + parameters={ + "type": "object", + "properties": { + "fileId": {"type": "string", "description": "The file ID to move"}, + "folderId": {"type": "string", "description": "Target folder ID. 
Omit or null to move to root."}, + }, + "required": ["fileId"] + }, + readOnly=False + ) registry.register( "replaceInFile", _replaceInFile, diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py index 941168d5..a7b06266 100644 --- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py +++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorContainer.py @@ -77,6 +77,7 @@ class ContainerExtractor(Extractor): """Extract by recursively unpacking the container.""" fileName = context.get("fileName", "archive") mimeType = context.get("mimeType", "application/octet-stream") + cascadeDepth = context.get("_cascadeDepth", 0) rootId = makeId() parts: List[ContentPart] = [ @@ -97,7 +98,7 @@ class ContainerExtractor(Extractor): parts.extend(lazy) return parts - state = {"totalSize": 0, "fileCount": 0} + state = {"totalSize": 0, "fileCount": 0, "cascadeDepth": cascadeDepth} try: childParts = _resolveContainerRecursive( fileBytes, mimeType, fileName, rootId, "", 0, state @@ -209,7 +210,12 @@ def _addFilePart( if extractor and not isinstance(extractor, ContainerExtractor): try: - childParts = extractor.extract(data, {"fileName": fileName, "mimeType": detectedMime}) + cascadeDepth = state.get("cascadeDepth", 0) + childParts = extractor.extract(data, { + "fileName": fileName, + "mimeType": detectedMime, + "_cascadeDepth": cascadeDepth + 1, + }) for part in childParts: part.parentId = parentId if not part.metadata: diff --git a/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py b/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py index 2c4295ab..7f750835 100644 --- a/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py +++ b/modules/serviceCenter/services/serviceExtraction/extractors/extractorEmail.py @@ -53,12 +53,13 @@ class 
EmailExtractor(Extractor): def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]: fileName = context.get("fileName", "email") lower = (fileName or "").lower() + depth = context.get("_cascadeDepth", 0) if lower.endswith(".msg"): - return self._extractMsg(fileBytes, fileName) - return self._extractEml(fileBytes, fileName) + return self._extractMsg(fileBytes, fileName, depth) + return self._extractEml(fileBytes, fileName, depth) - def _extractEml(self, fileBytes: bytes, fileName: str) -> List[ContentPart]: + def _extractEml(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]: """Parse standard EML (RFC 822) using stdlib email.""" rootId = makeId() parts: List[ContentPart] = [] @@ -91,7 +92,7 @@ class EmailExtractor(Extractor): attachName = part.get_filename() or "attachment" attachData = part.get_payload(decode=True) if attachData: - parts.extend(_delegateAttachment(attachData, attachName, rootId)) + parts.extend(_delegateAttachment(attachData, attachName, rootId, depth)) continue if contentType == "text/plain": @@ -113,7 +114,7 @@ class EmailExtractor(Extractor): return parts - def _extractMsg(self, fileBytes: bytes, fileName: str) -> List[ContentPart]: + def _extractMsg(self, fileBytes: bytes, fileName: str, depth: int = 0) -> List[ContentPart]: """Parse Outlook MSG files using extract-msg (optional).""" rootId = makeId() parts: List[ContentPart] = [] @@ -179,7 +180,7 @@ class EmailExtractor(Extractor): attachName = getattr(attachment, "longFilename", None) or getattr(attachment, "shortFilename", None) or "attachment" attachData = getattr(attachment, "data", None) if attachData: - parts.extend(_delegateAttachment(attachData, attachName, rootId)) + parts.extend(_delegateAttachment(attachData, attachName, rootId, depth)) try: msgFile.close() @@ -199,18 +200,39 @@ def _buildHeaderText(msg) -> str: return "\n".join(lines) -def _delegateAttachment(attachData: bytes, attachName: str, parentId: str) -> List[ContentPart]: 
- """Delegate an attachment to the appropriate type-specific extractor.""" +_MAX_CASCADE_DEPTH = 10 + +def _delegateAttachment(attachData: bytes, attachName: str, parentId: str, depth: int = 0) -> List[ContentPart]: + """Delegate an attachment to the appropriate type-specific extractor. + + Passes ``_cascadeDepth`` through the context so nested Email→Container→Email + chains share a global depth counter and don't recurse infinitely. + """ + if depth >= _MAX_CASCADE_DEPTH: + logger.warning(f"Cascade depth {depth} reached for {attachName}, skipping extraction") + import base64 + encodedData = base64.b64encode(attachData).decode("utf-8") if attachData else "" + return [ContentPart( + id=makeId(), parentId=parentId, label=attachName, + typeGroup="binary", mimeType="application/octet-stream", + data=encodedData, + metadata={"size": len(attachData), "emailAttachment": attachName, "cascadeDepthExceeded": True}, + )] + guessedMime, _ = mimetypes.guess_type(attachName) detectedMime = guessedMime or "application/octet-stream" - from ..subRegistry import ExtractorRegistry - registry = ExtractorRegistry() + from ..subRegistry import getExtractorRegistry + registry = getExtractorRegistry() extractor = registry.resolve(detectedMime, attachName) - if extractor and not isinstance(extractor, EmailExtractor): + if extractor: try: - childParts = extractor.extract(attachData, {"fileName": attachName, "mimeType": detectedMime}) + childParts = extractor.extract(attachData, { + "fileName": attachName, + "mimeType": detectedMime, + "_cascadeDepth": depth + 1, + }) for part in childParts: part.parentId = parentId if not part.metadata: diff --git a/requirements.txt b/requirements.txt index f5ffb715..2d2f5ee5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -110,6 +110,9 @@ asyncpg==0.30.0 ## Stripe payments stripe>=11.0.0 +## Outlook MSG file extraction +extract-msg>=0.55.0 + ## Geospatial libraries for STAC connector pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> 
EPSG:4326) shapely>=2.0.0 # For geometric operations (intersections, area calculations) From e3284994d0ec90de43e9d5ff74fd853f830bba61 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 23:33:43 +0200 Subject: [PATCH 08/13] fixes stt paras --- modules/features/teamsbot/service.py | 76 ++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 93cc27a2..fcce44bd 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -602,6 +602,13 @@ class TeamsbotService: self._lastTranscriptText: Optional[str] = None self._lastTranscriptId: Optional[str] = None self._lastSttTime: float = 0.0 + + # Audio chunk aggregation: collect chunks and send to STT only + # after a speech pause or when the buffer reaches a target duration. + self._audioBuffer: bytes = b"" + self._audioBufferStartTime: float = 0.0 + self._audioBufferLastChunkTime: float = 0.0 + self._audioBufferSampleRate: int = 16000 self._lastBotResponseText: Optional[str] = None self._lastBotResponseTs: float = 0.0 @@ -1203,6 +1210,14 @@ class TeamsbotService: interface.updateSession(sessionId, updates) await _emitSessionEvent(sessionId, "statusChange", {"status": status, "errorMessage": errorMessage}) + # Flush remaining audio buffer before generating summary + if dbStatus in [TeamsbotSessionStatus.ENDED.value, TeamsbotSessionStatus.ERROR.value]: + if self._audioBuffer: + logger.info(f"[AudioChunk] Flushing remaining buffer on session end ({len(self._audioBuffer)} bytes)") + self._audioBuffer = b"" + self._audioBufferStartTime = 0.0 + self._audioBufferLastChunkTime = 0.0 + # Generate summary when session ends if dbStatus == TeamsbotSessionStatus.ENDED.value: asyncio.create_task(self._generateMeetingSummary(sessionId)) @@ -1217,11 +1232,18 @@ class TeamsbotService: voiceInterface, websocket: WebSocket, ): - """Process an audio chunk from WebRTC capture — run 
STT and feed into transcript pipeline.""" + """Process an audio chunk from WebRTC capture. The bot-side VAD + (AudioWorklet / ScriptProcessor) already segments speech into 1-8s + voiced chunks. Here we apply a minimum-duration safety net: very short + chunks (<1s) are buffered until they reach 1s; everything else goes + straight to STT. A wall-clock timeout flushes stale buffers.""" import base64 + _MIN_CHUNK_SEC = 1.0 + _STALE_TIMEOUT_SEC = 3.0 + try: audioBytes = base64.b64decode(audioBase64) - if len(audioBytes) < 1000: + if len(audioBytes) < 500: return if captureDiagnostics: @@ -1234,14 +1256,12 @@ class TeamsbotService: f"rms={rms}, nativeRate={nativeSampleRate}, bytes={len(audioBytes)}" ) - # Use RMS from capture diagnostics to skip real silence. - # Byte-variation heuristics produced false positives and dropped valid speech. + isSilent = False if captureDiagnostics and captureDiagnostics.get("rms") is not None: try: rmsVal = float(captureDiagnostics.get("rms")) if rmsVal < 0.0003: - logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, rms={rmsVal:.6f})") - return + isSilent = True except Exception: pass @@ -1249,23 +1269,47 @@ class TeamsbotService: logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}") return - # Treat sampleRate=0 as unknown (triggers auto-detection) - effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None + now = time.time() + effectiveRate = sampleRate if sampleRate and sampleRate > 0 else 16000 + + if not isSilent: + if not self._audioBuffer: + self._audioBufferStartTime = now + self._audioBuffer += audioBytes + self._audioBufferLastChunkTime = now + self._audioBufferSampleRate = effectiveRate + + bufferDuration = len(self._audioBuffer) / (effectiveRate * 2) if self._audioBuffer else 0.0 + bufferAge = (now - self._audioBufferStartTime) if self._audioBuffer else 0.0 + + shouldFlush = ( + self._audioBuffer + and ( + bufferDuration >= _MIN_CHUNK_SEC + or (bufferAge 
>= _STALE_TIMEOUT_SEC and bufferDuration > 0.3) + ) + ) + + if not shouldFlush: + return + + flushBytes = self._audioBuffer + flushRate = self._audioBufferSampleRate + self._audioBuffer = b"" + self._audioBufferStartTime = 0.0 + self._audioBufferLastChunkTime = 0.0 + + flushDuration = len(flushBytes) / (flushRate * 2) + logger.info(f"[AudioChunk] Flushing buffer: {len(flushBytes)} bytes, {flushDuration:.1f}s, {flushRate}Hz") phraseHints = list(self._knownSpeakers) if self.config.botName: phraseHints.append(self.config.botName) - # Sprache kommt ausschliesslich aus der Session/Instance-Konfig - # (TeamsbotUserSettings.language ueberschreibt - # TeamsbotConfig.language, Fallback de-DE im Schema). - # KEIN hardcodierter Alternative-Sprachen-Pool — der hat dafuer - # gesorgt, dass Google STT bei verrauschter Audio auf en-US - # gesprungen ist und englisches Kauderwelsch geliefert hat. sttResult = await voiceInterface.speechToText( - audioContent=audioBytes, + audioContent=flushBytes, language=self.config.language or "de-DE", - sampleRate=effectiveSampleRate, + sampleRate=flushRate, channels=1, skipFallbacks=True, phraseHints=phraseHints if phraseHints else None, From 3b0192e428c8ccfe36c1af74e824b5f09ea55dca Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 12 May 2026 21:35:03 +0000 Subject: [PATCH 09/13] chore: update requirements.lock --- requirements.lock | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/requirements.lock b/requirements.lock index b4ae2798..8bc917e0 100644 --- a/requirements.lock +++ b/requirements.lock @@ -47,7 +47,9 @@ backports-tarfile==1.2.0 bcrypt==4.0.1 # via -r requirements.txt beautifulsoup4==4.12.2 - # via -r requirements.txt + # via + # -r requirements.txt + # extract-msg bleach==6.3.0 # via -r requirements.txt bokeh==3.3.4 @@ -81,6 +83,10 @@ click-plugins==1.1.1.2 # via fiona cligj==0.7.2 # via fiona +colorclass==2.2.2 + # via oletools +compressed-rtf==1.0.7 + # via 
extract-msg contourpy==1.3.3 # via # bokeh @@ -89,6 +95,7 @@ cryptography==43.0.3 # via # -r requirements.txt # msal + # msoffcrypto-tool # pyjwt # python-jose # secretstorage @@ -102,6 +109,10 @@ dnspython==2.8.0 # via email-validator docutils==0.22.4 # via -r requirements.txt +easygui==0.98.3 + # via oletools +ebcdic==1.1.1 + # via extract-msg ecdsa==0.19.1 # via python-jose email-validator==2.0.0 @@ -110,6 +121,8 @@ et-xmlfile==2.0.0 # via openpyxl executing==2.2.1 # via stack-data +extract-msg==0.55.0 + # via -r requirements.txt fastapi==0.115.0 # via -r requirements.txt fiona==1.10.1 @@ -251,6 +264,8 @@ langgraph-sdk==0.3.3 # via langgraph langsmith==0.6.8 # via langchain-core +lark==1.3.1 + # via rtfde limits==5.6.0 # via slowapi linkify-it-py==2.0.3 @@ -285,6 +300,8 @@ msal==1.24.1 # via # -r requirements.txt # office365-rest-python-client +msoffcrypto-tool==6.0.0 + # via oletools multidict==6.7.1 # via # aiohttp @@ -310,6 +327,15 @@ oauthlib==3.3.1 # via requests-oauthlib office365-rest-python-client==2.6.2 # via -r requirements.txt +olefile==0.47 + # via + # extract-msg + # msoffcrypto-tool + # oletools +oletools==0.60.2 + # via + # pcodedmp + # rtfde openpyxl==3.1.5 # via -r requirements.txt orjson==3.11.7 @@ -345,6 +371,8 @@ parso==0.8.5 # via jedi passlib==1.7.4 # via -r requirements.txt +pcodedmp==1.2.6 + # via oletools pillow==12.1.0 # via # -r requirements.txt @@ -413,6 +441,7 @@ pyparsing==3.3.2 # via # httplib2 # matplotlib + # oletools pypdf2==3.0.1 # via -r requirements.txt pyproj==3.7.2 @@ -453,6 +482,8 @@ pyyaml==6.0.3 # via # bokeh # langchain-core +red-black-tree-mod==1.22 + # via extract-msg referencing==0.37.0 # via # jsonschema @@ -489,6 +520,8 @@ rsa==4.9.1 # via # google-auth # python-jose +rtfde==0.1.2.2 + # via extract-msg seaborn==0.13.0 # via -r requirements.txt secretstorage==3.5.0 @@ -573,7 +606,9 @@ typing-inspection==0.4.2 tzdata==2025.3 # via pandas tzlocal==5.3.1 - # via apscheduler + # via + # apscheduler + # extract-msg 
uc-micro-py==1.0.3 # via linkify-it-py uritemplate==4.2.0 From f121c99ab798e95c7604546b63090e6fb71dec19 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 23:44:39 +0200 Subject: [PATCH 10/13] upgr node24 --- .github/workflows/deploy-gcp.yml | 2 +- .github/workflows/int_gateway-int.yml | 8 ++++---- .github/workflows/main_gateway-prod.yml | 8 ++++---- .github/workflows/update-requirements-lock.yml | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/deploy-gcp.yml b/.github/workflows/deploy-gcp.yml index 306c0f60..d8af220d 100644 --- a/.github/workflows/deploy-gcp.yml +++ b/.github/workflows/deploy-gcp.yml @@ -56,7 +56,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Determine environment id: env diff --git a/.github/workflows/int_gateway-int.yml b/.github/workflows/int_gateway-int.yml index 80f93583..0ea8ea9d 100644 --- a/.github/workflows/int_gateway-int.yml +++ b/.github/workflows/int_gateway-int.yml @@ -22,10 +22,10 @@ jobs: contents: read #This is required for actions/checkout steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python version - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' @@ -49,7 +49,7 @@ jobs: run: zip release.zip ./* -r - name: Upload artifact for deployment jobs - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: python-app path: | @@ -66,7 +66,7 @@ jobs: steps: - name: Download artifact from build job - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: python-app diff --git a/.github/workflows/main_gateway-prod.yml b/.github/workflows/main_gateway-prod.yml index b45db585..6634091f 100644 --- a/.github/workflows/main_gateway-prod.yml +++ b/.github/workflows/main_gateway-prod.yml @@ -22,10 +22,10 @@ jobs: contents: read #This is required for actions/checkout steps: - - uses: actions/checkout@v4 + - 
uses: actions/checkout@v5 - name: Set up Python version - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' @@ -49,7 +49,7 @@ jobs: run: zip release.zip ./* -r - name: Upload artifact for deployment jobs - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: python-app path: | @@ -66,7 +66,7 @@ jobs: steps: - name: Download artifact from build job - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: python-app diff --git a/.github/workflows/update-requirements-lock.yml b/.github/workflows/update-requirements-lock.yml index 1236cd3c..b3961874 100644 --- a/.github/workflows/update-requirements-lock.yml +++ b/.github/workflows/update-requirements-lock.yml @@ -25,10 +25,10 @@ jobs: contents: write # push requirements.lock steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' From 4a5028405fb9fd9b9bbebbb49f31962e19f4a490 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 12 May 2026 23:50:45 +0200 Subject: [PATCH 11/13] fix val page --- modules/datamodels/datamodelPagination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/datamodels/datamodelPagination.py b/modules/datamodels/datamodelPagination.py index 10476ccb..259f3880 100644 --- a/modules/datamodels/datamodelPagination.py +++ b/modules/datamodels/datamodelPagination.py @@ -115,7 +115,7 @@ class PaginationParams(BaseModel): Omit or set to None for the default (ungrouped) view. 
""" page: int = Field(ge=1, description="Current page number (1-based)") - pageSize: int = Field(ge=1, le=1000, description="Number of items per page") + pageSize: int = Field(ge=1, le=10000, description="Number of items per page") sort: List[SortField] = Field(default_factory=list, description="List of sort fields in priority order") filters: Optional[Dict[str, Any]] = Field( default=None, From 2f8abb5ac42997cff00565d9aba7c595ff40f51e Mon Sep 17 00:00:00 2001 From: Ida Date: Fri, 15 May 2026 12:23:50 +0200 Subject: [PATCH 12/13] updated all api keys --- env-gateway-dev.20260515_122326.backup | 97 ++++++++ env-gateway-dev.env | 10 +- env-gateway-int.20260515_122326.backup | 92 +++++++ env-gateway-int.env | 10 +- env-gateway-prod-forgejo.env | 10 +- env-gateway-prod.20260515_122326.backup | 92 +++++++ env-gateway-prod.env | 10 +- .../graphicalEditor/nodeDefinitions/flow.py | 224 ++++++++++++++++-- 8 files changed, 507 insertions(+), 38 deletions(-) create mode 100644 env-gateway-dev.20260515_122326.backup create mode 100644 env-gateway-int.20260515_122326.backup create mode 100644 env-gateway-prod.20260515_122326.backup diff --git a/env-gateway-dev.20260515_122326.backup b/env-gateway-dev.20260515_122326.backup new file mode 100644 index 00000000..0517f627 --- /dev/null +++ b/env-gateway-dev.20260515_122326.backup @@ -0,0 +1,97 @@ +# Development Environment Configuration + +# System Configuration +APP_ENV_TYPE = dev +APP_ENV_LABEL = Development Instance Patrick +APP_API_URL = http://localhost:8000 +APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/notes/key.txt +APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9 +APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9 + +# PostgreSQL 
DB Host +DB_HOST=localhost +DB_USER=poweron_dev +DB_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9 +DB_PORT=5432 + +# Security Configuration +APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ== +APP_TOKEN_EXPIRY=300 + +# CORS Configuration +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net + +# Logging configuration +APP_LOGGING_LOG_LEVEL = DEBUG +APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs +APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s +APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S +APP_LOGGING_CONSOLE_ENABLED = True +APP_LOGGING_FILE_ENABLED = True +APP_LOGGING_ROTATION_SIZE = 10485760 +APP_LOGGING_BACKUP_COUNT = 5 + +# OAuth: Auth app (login/JWT) vs Data app (Microsoft Graph / Google APIs). Same IDs until you split apps in Azure / GCP. 
+Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kxaG9WY1FJaWdCbVFVaTllUlJfU3Y3MmJkRmkzMDVDWUNtZEhlNVhISzJPcy00ZUVZcklYLXFMV0dIODV3NXNSSFBKQ0ZsZllES3diTEgySDF0T1ZCbFZHREZtcXFGSWNZN1NJbzJzczRRQWxoeVNsNzlsa0VzMHJPWHUydjBBclo= +Service_MSFT_AUTH_REDIRECT_URI = http://localhost:8000/api/msft/auth/login/callback +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyUW96aXFVOVJlLUdyRlVvT1hVU09ILWtMZnV2M19mVUxGMnFPV3FzNTdQa3dTbHVGTDBHTk01ZThLcjh6QUR5VldVZUpfcDlZNTh5YldtLWtjTll6VzJNQ3JCQ3ZubHdmd2JvaExDOXdvQ1pjWDVQTUtFWVAtUHhwS1lFQnJXWk4= +Service_MSFT_DATA_REDIRECT_URI = http://localhost:8000/api/msft/auth/connect/callback + +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kyd1hPd09vcVFtbVg0Sm5Nd1VYVEEtWjZMZkFndmFVS0ZlcTU0dzJnYVYzRkZWbjh0QldyZkhseDV2cUgxYkNHTzF6MXhqQlZ2N0UtbmhPeWRKUHBVdzV0Q1ROaWNuN2xjMmVzMjNZQ2ZYZ3dOTHgxaU5sTGRjVHpfakhYeWF0ZGU= +Service_GOOGLE_AUTH_REDIRECT_URI = http://localhost:8000/api/google/auth/login/callback +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnFBa1kySXoyd1BmTnhOd1owTUJOWm53WlZMMjFHNGJhSUwyd2NDUW9BanlRWVJPLU5jYzRlcm5QeW96d0JYUkVWVWd2dGNBVEpJbElZY2lWb0o5S0gyNnhoV1pnNXhpSFEyaklZZjcwX2lVU0ktMEJGN01DMDhXQ3k4R1BXc1Q3ejFjOEg= +Service_GOOGLE_DATA_REDIRECT_URI = http://localhost:8000/api/google/auth/connect/callback + +# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. 
+Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 +Service_CLICKUP_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd4ZWVBeHVtRnpIT0VBN0tSZDhLRmFmN05DOVBOelJtLWhkVnJDRVBqUkh3bDFTZFRWaWQ1cWowdGNLUk5IQzlGN1J6RFVCaW8zRnBwLVBnclJfdWgxV3pVRzFEV2lwcW5Rc19Xa1ROWXNJcUF0ajZaYUxOUXk0WHRsRmJLM25FaHV5T2IxdV92ZW1nRjhzaGpwU0l2Wm9FTkRnY2lJVjhuNHUwT29salAxYV8wPQ== +Service_CLICKUP_OAUTH_REDIRECT_URI = http://localhost:8000/api/clickup/auth/connect/callback + +# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI. + +# Stripe Billing (both end with _SECRET for encryption script) +STRIPE_SECRET_KEY_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5aHNGejgzQmpTdmprdzQxR19KZkh3MlhYUTNseFN3WnlaWjh2SDZyalN6aU9xSktkbUQwUnZrVnlvbGVRQm4yZFdiRU5aSEk5WVJuUnR4VUwtTm9OVk1WWmJQeU5QaDdib0hfVWV5U1BfYTFXRmdoOWdnOWxkb3JFQmF3bm45UjFUVUxmWGtGRkFKUGd6bmhpQlFnaVI3Q2lLdDlsY1VESk1vOEM0ZFBJNW1qcVZ0N2tPYmRLNmVKajZ2M3o3S05lWnRRVG5LdkRseW4wQ3VjNHNQZTZUdz09 +STRIPE_WEBHOOK_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5dDJMSHBrVk8wTzJhU2xzTTZCZWdvWmU2NGI2WklfRXRJZVUzaVYyOU9GLUZsalUwa2lPdEgtUHo0dVVvRDU1cy1saHJyU0Rxa2xQZjBuakExQzk3bmxBcU9WbEIxUEtpR1JoUFMxZG9ISGRZUXFhdFpSMGxvQUV3a0VLQllfUUtCOHZwTGdteV9rYTFOazBfSlN3ekNWblFpakJlZVlCTmNkWWQ4Sm01a1RCWTlnTlFHWVA0MkZYMlprUExrWFN2V0NVU1BTd1NKczFJbVo3VHpLdlc4UT09 +STRIPE_API_VERSION = 2026-01-28.clover +STRIPE_AUTOMATIC_TAX_ENABLED = false +STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0 + +# AI configuration +Connector_AiOpenai_API_SECRET = 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 +Connector_AiAnthropic_API_SECRET = D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 +Connector_AiPerplexity_API_SECRET = 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 +Connector_AiTavily_API_SECRET = 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 
+Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY= +Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT + +Service_MSFT_TENANT_ID = common + +# Google Cloud Speech Services configuration +Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFF
xZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFh
sYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0= + +# Feature SyncDelta JIRA configuration +Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0= + +# Teamsbot Browser Bot Service +# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot +# The bot will connect back to localhost:8000 via WebSocket +TEAMSBOT_BROWSER_BOT_URL = http://localhost:4100 + +# Debug Configuration +APP_DEBUG_CHAT_WORKFLOW_ENABLED = True +APP_DEBUG_CHAT_WORKFLOW_DIR = D:/Athi/Local/Web/poweron/local/debug +APP_DEBUG_ACCOUNTING_SYNC_ENABLED = True +APP_DEBUG_ACCOUNTING_SYNC_DIR = D:/Athi/Local/Web/poweron/local/debug/sync + +# Azure Communication Services Email Configuration +MESSAGING_ACS_CONNECTION_STRING = 
endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt +MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss + +# Zurich WFS Parcels (dynamic map layer). Default: Stadt Zürich OGD. Override for full canton if wfs.zh.ch resolves. +# Connector_ZhWfsParcels_WFS_URL = https://wfs.zh.ch/av +# Connector_ZhWfsParcels_TYPENAMES = av_li_liegenschaften_a + diff --git a/env-gateway-dev.env b/env-gateway-dev.env index f4e7e244..fd0ee428 100644 --- a/env-gateway-dev.env +++ b/env-gateway-dev.env @@ -61,12 +61,12 @@ STRIPE_AUTOMATIC_TAX_ENABLED = false STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0 # AI configuration -Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnBaSnM4TWFRRmxVQmNQblVIYmc1Y0Q3aW9zZUtDWlNWdGZjbFpncGp2NHN2QjkxMWxibUJnZDBId252MWk5TXN3Yk14ajFIdi1CTkx2ZWx2QzF5OFR6LUx5azQ3dnNLaXJBOHNxc0tlWmtZcTFVelF4eXBSM2JkbHd2eTM0VHNXdHNtVUprZWtPVzctNlJsZHNmM20tU1N6Q1Q2cHFYSi1tNlhZNDNabTVuaEVGWmIydEhadTcyMlBURmw2aUJxOF9GTzR0dTZiNGZfOFlHaVpPZ1A1LXhhOEFtN1J5TEVNNWtMcGpyNkMzSl8xRnZsaTF1WTZrOUZmb0cxVURjSGFLS2dIYTQyZEJtTm90bEYxVWxNNXVPdTVjaVhYbXhxT3JsVDM5VjZMVFZKSE1tZnM9 -Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09 -Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnB5dkd5ZmdDZ3hrSElrMnQzNFAtel9wX191VjVzN2g1LWZoa0V1YklubEdmMEJDdEZiR1RWeVZrM3V3enBHX3p6WUtTS0kwYkFyVEF0Nm8zX05CelVQcFJUc0lwVW5iNFczc1p1WWJ2WFBmd0lpLUxxWndEeUh0b2hGUHVpN19vb19nMTBnV1A1VmNpWERVX05lQ29VS20wTjZ3PT0= -Connector_AiTavily_API_SECRET = 
DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI= +Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlRzI0aTZXSHhTbklJa1ROdTRJdW94b1p0ZUhRdW55Zzdza2RsUWduSDRhMlVCYnQ4MnA0VU9ZZi10M0tySGwxV2ZweGZQei1FaWU4QThnWVE2LUI2WGhSVWhLbnVkRU5uZDgyVV90TjZXd1czaDdUaXJBU0hNWndfOFZTNzZ4UnNjenlqRWw2T2lBZFphWjNYcmtXSVZhcTUwUGhiYmhMM2lNemM0UFZwUndZVUYtTFUxVUctSEtoWmRkSE1GamJuS0FLMkhmWWNVTmJHemJCRzdhQ0Z6UzVFb1l1b1JjQWliQkt6MlNHeFBHb0h5VlBZWW1NUDNlTWpPV0lqZldjeWs2MExiTHJRdGF4b214c191ZkswVHZCcGVPMzZHaElrbWFvSVlqOXRjTkU9 +Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlbXEzdGc2NFExb3AzVUw0cEhkZzlNRjZxTVlJMV81LTZhVXhoNXBpYUlMN0FxUUJHQlJnS0N3OV85Uk9sa1J3M1lyZExSMWVsbzVSdzdWQUVsUVp2dzhfLThmY2lNb1NhVGlvbnhLR0NnSmhsOVp2RkxfODc2SFpDYlBkcWp4aFFtNldtZGQ2LUhBZVM5VXk4RTNHNzQyV3FnMVNJMW9yOGpRRkQxUC1hZ3NOOHhqV3Y4LWJSNjFYQ3dwQmhrRWJRRzhaX1N4aFlWLTVsaEJmOWxkTjZMZz09 +Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlNV9felVPcHVyMU9kVGhGZEt0MG9iRzRrTVM4TFJvSHhGOVo0U1ROWkdEMzRSWjhtMnFrZUhHTHNXelpLZ014RzRkMlIxZDJwcjEwc1dRamY5ekJMR1VLb2w4eEZqZENBRnFaZlRhb1h5VE05Tml1ZlVBWHBaTkJaZUE5NWprVklva0ZFZnB4cFFudGdkalpmTlBhdV9nPT0= +Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlY1R2WGpuazk5M05SeDIyLWd3bHpKN3lUdlVFdjhvZEJXdlM4bGlBdTB1TjRia051YllDQ2lwM0V3R3dPd2lKVWxoSm9BNWl1ZFFlVkZ5cXh4TFRVU0Z4NVU5WVRjSUJPc01La3JyaVZSNkhYWU9PR00yMENEb0dRT3l5enEwSFlWZVVzTVR0UWQ4eUxvRmZvWHl0c0xRPT0= Connector_AiPrivateLlm_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGRHM5eFdUVmVZU1R1cHBwN1RlMUx4T0NlLTJLUFFVX3J2OElDWFpuZmJHVmp4Z3BNNWMwZUVVZUd2TFhRSjVmVkVlcFlVRWtybXh0ZHloZ01ZcnVvX195YjdlWVdEcjZSWFFTTlNBWUlaTlNoLWhqVFBIb0thVlBiaWhjYjFQOFY= -Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnBudkpGeEQxYUIxOHhia0JlQWpWQ2dWQWZzY3l6SWwyUnJoR1hRQWloX2lxb2lGNkc4UnA4U2tWNjJaYzB1d1hvNG9fWUp1N3V4OW9FMGhaWVhjSlVwWEc1X2loVDBSZDEtdHdfcTA5QkcxQTR4OHc4RkRzclJrU2d1RFZpNDJkRDRURlE= 
+Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlelh2T2hqNGcxV0hMV1FKbmFDZjVHUWF6T2FXbGlCSnQzSzNXLWJHeXBFWE1nUlh1b1NHY1JRSEVtTVEtc1MtUnZrX2ZCcURqQ2FYNmFWa2xudGJtS3g2eVo4MFZMd09nZTBNMmo1ZHU0bzBJdFRqLVhHSVZNb2Zrc0VkUXI0SVk= Service_MSFT_TENANT_ID = common diff --git a/env-gateway-int.20260515_122326.backup b/env-gateway-int.20260515_122326.backup new file mode 100644 index 00000000..a3033e5a --- /dev/null +++ b/env-gateway-int.20260515_122326.backup @@ -0,0 +1,92 @@ +# Integration Environment Configuration + +# System Configuration +APP_ENV_TYPE = int +APP_ENV_LABEL = Integration Instance +APP_API_URL = https://gateway-int.poweron.swiss +# Force SameSite=None+Secure for auth cookies (cross-site UI on poweron-center.net). Optional if APP_API_URL is https:// +APP_COOKIE_SECURE = true +APP_KEY_SYSVAR = CONFIG_KEY +APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9 +APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9 + +# PostgreSQL DB Host +DB_HOST=gateway-int-server.postgres.database.azure.com +DB_USER=heeshkdlby +DB_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9 +DB_PORT=5432 + +# Security Configuration +APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ== +APP_TOKEN_EXPIRY=300 + +# CORS Configuration 
+APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net + +# Logging configuration +APP_LOGGING_LOG_LEVEL = DEBUG +APP_LOGGING_LOG_DIR = /home/site/wwwroot/ +APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s +APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S +APP_LOGGING_CONSOLE_ENABLED = True +APP_LOGGING_FILE_ENABLED = True +APP_LOGGING_ROTATION_SIZE = 10485760 +APP_LOGGING_BACKUP_COUNT = 5 + +# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) +Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kydlVubld1d1h6SUNSWW1aZ3p4X3Zod1NDTjhZVnVYS2lqOERGTFp2OXJ4TGRiNlRLVFpzLUVDTUhkZGhGUWdxa1djdEV5UWkyblN1UHZoaFBjaExNTEpGMG1PRGJEbDdHVll0Ungwcl9JemZ4ZXFzZUNFQmFlZi1DZFlCekU1S3E= +Service_MSFT_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/login/callback +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyS1hWZXEzUzZTTE5MUlJncVowMU95Y0hmV1hveDBZOWdLU1RIUWt3SGlXNGxVTXVKc2QyQmtmWTlJRU43ZnRDdnlDTGxQY0hTU25CWWFFdDhUem9HU0VYcTFJTVFEbVk0dUhmVzJNVlEzNTNWdjdmaW9WeUVDVW5PRmNFZEQzNTY= +Service_MSFT_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/msft/auth/connect/callback + +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnFBa1kyUTUwNXNGaHRNaGxxbF9sdWJ3Q0xLYU5yOHB4Yk8zMDZvQ29yaEhWOE5JMENXRk5jb2ZBdzRKQ2ZTTld6ZlIxemhOYzN1VE10TjBDRWZEMXlLVWRNYjZ0VG5RZ3I3NWt0SEJzMzdsUmRzcVNmbktRNHZqTUF6a2EyUkVUSFJnZFE= +Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/login/callback +Service_GOOGLE_DATA_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = 
INT_ENC:Z0FBQUFBQnFBa1kyV1FRVjF0c0d3d0dyWU1TdW9HdXVkdHdsVWZKYTJjbGZPRDhMRjA2M0FkaUZIVmhIUmFKNjg2ekFodHd6NG80VTI3TC1icW1LZ01jWVZuQ1pKRm5nMW5UREJEaGp2Wl9oRDRCSmZVT0JpTnkwXzgwY0pkV29yczQ5akF2d1ZGcVY= +Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-int.poweron.swiss/api/google/auth/connect/callback + +# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. +Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 +Service_CLICKUP_CLIENT_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5SE1uVURMNVE3NkM4cHBKa2R2TjBnLWdpSXI5dHpKWGExZVFiUF95TFNnZ1NwLWFLdmh6eWFZTHVHYTBzU2FGRUpLYkVyM1NvZjZkWDZHN21qUER5ZVNOaGpCc3NrUGd3VnFTclF3OW1nUlVuWXQ1UVhDLVpyb1BwRExOeFpDeVhtbEhDVnd4TVdpbzNBNk5QQWFPdjdza0xBWGxFY1E3WFpCSUlNa1l4RDlBPQ== +Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-int.poweron.swiss/api/clickup/auth/connect/callback + +# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI. 
+ +# Stripe Billing (both end with _SECRET for encryption script) +STRIPE_SECRET_KEY_SECRET = INT_ENC:Z0FBQUFBQnB5dkd5ekdBaGNGVUlOQUpncTlzLWlTV0V5OWZzQkpDczhCUGw4U1JpTHZ0d3pfYlFNWElLRlNiNlNsaDRYTGZUTkg2OUFrTW1GZXpOUjBVbmRQWjN6ekhHd2ZSQ195OHlaeWh1TmxrUm10V2R3YmdncmFLbFMzVjdqcWJMSUJPR2xuSEozclNoZG1rZVBTaWg3OFQ1Qzdxb0wyQ2RKazc2dG1aZXBUTXlvbDZqLS1KOVI5M3BGc3NQZkZRbnFpRjIwWmh2ZHlVNlpxZVo2dWNmMjQ5eW02QmtzUT09 +STRIPE_WEBHOOK_SECRET = INT_ENC:Z0FBQUFBQnA4UXZiUUVqTl9lREVRWTh1aHFDcFpwcXRkOUx4MS1ham9Ddkl6T0xzMnJuM1hhUHdGNG5CenY1MUg4RlJBOGFQTWl5cVd5MjJ2REItcHYyRmdLX3ZlT2p5Z3BRVkMtQnRoTVkteXlfaU92MVBtOEI0Ni1kbGlfa0NiRmFRRXNHLVE2NHI= +STRIPE_API_VERSION = 2026-01-28.clover +STRIPE_AUTOMATIC_TAX_ENABLED = false +STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0 + +# AI configuration +Connector_AiOpenai_API_SECRET = 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 +Connector_AiAnthropic_API_SECRET = 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 +Connector_AiPerplexity_API_SECRET = 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 +Connector_AiTavily_API_SECRET = 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 +Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg= +Connector_AiMistral_API_SECRET = ogaEVD2fFmiIWHDhKn8oGM0FShFxnAtT + +Service_MSFT_TENANT_ID = common + +# Google Cloud Speech Services configuration +Connector_GoogleSpeech_API_KEY_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkz
Z0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZ
QWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0= + +# Feature SyncDelta JIRA configuration +Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0= + +# Teamsbot Browser Bot Service +TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io + +# Debug Configuration +APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE +APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat +APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE +APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync + +# Azure Communication Services Email Configuration +MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt +MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/env-gateway-int.env b/env-gateway-int.env index 0898a985..7301ecc9 100644 --- a/env-gateway-int.env +++ b/env-gateway-int.env @@ -63,12 +63,12 @@ STRIPE_AUTOMATIC_TAX_ENABLED = false STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQd14OUoIL0Osj7A0ZQlr0 # AI configuration -Connector_AiOpenai_API_SECRET = 
INT_ENC:Z0FBQUFBQnBaSnM4MENkQ2xJVmE5WFZKUkh2SHJFby1YVXN3ZmVxRkptS3ZWRmlwdU93ZEJjSjlMV2NGbU5mS3NCdmFfcmFYTEJNZXFIQ3ozTWE4ZC1pemlQNk9wbjU1d3BPS0ZCTTZfOF8yWmVXMWx0TU1DamlJLVFhSTJXclZsY3hMVWlPcXVqQWtMdER4T252NHZUWEhUOTdIN1VGR3ltazEweXFqQ0lvb0hYWmxQQnpxb0JwcFNhRDNGWXdoRTVJWm9FalZpTUF5b1RqZlRaYnVKYkp0NWR5Vko1WWJ0Wmg2VWJzYXZ0Z3Q4UkpsTldDX2dsekhKMmM4YjRoa2RwemMwYVQwM2cyMFlvaU5mOTVTWGlROU8xY2ZVRXlxZzJqWkxURWlGZGI2STZNb0NpdEtWUnM9 -Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09 -Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnB5dkd6UkhtU3lhYmZMSlo0bklQZ2s3UTFBSkprZTNwWkg5Q2lVa0wtenhxWXpva21xVDVMRjdKSmhpTmxWS05IUTRoRHdCbktSRVVjcVFnY1RfV0N2S2dyV0dTMlhxQlRFVm41RkFTWVQzQThuVkZwdlNuVC05QlVRVXB6Qjk3akNpYmY1MFR6R1ByMzlIMllRZlRRYVVRN2ZBPT0= -Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk= +Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlYUZpRDFqLWhQajZxSElqMEMzdGZIRm5TeDBSSFlqenpZYVJEa1BtRXM1M21pd3hjTGZvSDJPcGJoY2gyQlNncWNwNkNIR0NFQnpjXzA5U2t6Zm1DWWNNVEZrTE5DVzRQVGdlZzRldGoyRWhaeTJfYjBHd0ludWpGcWdqd3hKTHJ5T0piVE15Tk1YZUZnSnE4OXdKOUhXd292dHpWMkxlR3dNclc1N2t0ckFoMmd5WTlBci11MXRGNV9UTlFCSmdOOE83bGJyODFUQ3E2NXJpRHJWZUM0cHFHekNJa0FlN3hjd2VFQ1Nqa1JFQ2NFdjlMWW1TbEV4TVZBeDFEZVVnUWlBVUV1Z0NUNHV0RE1fTEJaLTQxQksyVE1LSE1sSG0ycG9fTS1hNzh4dTQ9 +Connector_AiAnthropic_API_SECRET = 
INT_ENC:Z0FBQUFBQnFCdlFlRHFpNThJb3g3UU05cUw4SVJpOXBTblU5QzU1WFItZ2JkNXVILVN4VHp0Umh2RjJyZXJMNVp5OWFxLWhjRjhub3cxajkxMVRQMnZQdVBGT21obWN0Q0NlOU80MVhMMXRWb1l3cWNpR2Ytc1d0WnVlRUN1TTZ4NjFQcDd0Wll4cFN6dzk1OU5SZGNJck54WmNoeElITzEzejJrczVSQnp6ZTBINGtENHFiT3NnWjdUME9xXzJ5Y0N3dHk5QnpBRkpyVTgxOE0xTVllR2JMUC0yTkwyWWxHQT09 +Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFla1h1R1M3QlQ5XzJhS0x4eXFpTkZ3WHpLMWVZZldRMGpMX2psMFZ2RmpETTZMZ3ZXblo2MnhyemxYWXRsMHN1LXdZU3k5ampEMjMtdzcyb1J4Ri1rTmxPOWhJMF9MMEtzZ3d5dFZxSFY3TjNac3ZpTVJxUFFmUVpXeHEtbVBTUmtiR0lhQjhVcjM3U1NNX1ZHY1NxUFJ3PT0= +Connector_AiTavily_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlbmRSZVRjTzVKRklFbFgwdVZJaE5jNVoyX3dVTVlRUFVUenc4X1JOX2laOHRoTU9mN1lTUVRzb2xNZjJXVjhEYnVIaXdkSWN4NEpJbTFJZFN2cmkwUkJ0ZXNKT2NidktjdDFJX1BkZ3QwU3dQRzg0aG9aNmtxc1FZZ1ZBRjQyM3lOSS1EYkpqWmxoV0xWWE1Fc01uN3RnPT0= Connector_AiPrivateLlm_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGSjZ1NWh0aWc1R3Z4MHNaeS1HamtUbndhcUZFZDlqUDhjSmg5eHFfdlVkU0RsVkJ2UVRaMWs3aWhraG5jSlc0YkxNWHVmR2JoSW5ENFFCdkJBM0VienlKSnhzNnBKbTJOUTFKczRfWlQ3bWpmUkRTT1I1OGNUSTlQdExacGRpeXg= -Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnBudkpGZTNtZ1E4TWIxSEU1OUlreUpxZkJIR0Vxcm9xRHRUbnBxbTQ1cXlkbnltWkJVdTdMYWZ4c3Fsam42TERWUTVhNzZFMU9xVjdyRGFCYml6bmZsZFd2YmJzemlrSWN6Q3o3X0NXX2xXNUQteTNONHdKYzJ5YVpLLWdhU2JhSTJQZnI= +Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlU2tMLTFnQWhET2Nia2pTcVpBakRaSVFDdUpHRzZ1bkhGVVhMeEVlSnFZU3F3UFRBUkNMMU4tQU92OUdTeDlpM2VZbXJzLURQZ1lPLVB3azgxSDZabkhkSHJ5Y005aWhtcDJzajk3a2JDQUxCZlNKRGw5elJuSzJMUUpTZ2hiSlU= Service_MSFT_TENANT_ID = common diff --git a/env-gateway-prod-forgejo.env b/env-gateway-prod-forgejo.env index 80a175e6..b22a5c87 100644 --- a/env-gateway-prod-forgejo.env +++ b/env-gateway-prod-forgejo.env @@ -62,12 +62,12 @@ STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah # AI configuration -Connector_AiOpenai_API_SECRET = 
PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9 -Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09 -Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0= -Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg= +Connector_AiOpenai_API_SECRET = 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 +Connector_AiAnthropic_API_SECRET = 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 +Connector_AiPerplexity_API_SECRET = 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 +Connector_AiTavily_API_SECRET = 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 Connector_AiPrivateLlm_API_SECRET = 
PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo= -Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA= +Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c Service_MSFT_TENANT_ID = common diff --git a/env-gateway-prod.20260515_122326.backup b/env-gateway-prod.20260515_122326.backup new file mode 100644 index 00000000..8ed1c612 --- /dev/null +++ b/env-gateway-prod.20260515_122326.backup @@ -0,0 +1,92 @@ +# Production Environment Configuration + +# System Configuration +APP_ENV_TYPE = prod +APP_ENV_LABEL = Production Instance +APP_KEY_SYSVAR = CONFIG_KEY +APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3UnJRV0sySFlDblpXUlREclREaW1WbUt6bGtQYkdrNkZDOXNOLXFua1hqeFF2RHJnRXJ5VlVGV3hOZm41QjZOMlNTb0duYXNxZi05dXVTc2xDVkx0SVBFLUhncVo5T0VUZHE0UTZLWWw3ck09 +APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3QVpIY19DQVZSSzJmc2F0VEZvQlU1cHBhTEgxdHdnR3g4eW01aTEzYTUxc1gxTDR1RVVpSHRXYjV6N1BLZUdCUGlfOW1qdy0xSHFVRkNBcGZvaGlSSkZycXRuUllaWnpyVGRoeFg1dGEyNUk9 +APP_API_URL = https://gateway-prod.poweron.swiss +APP_COOKIE_SECURE = true + +# PostgreSQL DB Host +DB_HOST=gateway-prod-server.postgres.database.azure.com +DB_USER=gzxxmcrdhn +DB_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3Y1JScGxjZG9TdUkwaHRzSHZhRHpNcDV3N1U2TnIwZ21PRG5TWFFfR1k0N3BiRk5WelVadjlnXzVSTDZ6NXFQNFpqbnJ1R3dNVkJocm1zVEgtSk0xaDRiR19zNDBEbVIzSk51ekNlQ0Z3b0U9 +DB_PORT=5432 + +# Security Configuration +APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3elhfV0Rnd2pQRjlMdkVwX1FnSmRhSzNZUlV5SVpaWXBNX1hpa2xPZGdMSWpnN2ZINHQxeGZnNHJweU5pZjlyYlY5Qm9zOUZEbl9wUEgtZHZXd1NhR19JSG9kbFU4MnFGQnllbFhRQVphRGQyNHlFVWR5VHQyUUpqN0stUmRuY2QyTi1oalczRHpLTEJqWURjZWs4YjZvT2U5YnFqcXEwdEpxV05fX05QMmtrPQ== 
+APP_TOKEN_EXPIRY=300 + +# CORS Configuration +APP_ALLOWED_ORIGINS=http://localhost:8080,http://localhost:5176,https://nyla.poweron.swiss,https://nyla-int.poweron.swiss,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net + +# Logging configuration +APP_LOGGING_LOG_LEVEL = DEBUG +APP_LOGGING_LOG_DIR = /home/site/wwwroot/ +APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s +APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S +APP_LOGGING_CONSOLE_ENABLED = True +APP_LOGGING_FILE_ENABLED = True +APP_LOGGING_ROTATION_SIZE = 10485760 +APP_LOGGING_BACKUP_COUNT = 5 + +# OAuth: Auth app (login/JWT) vs Data app (Graph / Google APIs) +Service_MSFT_AUTH_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kySFR2NjBKM084QTNpeUlyUmM4R0N0SU1BZ2x4MmVTZTVHQkVzRE9GdmFkV041MzhudFhobjU0RWNnd3lqeXpKUXA5aGtNZkhtYU12QjBtX0NjemVmdEZBdC1TbXVBSXJTcF9vMlJXd0ZNRTRKRFBMUXNjTF85eTBxakR4RVNfYmU= +Service_MSFT_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/login/callback +Service_MSFT_DATA_CLIENT_ID = 840b759a-4d79-4a7a-9598-f3ed204d99d8 +Service_MSFT_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyNVU4cVRIZFdjS3l2S1RJVTVlc1ozQ1liZXZDX1VwdFZQUzFtS0N6UWYyeGxkNGNmY1hoaWxEUDBXVU5QR2t3Vi1ZV1A2QkxqbnpobzJwOXdzYTBZaFZYdnNkeDE1VVl0bm4weHFiLXdON2gtZzAwMTkxNWRoZldFM2djSkNHVS0= +Service_MSFT_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/msft/auth/connect/callback + +Service_GOOGLE_AUTH_CLIENT_ID = 813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_AUTH_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyUmJleVpTOF9OaFV3NGVfcWVBX2oxSjUwMWRGOFZRWFRIN1FZRzZ6U3VQMlg5a21RY1drTHh3U254LW4zM1A1cXQ1TTFWYlNoek9hSHJIeE4tbm1wU1lKRXlKNU5HVWI4VGZwTVE0VnJGaV8wZmNvdkVrMjJGeXdmZ3UyNmVXN1E= +Service_GOOGLE_AUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/login/callback +Service_GOOGLE_DATA_CLIENT_ID = 
813678306829-3f23dnf1cs4aaftubjfickt46tlmkgjm.apps.googleusercontent.com +Service_GOOGLE_DATA_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnFBa1kyY2pxMDh0U0RqWERianBMTTNtSUZPSzhKUzh4S0RTenR2MmxnRDlvQzJjbDVTczRWLUJtVnhxWTE2MmUxQjJia2xJcVUzVlFlUnpma040NFdHRzVNRUt0OXR0c2JkTkRmQ1RIYllXbXFFaExIQWNycFVHbUxHbmtYOVhOVUV2MFY= +Service_GOOGLE_DATA_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/google/auth/connect/callback + +# ClickUp OAuth (Verbindungen / automation). Create an app in ClickUp: Settings → Apps → API; set redirect URL to Service_CLICKUP_OAUTH_REDIRECT_URI exactly. +Service_CLICKUP_CLIENT_ID = O3FX3H602A30MQN4I4SBNGJLIDBD5SL4 +Service_CLICKUP_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6VGw5WDdhdDRsVENSalhSSUV0OFFxbEx0V1l6aktNV0E5Y18xU3JHLUlqMWVJdmxyajAydVZRaDJkZzJOVXhxRV9ROFRZbWxlRjh4c3NtQnRFMmRtZWpzTWVsdngtWldlNXRKTURHQjJCOEt6alMwQlkwOFYyVVJWNURJUGJIZDIxYVlfNnBrMU54M0Q3TVdVbFZqRkJKTUtqa05wUkV4eGZvbXNsVi1nNVdBPQ== +Service_CLICKUP_OAUTH_REDIRECT_URI = https://gateway-prod.poweron.swiss/api/clickup/auth/connect/callback + +# Infomaniak: no OAuth client. Users paste a Personal Access Token (kdrive + mail) per UI. 
+ +# Stripe Billing (both end with _SECRET for encryption script) +STRIPE_SECRET_KEY_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6aVA3R3VRS3VHMUgzUEVjYkR4eUZKWFhPUzFTTVlHNnBvT3FienNQaUlBWVpPLXJyVGpGMWk4LXktMXphX0J6ZTVESkJxdjNNa3ZJbF9wX2ppYzdjYlF0cmdVamlEWWJDSmJYYkJseHctTlh4dnNoQWs4SG5haVl2TTNDdXpuaFpqeDBtNkFCbUxMa0RaWG14dmxyOEdILTNrZ2licmNpbXVkN2lFSWoxZW1BODNpV0ZTQ0VaeXRmR1d4RjExMlVFS3MtQU9zZXZlZE1mTmY3OWctUXJHdz09 +STRIPE_WEBHOOK_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGNUpTWldsakYydFhFelBrR1lSaWxYT3kyMENOMUljZTJUZHBWcEhhdWVCMzYxZXQ5b3VlTFVRalFiTVdsbGxrdUx0RDFwSEpsOC1sTDJRTEJNQlA3S3ZaQzBtV1h6bWp5VnlMZUgwUlF3cXYxcnljZVE5SWdzLVg3V0syOWRYS08= +STRIPE_API_VERSION = 2026-01-28.clover +STRIPE_AUTOMATIC_TAX_ENABLED = false +STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah + + +# AI configuration +Connector_AiOpenai_API_SECRET = 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 +Connector_AiAnthropic_API_SECRET = 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 +Connector_AiPerplexity_API_SECRET = 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 +Connector_AiTavily_API_SECRET = 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 +Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo= +Connector_AiMistral_API_SECRET = H55rGkR3ojIhcp4YMMlgUStgvz7Wym5c + +Service_MSFT_TENANT_ID = common + +# Google Cloud Speech Services configuration +Connector_GoogleSpeech_API_KEY_SECRET = 
PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzNISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXB
PczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDl
uUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0= + +# Feature SyncDelta JIRA configuration +Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4d3Z4d2x6N1FhUktMU0RKbkxfY2pTQkRzXzJ6UXVEbDNCaFM3UHMtQVFGYzNmYWs4N0lMM1R2SFJuZTVFVmx6MGVEbXc5U3NOTnY1TWN0ZDNaamlHQWloalM3VldmREJNSHQ1TlVkSVFJMTVhQWVGSVRMTGw4UTBqNGlQZFVuaHp4WUlKemR5UnBXZlh0REJFLXJ4ejR3PT0= + +# Teamsbot Browser Bot Service +TEAMSBOT_BROWSER_BOT_URL = https://cae-poweron-shared.redwater-53d21339.switzerlandnorth.azurecontainerapps.io + +# Debug Configuration +APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE +APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat +APP_DEBUG_ACCOUNTING_SYNC_ENABLED = FALSE +APP_DEBUG_ACCOUNTING_SYNC_DIR = ./debug/sync + +# Azure Communication Services Email Configuration +MESSAGING_ACS_CONNECTION_STRING = endpoint=https://mailing-poweron-prod.switzerland.communication.azure.com/;accesskey=4UizRfBKBgMhDgQ92IYINM6dJsO1HIeL6W1DvIX9S0GtaS1PjIXqJQQJ99CAACULyCpHwxUcAAAAAZCSuSCt +MESSAGING_ACS_SENDER_EMAIL = DoNotReply@poweron.swiss diff --git a/env-gateway-prod.env b/env-gateway-prod.env index 6f4cfab0..c6979c1c 100644 --- a/env-gateway-prod.env +++ b/env-gateway-prod.env @@ -63,12 +63,12 @@ STRIPE_TAX_RATE_ID_CH_VAT = txr_1TOQZG8WqlVsabrfFEu49pah # AI configuration -Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnBaSnM4TWJOVm4xVkx6azRlNDdxN3UxLUdwY2hhdGYxRGp4VFJqYXZIcmkxM1ZyOWV2M0Z4MHdFNkVYQ0ROb1d6LUZFUEdvMHhLMEtXYVBCRzM5TlYyY3ROYWtJRk41cDZxd0tYYi00MjVqMTh4QVcyTXl0bmVocEFHbXQwREpwNi1vODdBNmwzazE5bkpNelE2WXpvblIzWlQwbGdEelI2WXFqT1RibXVHcjNWbVhwYzBOM25XTzNmTDAwUjRvYk4yNjIyZHc5c2RSZzREQUFCdUwyb0ZuOXN1dzI2c2FKdXI4NGxEbk92czZWamJXU3ZSbUlLejZjRklRRk4tLV9aVUFZekI2bTU4OHYxNTUybDg3RVo0ZTh6dXNKRW5GNXVackZvcm9laGI0X3R6V3M9 
-Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3TnhYdlhSLW5RbXJyMHFXX0V0bHhuTDlTaFJsRDl2dTdIUTFtVFAwTE8tY3hLbzNSMnVTLXd3RUZualN3MGNzc1kwOTIxVUN2WW1rYi1TendFRVVBSVNqRFVjckEzNExyTGNaUkJLMmozazUwemI1cnhrcEtZVXJrWkdaVFFramp3MWZ6RmY2aGlRMXVEYjM2M3ZlbmxMdnNCRDM1QWR0Wmd6MWVnS1I1c01nV3hRLXg3d2NTZXVfTi1Wdm16UnRyNGsyRTZ0bG9TQ1g1OFB5Z002bmQ3QT09 -Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnB5dkd6NG5CTm9QOFZRV1BIVC0tV2RKTGtCQWFOUXlpRnhEdjN1U2x3VUdDamtIZV9CQzQ5ZmRmcUh3ZUVUa0NxbGhlenVVdWtaYjdpcnhvUlNFLXZfOWh2dWFZai0xUGU5cWpuYmpnRVRWakh0RVNUUTFyX0w5V0NXVWFrQlZuOTd5TkI0eVRoQ0ZBSm9HYUlYamoyY1FCMmlBPT0= -Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z3NmItcDh6V0JpcE5Jc0NlUWZqcmllRHB5eDlNZmVnUlNVenhNTm5xWExzbjJqdE1GZ0hTSUYtb2dvdWNhTnlQNmVWQ2NGVDgwZ0MwMWZBMlNKWEhzdlF3TlZzTXhCZWM4Z1Uwb18tSTRoU1JBVTVkSkJHOTJwX291b3dPaVphVFg= +Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmcDVyOGNwbVkwWFJCWmFkZS12RkhLaFhLSF9kWWpEZ0d0NDBqV2FnWlpnYmpSckdLSGpjbmh6aHJXVUZxMElwY1MzcVg1MzBOdURUZXhnZ3pqNEZyQ1JWMVA0YmxhNWJlenNpa1A3TjZkYVZSclFONjU4MF9jMTJaS2d0ZDNnXzJKSmhSRVhyckJpTUlDa0RRWHN5cWVkOUJMTUp5aFRHcDV5Z1A1aWhSUnFNOHBJTDFPdzAzcVJ3bmhueTBmVkJDZTdJakhMOEFRdHBvWFduUzdRV2dNQVdpaXdFSVlHMDJ4NnZRUTBZZ3pOakxPLUdjNlNNQnJQMXpfSWR3NmFodDdDbkEtVmRjdVBhMjRWT1NOV1BYbU15VHRSWFR0UVBBMWtKRTRkS25KMFk9 +Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmMGhla2xoZWowNjJzc1EzMWJYRXRTcGdWWWctU3hhcXNUbVVaOTJiRFJuSGM5S3ZGZ0M4RFotTGxOQ3loa3l4aVZ2T3FsRVVMck83RTlURFNOdWxHb0JfNVEtRGJ4X193dV9Bd0EtNlVGV0h4SWk2bldfWThxNVVnOGctSkNFR3FXa2pmY2ROcV9EVE1oMndFY1d4MjdLeWtUd0VEeW5CTlFwX2FOcW9DaWVXYWVfMy1ZUnFFUEZnanFOUGZILUpUZU8yUHNSODE3OXBSWVJFNlpBdTJtUT09 +Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmRm9saTZuR1VSZV9pQllKRGFURmN4cDNNanpsVFM3TVItdDNtNWdoWC1zVllrLUVPeGZDRXF1S3Rxd0tVUGV6bl9Ob0JMa3U5ZUNlRjRVQ1dRWXZDTXlsRU13b2o2R1paalU4RXB6SWxYVEJPa2NmaDRFdzExRXU1X2VnNDlhQzQ3cTE1RlJrSlB5elRMZ2w3NmxlV2l3PT0= +Connector_AiTavily_API_SECRET = 
PROD_ENC:Z0FBQUFBQnFCdlFmZGdyWkJibS03akJtSjF0U2doYXZVVDM1em1kY2ZpRGJISmVCUURfVkw3c2Z3OEFQd1h1SzE0cTExSUtVejRPY3VmWF9XT1ZyS3RxRmVRYktJeDR6OWhYaEM0bkNLVEI1cl9VZ1VFOG9IRTFWc2FUemh0UmNHTGprQ0FweThlSGpSSDAyZmw2YmR0OFREQWxpNERHWm1nPT0= Connector_AiPrivateLlm_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGanZ6U3pzZWkwXzVPWGtIQ040XzFrTXc5QWRnazdEeEktaUJ0akJmNnEzbWUzNHczLTJfc2dIdzBDY0FTaXZYcDhxNFdNbTNtbEJTb2VRZ0ZYd05hdlNLR1h6SUFzVml2Z1FLY1BjTl90UWozUGxtak1URnhhZmNDRWFTb0dKVUo= -Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnBudkpGc2tQc2lvMk1YZk01Q1dob1U5cnR0dG03WWE3WkpoOWo0SEpvLU9Rc2lCNDExdy1wZExaN3lpT2FEQkxnaHRmWmZUUUZUUUJmblZreGlpaFpOdnFhbzlEd1RsVVJtX216cmhxTm5BcTN2eUZ2T054cDE5bmlEamJ3NGR6MVpFQnA= +Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnFCdlFmcEVpVmFuWkk4eTJTc3VtRFg4cE9QU3R5NVg0eVFIR29RSVhmXy1rR0pPTm4wbFhIVFFpckx5UmhvSGxqSWV4S0xoTzdESE55R2k5eHowZEprdGhrbEU3eG5JWGpaNWJIdDRqT05zZGNCQVpXd2xTek1teHRBS3NRU2FuUTlSQ2Q= Service_MSFT_TENANT_ID = common diff --git a/modules/features/graphicalEditor/nodeDefinitions/flow.py b/modules/features/graphicalEditor/nodeDefinitions/flow.py index 49a3dcaf..f1efa0ec 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/flow.py +++ b/modules/features/graphicalEditor/nodeDefinitions/flow.py @@ -3,9 +3,147 @@ from modules.shared.i18nRegistry import t +LOOP_DONE_DATA_PICK_OPTIONS = [ + { + "path": ["bodyResults"], + "pickerLabel": t("Alle Schleifen-Ergebnisse"), + "detail": t( + "Ausgabe des letzten Schrittes im Schleifen-Rumpf pro Iteration als Liste, " + "ein Eintrag pro Durchlauf. Ideal als Eingabe fuer Kontext zusammenfuehren." + ), + "recommended": True, + "type": "List[Any]", + }, + { + "path": ["items"], + "pickerLabel": t("Iterierte Elemente"), + "detail": t( + "Liste der Schleifen-Elemente nach gewähltem Iterationsmodus (Kopie der Eingabeliste, gefiltert)." 
+ ), + "recommended": False, + "type": "List[Any]", + }, + { + "path": ["count"], + "pickerLabel": t("Anzahl Durchläufe"), + "detail": t("Wie viele Iterationen die Schleife ausgeführt hat."), + "recommended": False, + "type": "int", + }, +] + +LOOP_ITEM_DATA_PICK_OPTIONS = [ + { + "path": ["currentItem"], + "pickerLabel": t("Aktuelles Element"), + "detail": t("Das aktuelle Iterationselement."), + "recommended": True, + "type": "Any", + }, + { + "path": ["currentIndex"], + "pickerLabel": t("Aktueller Index"), + "detail": t("0-basierter Index der aktuellen Iteration."), + "recommended": False, + "type": "int", + }, + { + "path": ["items"], + "pickerLabel": t("Alle Elemente"), + "detail": t("Die vollständige Quellliste."), + "recommended": False, + "type": "List[Any]", + }, + { + "path": ["count"], + "pickerLabel": t("Gesamtanzahl"), + "detail": t("Anzahl der Elemente in der Schleife."), + "recommended": False, + "type": "int", + }, +] + +# Base paths when ``ActionResult.data`` uses envelope + ``_meta`` (context.extractContent-style clarity). +CONTEXT_ENVELOPE_DATA_PICK_OPTIONS = [ + { + "path": ["data"], + "pickerLabel": t("Vollständiges data-Objekt"), + "detail": t( + "Versionierter Kontext-Umschlag: ``schemaVersion``, ``kind``, Nutzdatenfelder, ``_meta``." + ), + "recommended": True, + "type": "Dict", + }, + { + "path": ["data", "_meta"], + "pickerLabel": t("Technische Metadaten (_meta)"), + "detail": t( + "`actionType`, Payload-Schema-Version; bei Transform/Merge keine großen Payloads." 
+ ), + "recommended": False, + "type": "Any", + }, +] + +MERGE_RESULT_DATA_PICK_OPTIONS = [ + { + "path": ["merged"], + "pickerLabel": t("Zusammengeführt"), + "detail": t("Zusammengeführtes Ergebnis (je nach Modus)."), + "recommended": True, + "type": "Dict", + }, + { + "path": ["first"], + "pickerLabel": t("Erster Zweig"), + "detail": t("Daten vom ersten verbundenen Eingang (Modus „first“)."), + "recommended": False, + "type": "Any", + }, + { + "path": ["inputs"], + "pickerLabel": t("Alle Eingänge"), + "detail": t("Dict der Eingabeobjekte nach Port-Index."), + "recommended": False, + "type": "Dict[int,Any]", + }, +] + +# Extended picker for ``context.mergeContext`` (ActionResult + ``surfaceDataAsTopLevel``): same +# merge keys as ``flow.merge`` plus ``count`` from the action payload. +CONTEXT_MERGE_ACTION_RESULT_DATA_PICK_OPTIONS = [ + *CONTEXT_ENVELOPE_DATA_PICK_OPTIONS, + *MERGE_RESULT_DATA_PICK_OPTIONS, + { + "path": ["count"], + "pickerLabel": t("Anzahl Einträge"), + "detail": t("Wie viele Einträge zusammengeführt wurden."), + "recommended": False, + "type": "int", + }, +] + +_CONTEXT_BRANCH_DATA_PICK_OPTIONS = [ + { + "path": ["items"], + "pickerLabel": t("Gefilterte Elemente"), + "detail": t("Empfohlen für Schleifen: je Eintrag ein Durchlauf (z. B. Bild-Slots)."), + "recommended": True, + "type": "List[Any]", + }, + { + "path": ["data"], + "pickerLabel": t("Kontext (data)"), + "detail": t("Gefilterter Presentation-Umschlag oder unveränderter Eingang auf dem Sonst-Zweig."), + "recommended": False, + "type": "Dict", + }, +] + # Ports, die typische Schritt-Ausgaben durchreichen (nicht nur leerer Transit). _FLOW_INPUT_SCHEMAS = [ "Transit", + "ContextBranch", "FormPayload", "AiResult", "TextResult", @@ -31,12 +169,23 @@ FLOW_NODES = [ "Die Daten vom Eingangskanal werden an den gewählten Ausgang durchgereicht." 
), "parameters": [ + { + "name": "Item", + "type": "Any", + "required": True, + "frontendType": "dataRef", + "description": t("Item, das auf die Bedingung getestet wird"), + }, { "name": "condition", "type": "json", "required": True, "frontendType": "condition", - "description": t("Bedingung: Feld aus einem vorherigen Schritt und Vergleich"), + "frontendOptions": { + "dependsOn": "Item", + "operatorCatalog": "condition", + }, + "description": t("Bedingung auf das gewählte Item"), }, ], "inputs": 1, @@ -52,8 +201,10 @@ FLOW_NODES = [ "category": "flow", "label": t("Switch"), "description": t( - "Mehrere Zweige nach einem Wert aus einem vorherigen Schritt (Data Picker). " - "Definiere Fälle mit Vergleichsoperator; der Eingang wird an den ersten passenden Zweig durchgereicht." + "Mehrere Zweige nach einem Wert aus einem vorherigen Schritt. " + "Jeder Fall hat einen eigenen Ausgang mit passend gefiltertem Inhalt in ``items``; " + "mehrere Kontext-Filter können gleichzeitig zutreffen (z. B. Text und Bilder). " + "Der letzte Ausgang (Sonst) reicht den unveränderten Eingang durch." ), "parameters": [ { @@ -68,13 +219,22 @@ FLOW_NODES = [ "type": "array", "required": False, "frontendType": "caseList", - "description": t("Fälle: Operator und Vergleichswert"), + "frontendOptions": { + "dependsOn": "value", + "operatorCatalog": "condition", + }, + "description": t("Fälle: Operator und Vergleichswert (abhängig vom gewählten Wert)"), }, ], "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": list(_FLOW_INPUT_SCHEMAS)}}, - "outputPorts": {0: {"schema": "Transit"}}, + "outputPorts": { + 0: { + "schema": "ContextBranch", + "dataPickOptions": _CONTEXT_BRANCH_DATA_PICK_OPTIONS, + }, + }, "executor": "flow", "meta": {"icon": "mdi-swap-horizontal", "color": "#FF9800", "usesAi": False}, }, @@ -83,8 +243,10 @@ FLOW_NODES = [ "category": "flow", "label": t("Schleife / Für jedes"), "description": t( - "Iteriert über ein Array aus einem vorherigen Schritt (z. B. 
documente, Zeilen, Listeneinträge). " - "Optional: UDM-Ebene für strukturierte Dokumente." + "Zwei Ausgänge: „Schleife“ verbindet den Rumpf (pro Element); optional führt der Rumpf " + "mit einem Rücklauf-Pfeil wieder zum **gleichen Eingang** wie der vorherige Schritt (wie in n8n). " + "„Fertig“ führt genau einmal fort, wenn alle Iterationen beendet sind. " + "Die zu durchlaufende Liste wählen Sie wie bisher; UDM-/Strukturdaten werden automatisch sinnvoll in Elemente aufgelöst." ), "parameters": [ { @@ -95,13 +257,27 @@ FLOW_NODES = [ "description": t("Liste oder Sammlung zum Durchlaufen (im Data Picker wählen)"), }, { - "name": "level", + "name": "iterationMode", "type": "str", "required": False, "frontendType": "select", - "frontendOptions": {"options": ["auto", "documents", "structuralNodes", "contentBlocks"]}, - "description": t("Nur bei UDM-Daten: welche Strukturebene als Elemente verwendet wird"), - "default": "auto", + "frontendOptions": { + "options": ["all", "first", "last", "every_second", "every_third", "every_nth"], + }, + "description": t( + "Welche Elemente die Schleife besucht: alle, nur das erste/letzte, jedes zweite/dritte " + "oder jedes n-te (Schritt dann unter „Schrittweite“)." + ), + "default": "all", + }, + { + "name": "iterationStride", + "type": "int", + "required": False, + "frontendType": "number", + "frontendOptions": {"min": 2, "max": 100}, + "description": t("Nur bei „jedes n-te“: Schrittweite (z. B. 5 = jedes 5. 
Element ab Index 0)."), + "default": 2, }, { "name": "concurrency", @@ -114,12 +290,18 @@ FLOW_NODES = [ }, ], "inputs": 1, - "outputs": 1, - "inputPorts": {0: {"accepts": [ - "Transit", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList", - "ActionResult", "AiResult", "QueryResult", "FormPayload", - ]}}, - "outputPorts": {0: {"schema": "LoopItem"}}, + "outputs": 2, + "outputLabels": [t("Schleife"), t("Fertig")], + "inputPorts": { + 0: {"accepts": [ + "Transit", "ContextBranch", "UdmDocument", "EmailList", "DocumentList", "FileList", "TaskList", + "ActionResult", "AiResult", "QueryResult", "FormPayload", "LoopItem", + ]}, + }, + "outputPorts": { + 0: {"schema": "LoopItem", "dataPickOptions": LOOP_ITEM_DATA_PICK_OPTIONS}, + 1: {"schema": "Transit", "dataPickOptions": LOOP_DONE_DATA_PICK_OPTIONS}, + }, "executor": "flow", "meta": {"icon": "mdi-repeat", "color": "#FF9800", "usesAi": False}, }, @@ -151,13 +333,19 @@ FLOW_NODES = [ "default": 2, }, ], + # ``inputs: 2`` is the static minimum / default topology. ``inputCount`` is a + # frontend hint: the editor adds/removes input ports dynamically when the user + # changes the value. ``FlowExecutor._merge`` collects whatever ports exist in + # ``inputSources`` at runtime, so extra ports (3–5) work without further changes + # to this definition. ``inputPorts`` below only type-declares the two minimum + # ports; additional ports inherit the same ``_FLOW_INPUT_SCHEMAS`` accepts list. 
"inputs": 2, "outputs": 1, "inputPorts": { 0: {"accepts": list(_FLOW_INPUT_SCHEMAS)}, 1: {"accepts": list(_FLOW_INPUT_SCHEMAS)}, }, - "outputPorts": {0: {"schema": "MergeResult"}}, + "outputPorts": {0: {"schema": "MergeResult", "dataPickOptions": MERGE_RESULT_DATA_PICK_OPTIONS}}, "executor": "flow", "meta": {"icon": "mdi-call-merge", "color": "#FF9800", "usesAi": False}, }, From 7c4c5e079a748c022462b1636940124b0202ba34 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sat, 16 May 2026 22:55:43 +0200 Subject: [PATCH 13/13] rag enhancements --- app.py | 5 + modules/aicore/aicorePluginOpenai.py | 110 +-- modules/connectors/connectorDbPostgre.py | 5 +- modules/datamodels/datamodelAi.py | 7 +- modules/features/trustee/mainTrustee.py | 29 +- modules/features/trustee/trusteeOntology.py | 295 +++++++ modules/routes/routeAdminSttBenchmark.py | 217 ++++++ modules/routes/routeDataConnections.py | 23 +- modules/routes/routeDataSources.py | 26 +- modules/routes/routeRagInventory.py | 77 +- .../services/serviceAgent/agentLoop.py | 51 +- .../serviceAgent/coreTools/_workspaceTools.py | 58 +- .../services/serviceAgent/datamodelAgent.py | 34 + .../serviceAgent/datamodelOntology.py | 203 +++++ .../services/serviceAgent/featureDataAgent.py | 164 +++- .../serviceAgent/ontologyToPromptCompiler.py | 140 ++++ .../services/serviceAgent/queryValidator.py | 311 ++++++++ .../services/serviceAgent/sandboxExecutor.py | 7 +- .../services/serviceAi/subAiCallLooping.py | 16 +- .../mainBackgroundJobService.py | 98 ++- .../services/serviceChat/mainServiceChat.py | 10 +- .../subConnectorIngestConsumer.py | 24 +- .../subConnectorSyncClickup.py | 76 +- .../subConnectorSyncGdrive.py | 65 +- .../serviceKnowledge/subConnectorSyncGmail.py | 134 ++-- .../subConnectorSyncKdrive.py | 439 +++++++++++ .../subConnectorSyncOutlook.py | 131 ++-- .../subConnectorSyncSharepoint.py | 82 +- .../serviceKnowledge/subWalkerHelpers.py | 116 +++ modules/shared/aiAuditLogger.py | 5 + modules/system/mainSystem.py | 10 + 
tests/eval/__init__.py | 3 + tests/eval/fakeFeatureDataProvider.py | 246 ++++++ tests/eval/runTrusteeBenchmark.py | 735 ++++++++++++++++++ tests/fixtures/trusteeBenchmark/__init__.py | 16 + .../loadTrusteeBenchmarkFixture.py | 275 +++++++ .../fixtures/trusteeBenchmark/questions.yaml | 226 ++++++ .../test_agentTrace_repairCounters.py | 112 +++ .../services/test_featureDataAgent_schema.py | 132 +++- tests/unit/services/test_queryValidator.py | 295 +++++++ tests/unit/services/test_trusteeOntology.py | 199 +++++ 41 files changed, 4809 insertions(+), 398 deletions(-) create mode 100644 modules/features/trustee/trusteeOntology.py create mode 100644 modules/routes/routeAdminSttBenchmark.py create mode 100644 modules/serviceCenter/services/serviceAgent/datamodelOntology.py create mode 100644 modules/serviceCenter/services/serviceAgent/ontologyToPromptCompiler.py create mode 100644 modules/serviceCenter/services/serviceAgent/queryValidator.py create mode 100644 modules/serviceCenter/services/serviceKnowledge/subConnectorSyncKdrive.py create mode 100644 modules/serviceCenter/services/serviceKnowledge/subWalkerHelpers.py create mode 100644 tests/eval/__init__.py create mode 100644 tests/eval/fakeFeatureDataProvider.py create mode 100644 tests/eval/runTrusteeBenchmark.py create mode 100644 tests/fixtures/trusteeBenchmark/__init__.py create mode 100644 tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py create mode 100644 tests/fixtures/trusteeBenchmark/questions.yaml create mode 100644 tests/unit/serviceAgent/test_agentTrace_repairCounters.py create mode 100644 tests/unit/services/test_queryValidator.py create mode 100644 tests/unit/services/test_trusteeOntology.py diff --git a/app.py b/app.py index 73a64064..7a4ed4d4 100644 --- a/app.py +++ b/app.py @@ -404,8 +404,10 @@ async def lifespan(app: FastAPI): try: from modules.serviceCenter.services.serviceBackgroundJobs.mainBackgroundJobService import ( recoverInterruptedJobs, + registerZombieKillerScheduler, ) 
recoverInterruptedJobs() + registerZombieKillerScheduler(intervalMinutes=5) except Exception as e: logger.warning(f"BackgroundJob recovery failed (non-critical): {e}") @@ -607,6 +609,9 @@ app.include_router(connectionsRouter) from modules.routes.routeRagInventory import router as ragInventoryRouter app.include_router(ragInventoryRouter) +from modules.routes.routeAdminSttBenchmark import router as sttBenchmarkRouter +app.include_router(sttBenchmarkRouter) + from modules.routes.routeTableViews import router as tableViewsRouter app.include_router(tableViewsRouter) diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index 259ca117..bfea82f7 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -319,25 +319,24 @@ class AiOpenai(BaseConnectorAi): calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.00013 ), AiModel( - name="dall-e-3", - displayName="OpenAI DALL-E 3", + name="gpt-image-1", + displayName="OpenAI GPT Image", connectorType="openai", apiUrl="https://api.openai.com/v1/images/generations", - temperature=0.0, # Image generation doesn't use temperature - maxTokens=0, # Image generation doesn't use tokens + temperature=0.0, + maxTokens=0, contextLength=0, costPer1kTokensInput=0.04, costPer1kTokensOutput=0.0, - speedRating=5, # Slow for image generation - qualityRating=9, # High quality art generation - # capabilities removed (not used in business logic) + speedRating=5, + qualityRating=9, functionCall=self.generateImage, priority=PriorityEnum.QUALITY, processingMode=ProcessingModeEnum.DETAILED, operationTypes=createOperationTypeRatings( (OperationTypeEnum.IMAGE_GENERATE, 10) ), - version="dall-e-3", + version="gpt-image-1", calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.04 ) ] @@ -653,105 +652,82 @@ class AiOpenai(BaseConnectorAi): ) async def generateImage(self, modelCall: AiModelCall) -> 
AiModelResponse: - """ - Generate an image using DALL-E 3 using standardized pattern. - - Args: - modelCall: AiModelCall with messages and generation options - - Returns: - AiModelResponse with generated image data - """ + """Generate an image using GPT Image model (gpt-image-1).""" try: - # Extract parameters from modelCall - messages = modelCall.messages - model = modelCall.model - options = modelCall.options - - # Get prompt from messages - promptContent = messages[0]["content"] if messages else "" - - # Parse prompt using AiCallPromptImage model import json - + + messages = modelCall.messages + options = modelCall.options + promptContent = messages[0]["content"] if messages else "" + try: - # Try to parse as JSON promptData = json.loads(promptContent) promptModel = AiCallPromptImage(**promptData) - except: - # If not JSON, use plain text prompt + except Exception: promptModel = AiCallPromptImage( prompt=promptContent, - size=options.size if options and hasattr(options, 'size') else "1024x1024", - quality=options.quality if options and hasattr(options, 'quality') else "standard", - style=options.style if options and hasattr(options, 'style') else "vivid" + size=options.size if options and hasattr(options, "size") else "1024x1024", + quality=options.quality if options and hasattr(options, "quality") else "auto", ) - - # Extract parameters from Pydantic model + prompt = promptModel.prompt size = promptModel.size or "1024x1024" - quality = promptModel.quality or "standard" - style = promptModel.style or "vivid" - + rawQuality = promptModel.quality or "auto" + quality = {"standard": "auto", "hd": "high"}.get(rawQuality, rawQuality) + logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'") - - # DALL-E 3 API endpoint - dalle_url = "https://api.openai.com/v1/images/generations" - + payload = { - "model": "dall-e-3", + "model": "gpt-image-1", "prompt": prompt, "size": size, "quality": quality, - "style": style, "n": 1, - "response_format": 
"b64_json" # Get base64 data directly instead of URLs } - - # Use existing httpClient to benefit from connection pooling - # This avoids TLS connection issues that can occur with fresh clients + response = await self.httpClient.post( - dalle_url, - json=payload + "https://api.openai.com/v1/images/generations", + json=payload, ) - + if response.status_code != 200: - logger.error(f"DALL-E API error: {response.status_code} - {response.text}") + logger.error(f"Image generation API error: {response.status_code} - {response.text}") return AiModelResponse( content="", success=False, - error=f"DALL-E API error: {response.status_code} - {response.text}" + error=f"Image generation API error: {response.status_code} - {response.text}", ) - + responseJson = response.json() - + if "data" in responseJson and len(responseJson["data"]) > 0: - image_data = responseJson["data"][0]["b64_json"] - - logger.info(f"Successfully generated image: {len(image_data)} characters") + imageData = responseJson["data"][0].get("b64_json", "") + if not imageData: + imageData = responseJson["data"][0].get("url", "") + + logger.info(f"Successfully generated image: {len(imageData)} characters") return AiModelResponse( - content=image_data, + content=imageData, success=True, - modelId="dall-e-3", + modelId="gpt-image-1", metadata={ "size": size, "quality": quality, - "style": style, - "response_id": responseJson.get("id", "") - } + "response_id": responseJson.get("id", ""), + }, ) else: - logger.error("No image data in DALL-E response") + logger.error("No image data in generation response") return AiModelResponse( content="", success=False, - error="No image data in DALL-E response" + error="No image data in generation response", ) - + except Exception as e: logger.error(f"Error during image generation: {str(e)}", exc_info=True) return AiModelResponse( content="", success=False, - error=f"Error during image generation: {str(e)}" + error=f"Error during image generation: {str(e)}", ) \ No newline at end of 
file diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py index 9f16b1f4..a6893396 100644 --- a/modules/connectors/connectorDbPostgre.py +++ b/modules/connectors/connectorDbPostgre.py @@ -311,7 +311,10 @@ class DatabaseConnector: # Establish connection to the database self._connect() - logger.info("PostgreSQL database system initialized successfully") + logger.debug( + "PostgreSQL database system initialized (db=%s, host=%s, port=%s)", + self.dbDatabase, self.dbHost, self.dbPort, + ) except Exception as e: logger.error(f"FATAL ERROR: Database system initialization failed: {e}") raise diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index 786eea7d..cd481c9a 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -245,11 +245,10 @@ class AiCallPromptWebCrawl(BaseModel): class AiCallPromptImage(BaseModel): """Structured prompt format for image generation.""" - + prompt: str = Field(description="Text description of the image to generate") - size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1792x1024, 1024x1792)") - quality: Optional[str] = Field(default="standard", description="Image quality (standard, hd)") - style: Optional[str] = Field(default="vivid", description="Image style (vivid, natural)") + size: Optional[str] = Field(default="1024x1024", description="Image size (1024x1024, 1536x1024, 1024x1536)") + quality: Optional[str] = Field(default="auto", description="Image quality (auto, high, medium, low)") class AiProcessParameters(BaseModel): diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py index b8ab853d..8f725d2f 100644 --- a/modules/features/trustee/mainTrustee.py +++ b/modules/features/trustee/mainTrustee.py @@ -754,14 +754,35 @@ ANTI-PATTERNS (do NOT do this): """ +# Parked for one release as a fallback while the ontology-based path rolls +# out (see 
`trusteeOntology.getTrusteeOntology()`). Remove together with the +# legacy ``_loadFeatureDomainHints`` path once Phase 2 is the only supplier +# of the trustee prompt block. +_AGENT_DOMAIN_HINTS_LEGACY = _AGENT_DOMAIN_HINTS + + def getAgentDomainHints() -> str: """Return Trustee-specific guidance for the Feature Data Sub-Agent. - The text is appended verbatim to the sub-agent's system prompt by - ``featureDataAgent._buildSchemaContext``. Keep it concise and - pattern-driven — every line costs tokens on every sub-agent call. + Deprecated as of Phase 2 (2026-05). Prefer ``getAgentOntology()`` -> + ``ontologyToPromptCompiler.compileOntologyToPrompt(...)``. The legacy + text remains available so callers that still go through + ``_buildSchemaContext()`` keep working during the migration window. """ - return _AGENT_DOMAIN_HINTS + return _AGENT_DOMAIN_HINTS_LEGACY + + +def getAgentOntology(): + """Return the structured ontology used by the Feature Data Sub-Agent. + + Discovered by ``featureDataAgent._buildSchemaContext`` (Phase 2 path): + when this hook is present, the agent compiles its domain block from + the ontology instead of using the legacy free-text hints. The same + descriptor feeds the validator's NEVER_AGGREGATE constraints, so + prompt and validator stay in sync. + """ + from modules.features.trustee.trusteeOntology import getTrusteeOntology + return getTrusteeOntology() def registerFeature(catalogService) -> bool: diff --git a/modules/features/trustee/trusteeOntology.py b/modules/features/trustee/trusteeOntology.py new file mode 100644 index 00000000..c5b117d7 --- /dev/null +++ b/modules/features/trustee/trusteeOntology.py @@ -0,0 +1,295 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Trustee feature ontology (Phase 2 pilot). 
+ +Replaces the hand-written ``_AGENT_DOMAIN_HINTS`` block with a structured +ontology so the Feature Data Sub-Agent's QueryValidator AND the prompt +compiler share the same source of truth: account-group conventions, +period-bucket semantics, the NEVER_AGGREGATE constraints on already- +aggregated columns, and canonical tool-call templates for the most +frequent user intents. + +Both the validator (deterministic enforcement) and the prompt compiler +(LLM steering) read from this descriptor, so an LLM that follows the +prompt patterns will never trigger a validator failure -- and one that +ignores them gets a structured repair hint pointing back at the same +constraint. + +The legacy ``_AGENT_DOMAIN_HINTS_LEGACY`` block stays parked in +``mainTrustee.py`` for one release as a fallback during rollout. +""" + +from __future__ import annotations + +from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( + CanonicalQueryPattern, + Cardinality, + Constraint, + ConstraintRule, + Entity, + Invariant, + OntologyDescriptor, + Relation, + SemanticType, +) + + +# --------------------------------------------------------------------------- +# Entities +# --------------------------------------------------------------------------- + +_ENTITIES = [ + Entity( + name="Account", + pythonClass="TrusteeDataAccount", + semanticType=SemanticType.ACCOUNT, + description=( + "Chart-of-accounts row (Konto). One row per accountNumber per " + "mandate. Identifies the account, never holds balances." + ), + invariants=[ + Invariant(description="accountNumber is a stable string identifier (e.g. 
'1020', '5400')."), + Invariant(description="accountType is one of: asset / liability / revenue / expense."), + ], + ), + Entity( + name="BankAccount", + pythonClass="TrusteeDataAccount", + semanticType=SemanticType.ACCOUNT, + parentEntity="Account", + description="Account subgroup with accountNumber LIKE '102%' (ZKB, PostFinance, UBS, ...).", + ), + Entity( + name="CashAccount", + pythonClass="TrusteeDataAccount", + semanticType=SemanticType.ACCOUNT, + parentEntity="Account", + description="Account subgroup with accountNumber LIKE '100%' (Hauptkasse, Nebenkassen).", + ), + Entity( + name="AccountBalance", + pythonClass="TrusteeDataAccountBalance", + semanticType=SemanticType.BALANCE_SNAPSHOT, + description=( + "Period-bucketed snapshot: one row per (account, year, month). " + "closingBalance is THE balance at end of period -- already aggregated." + ), + invariants=[ + Invariant(description="periodMonth=0 means annual total of periodYear (use for 'per 31.12.YYYY')."), + Invariant(description="periodMonth in 1..12 means month-end snapshot."), + Invariant(description="closingBalance is the balance at period end; openingBalance at period start."), + Invariant(description="debitTotal/creditTotal are turnovers for the period, NOT balances."), + ], + ), + Entity( + name="JournalEntry", + pythonClass="TrusteeDataJournalEntry", + semanticType=SemanticType.TRANSACTION, + description="One booking header (Beleg). Has a bookingDate (unix seconds float) and totalAmount.", + invariants=[ + Invariant(description="bookingDate is a UTC unix-seconds float; never compare against ISO strings."), + ], + ), + Entity( + name="JournalLine", + pythonClass="TrusteeDataJournalLine", + semanticType=SemanticType.TRANSACTION, + description="One booking line of a JournalEntry. 
Each line debits or credits exactly one account.", + invariants=[ + Invariant(description="Per line either debitAmount > 0 (Soll) or creditAmount > 0 (Haben), not both."), + ], + ), +] + + +# --------------------------------------------------------------------------- +# Relations +# --------------------------------------------------------------------------- + +_RELATIONS = [ + Relation(fromEntity="AccountBalance", toEntity="Account", cardinality=Cardinality.MANY_TO_ONE, via="accountNumber"), + Relation(fromEntity="JournalLine", toEntity="JournalEntry", cardinality=Cardinality.MANY_TO_ONE, via="journalEntryId"), + Relation(fromEntity="JournalLine", toEntity="Account", cardinality=Cardinality.MANY_TO_ONE, via="accountNumber"), +] + + +# --------------------------------------------------------------------------- +# Constraints (validator-enforced) +# --------------------------------------------------------------------------- + +_CONSTRAINTS = [ + # closingBalance is the single biggest hallucination magnet -- it's a + # balance per period, summing it across periods or accounts is meaningless. + Constraint( + appliesTo="TrusteeDataAccountBalance.closingBalance", + rule=ConstraintRule.NEVER_AGGREGATE, + message=( + "closingBalance is per-period already; query with periodYear+periodMonth, never SUM/AVG it." + ), + ), + Constraint( + appliesTo="TrusteeDataAccountBalance.openingBalance", + rule=ConstraintRule.NEVER_AGGREGATE, + message="openingBalance is already a balance per period; do not SUM/AVG it across rows.", + ), + Constraint( + appliesTo="TrusteeDataAccountBalance.debitTotal", + rule=ConstraintRule.NEVER_AGGREGATE, + message=( + "debitTotal is the period's debit TURNOVER; do not SUM it without an explicit period filter." 
+ ), + ), + Constraint( + appliesTo="TrusteeDataAccountBalance.creditTotal", + rule=ConstraintRule.NEVER_AGGREGATE, + message="creditTotal is a per-period turnover; do not SUM it across periods without an explicit period filter.", + ), + # AccountBalance queries without a period filter are almost always wrong -- + # they conflate annual and monthly snapshots. Phase 2 (REQUIRES_FILTER_ON) + # is wired through to the validator in a later iteration; for now this + # rule is rendered into the prompt compiler so the LLM sees it explicitly. + Constraint( + appliesTo="TrusteeDataAccountBalance", + rule=ConstraintRule.REQUIRES_FILTER_ON, + message=( + "Always filter on periodYear AND periodMonth (use periodMonth=0 for end-of-year)." + ), + params={"requiredFields": ["periodYear", "periodMonth"]}, + ), + Constraint( + appliesTo="TrusteeDataAccountBalance", + rule=ConstraintRule.PREFERRED_TABLE_FOR_INTENT, + message="For 'Saldo per ' and 'Stand ' questions, prefer AccountBalance over JournalLine.", + params={"intents": ["BANK_BALANCE_AT_DATE", "BALANCE_AT_YEAR_END"]}, + ), +] + + +# --------------------------------------------------------------------------- +# Canonical query patterns (worked examples for the LLM) +# --------------------------------------------------------------------------- + +_CANONICAL_PATTERNS = [ + CanonicalQueryPattern( + intent="BANK_BALANCE_AT_DATE", + description="Saldo eines Bankkontos per Jahresende.", + pattern={ + "tool": "queryTable", + "tableName": "TrusteeDataAccountBalance", + "filters": [ + {"field": "accountNumber", "op": "=", "value": ""}, + {"field": "periodYear", "op": "=", "value": ""}, + {"field": "periodMonth", "op": "=", "value": 0}, + ], + "fields": ["closingBalance", "currency"], + }, + ), + CanonicalQueryPattern( + intent="BANK_GROUP_TOTAL_AT_DATE", + description="Summe einer Kontogruppe (z. B. 
alle Bankkonten 102%) per Jahresende.", + pattern={ + "tool": "queryTable", + "tableName": "TrusteeDataAccountBalance", + "filters": [ + {"field": "accountNumber", "op": "LIKE", "value": "%"}, + {"field": "periodYear", "op": "=", "value": ""}, + {"field": "periodMonth", "op": "=", "value": 0}, + ], + "fields": ["accountNumber", "closingBalance", "currency"], + "_postProcessing": "Sum closingBalance values in your final answer; do NOT SUM via aggregateTable.", + }, + ), + CanonicalQueryPattern( + intent="BALANCE_HISTORY_PER_YEAR", + description="Saldo-Verlauf eines Kontos ueber mehrere Jahre.", + pattern={ + "tool": "queryTable", + "tableName": "TrusteeDataAccountBalance", + "filters": [ + {"field": "accountNumber", "op": "=", "value": ""}, + {"field": "periodMonth", "op": "=", "value": 0}, + ], + "fields": ["periodYear", "closingBalance", "currency"], + "orderBy": "periodYear", + }, + ), + CanonicalQueryPattern( + intent="MONTHLY_BALANCE_SNAPSHOT", + description="Saldo per Ende eines bestimmten Monats.", + pattern={ + "tool": "queryTable", + "tableName": "TrusteeDataAccountBalance", + "filters": [ + {"field": "accountNumber", "op": "=", "value": ""}, + {"field": "periodYear", "op": "=", "value": ""}, + {"field": "periodMonth", "op": "=", "value": ""}, + ], + "fields": ["closingBalance", "currency"], + }, + ), + CanonicalQueryPattern( + intent="ACCOUNT_LIST_BY_TYPE_OR_PREFIX", + description="Welche Konten gehoeren zu einer Gruppe (Typ oder Nummern-Prefix)?", + pattern={ + "tool": "queryTable", + "tableName": "TrusteeDataAccount", + "filters": [ + {"field": "accountNumber", "op": "LIKE", "value": "%"}, + ], + "fields": ["accountNumber", "label", "accountType"], + }, + ), + CanonicalQueryPattern( + intent="JOURNAL_SUM_AT_ACCOUNT", + description="Summe der Soll- oder Haben-Buchungen auf einem Konto.", + pattern={ + "tool": "aggregateTable", + "tableName": "TrusteeDataJournalLine", + "aggregate": "SUM", + "field": "debitAmount", + "filters": [ + {"field": 
"accountNumber", "op": "=", "value": ""}, + ], + }, + ), + CanonicalQueryPattern( + intent="COUNT_ROWS", + description="Anzahl Buchungen / Buchungszeilen / Konten.", + pattern={ + "tool": "aggregateTable", + "tableName": "", + "aggregate": "COUNT", + "field": "id", + }, + ), + CanonicalQueryPattern( + intent="JOURNAL_LINES_BY_AMOUNT", + description="Buchungszeilen mit einem Betrag groesser/kleiner als einer Schwelle.", + pattern={ + "tool": "queryTable", + "tableName": "TrusteeDataJournalLine", + "filters": [ + {"field": "debitAmount", "op": ">", "value": ""}, + ], + "fields": ["accountNumber", "debitAmount", "description"], + }, + ), +] + + +_TRUSTEE_ONTOLOGY = OntologyDescriptor( + featureCode="trustee", + entities=_ENTITIES, + relations=_RELATIONS, + constraints=_CONSTRAINTS, + canonicalPatterns=_CANONICAL_PATTERNS, +) + + +def getTrusteeOntology() -> OntologyDescriptor: + """Public accessor for the trustee ontology. + + Cached as a module-level singleton -- the descriptor is immutable and + has no per-call state. + """ + return _TRUSTEE_ONTOLOGY diff --git a/modules/routes/routeAdminSttBenchmark.py b/modules/routes/routeAdminSttBenchmark.py new file mode 100644 index 00000000..ae24e792 --- /dev/null +++ b/modules/routes/routeAdminSttBenchmark.py @@ -0,0 +1,217 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""STT Benchmark route — compare Speech-to-Text v1 (latest_long) vs v2 (Chirp 2). + +Sysadmin-only page for evaluating STT model quality and latency. 
+""" + +import json +import time +import logging +from typing import Any, Dict + +from fastapi import APIRouter, HTTPException, Depends, Request, UploadFile, File, Form +from modules.auth import limiter, getCurrentUser +from modules.datamodels.datamodelUam import User +from modules.shared.configuration import APP_CONFIG + +logger = logging.getLogger(__name__) + +router = APIRouter( + prefix="/api/admin/stt-benchmark", + tags=["Admin STT Benchmark"], + responses={401: {"description": "Unauthorized"}, 403: {"description": "Forbidden"}}, +) + + +def _requireSysAdmin(currentUser: User = Depends(getCurrentUser)) -> User: + if not getattr(currentUser, "isSysAdmin", False) and not getattr(currentUser, "isPlatformAdmin", False): + raise HTTPException(status_code=403, detail="SysAdmin required") + return currentUser + + +def _getCredentials(): + apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") + if not apiKey or apiKey.startswith("YOUR_"): + raise HTTPException(status_code=500, detail="Google Speech API key not configured") + from google.oauth2 import service_account + return service_account.Credentials.from_service_account_info(json.loads(apiKey)) + + +def _runV1(audioBytes: bytes, language: str, model: str) -> Dict[str, Any]: + """Run Speech-to-Text v1 recognition.""" + from google.cloud import speech + credentials = _getCredentials() + client = speech.SpeechClient(credentials=credentials) + + config = speech.RecognitionConfig( + encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED, + language_code=language, + model=model, + enable_automatic_punctuation=True, + enable_word_time_offsets=True, + enable_word_confidence=True, + max_alternatives=3, + use_enhanced=True, + ) + audio = speech.RecognitionAudio(content=audioBytes) + + t0 = time.perf_counter() + response = client.recognize(config=config, audio=audio) + elapsed = time.perf_counter() - t0 + + results = [] + for r in response.results: + for alt in r.alternatives: + results.append({ + 
"transcript": alt.transcript, + "confidence": round(alt.confidence, 4), + "words": len(alt.words) if alt.words else 0, + }) + + return { + "api": "v1", + "model": model, + "latencyMs": round(elapsed * 1000, 1), + "results": results, + "resultCount": len(response.results), + } + + +def _runV2(audioBytes: bytes, language: str, model: str, location: str) -> Dict[str, Any]: + """Run Speech-to-Text v2 recognition (Chirp 2).""" + from google.cloud.speech_v2 import SpeechClient + from google.cloud.speech_v2.types import cloud_speech + + credentials = _getCredentials() + credInfo = json.loads(APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")) + projectId = credInfo.get("project_id", "") + + client = SpeechClient( + credentials=credentials, + client_options={"api_endpoint": f"{location}-speech.googleapis.com"}, + ) + + config = cloud_speech.RecognitionConfig( + auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(), + language_codes=[language], + model=model, + features=cloud_speech.RecognitionFeatures( + enable_automatic_punctuation=True, + enable_word_time_offsets=True, + enable_word_confidence=True, + ), + ) + + recognizer = f"projects/{projectId}/locations/{location}/recognizers/_" + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer, + config=config, + content=audioBytes, + ) + + t0 = time.perf_counter() + response = client.recognize(request=request) + elapsed = time.perf_counter() - t0 + + results = [] + for r in response.results: + for alt in r.alternatives: + results.append({ + "transcript": alt.transcript, + "confidence": round(alt.confidence, 4), + "words": len(alt.words) if alt.words else 0, + }) + + return { + "api": "v2", + "model": model, + "location": location, + "latencyMs": round(elapsed * 1000, 1), + "results": results, + "resultCount": len(getattr(response, "results", [])), + } + + +@router.post("/run") +@limiter.limit("10/minute") +async def runBenchmark( + request: Request, + file: UploadFile = File(...), + language: str = 
Form(default="de-DE"), + v1Model: str = Form(default="latest_long"), + v2Model: str = Form(default="chirp_2"), + v2Location: str = Form(default="europe-west4"), + currentUser: User = Depends(_requireSysAdmin), +) -> Dict[str, Any]: + """Upload audio and compare v1 vs v2 STT results.""" + audioBytes = await file.read() + if len(audioBytes) > 10 * 1024 * 1024: + raise HTTPException(status_code=400, detail="Audio file too large (max 10 MB)") + if len(audioBytes) < 100: + raise HTTPException(status_code=400, detail="Audio file too small") + + logger.info("STT benchmark: %s, %d bytes, language=%s, v1=%s, v2=%s@%s", + file.filename, len(audioBytes), language, v1Model, v2Model, v2Location) + + v1Result = None + v1Error = None + try: + v1Result = _runV1(audioBytes, language, v1Model) + except Exception as e: + v1Error = str(e) + logger.warning("STT v1 benchmark failed: %s", e) + + v2Result = None + v2Error = None + try: + v2Result = _runV2(audioBytes, language, v2Model, v2Location) + except Exception as e: + v2Error = str(e) + logger.warning("STT v2 benchmark failed: %s", e) + + return { + "filename": file.filename, + "fileSizeBytes": len(audioBytes), + "language": language, + "v1": v1Result or {"error": v1Error}, + "v2": v2Result or {"error": v2Error}, + } + + +@router.get("/models") +@limiter.limit("30/minute") +async def getAvailableModels( + request: Request, + currentUser: User = Depends(_requireSysAdmin), +) -> Dict[str, Any]: + """Return available STT models for the benchmark UI.""" + return { + "v1Models": [ + {"value": "latest_long", "label": "latest_long (default)"}, + {"value": "latest_short", "label": "latest_short"}, + {"value": "phone_call", "label": "phone_call"}, + {"value": "video", "label": "video"}, + {"value": "command_and_search", "label": "command_and_search"}, + ], + "v2Models": [ + {"value": "chirp_2", "label": "Chirp 2 (recommended)"}, + {"value": "chirp", "label": "Chirp (original)"}, + {"value": "long", "label": "long"}, + {"value": "short", 
"label": "short"}, + ], + "locations": [ + {"value": "europe-west4", "label": "Europe West (NL)"}, + {"value": "us-central1", "label": "US Central"}, + {"value": "asia-southeast1", "label": "Asia Southeast"}, + ], + "languages": [ + {"value": "de-DE", "label": "Deutsch (DE)"}, + {"value": "de-CH", "label": "Deutsch (CH)"}, + {"value": "en-US", "label": "English (US)"}, + {"value": "en-GB", "label": "English (GB)"}, + {"value": "fr-FR", "label": "Francais (FR)"}, + {"value": "it-IT", "label": "Italiano (IT)"}, + ], + } diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index 04f652fb..e2b08461 100644 --- a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -745,7 +745,7 @@ def _findOwnConnection(interface, userId: str, connectionId: str): @router.patch("/{connectionId}/knowledge-consent") @limiter.limit("10/minute") -def _updateKnowledgeConsent( +async def _updateKnowledgeConsent( request: Request, connectionId: str = Path(..., description="Connection ID"), enabled: bool = Body(..., embed=True), @@ -780,24 +780,13 @@ def _updateKnowledgeConsent( from modules.datamodels.datamodelDataSource import DataSource dataSources = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId, "ragIndexEnabled": True}) if dataSources: - import asyncio from modules.serviceCenter.services.serviceBackgroundJobs import startJob authority = connection.authority.value if hasattr(connection.authority, "value") else str(connection.authority or "") - - async def _enqueue(): - await startJob( - "connection.bootstrap", - {"connectionId": connectionId, "authority": authority.lower()}, - triggeredBy=str(currentUser.id), - ) - try: - loop = asyncio.get_event_loop() - if loop.is_running(): - loop.create_task(_enqueue()) - else: - loop.run_until_complete(_enqueue()) - except RuntimeError: - asyncio.run(_enqueue()) + await startJob( + "connection.bootstrap", + {"connectionId": connectionId, 
"authority": authority.lower()}, + triggeredBy=str(currentUser.id), + ) bootstrapEnqueued = True import json as _json diff --git a/modules/routes/routeDataSources.py b/modules/routes/routeDataSources.py index f7e5425d..ba398008 100644 --- a/modules/routes/routeDataSources.py +++ b/modules/routes/routeDataSources.py @@ -129,7 +129,7 @@ def _updateNeutralizeFields( @router.patch("/{sourceId}/rag-index") @limiter.limit("30/minute") -def _updateDataSourceRagIndex( +async def _updateDataSourceRagIndex( request: Request, sourceId: str = Path(..., description="ID of the DataSource"), ragIndexEnabled: bool = Body(..., embed=True), @@ -139,6 +139,10 @@ def _updateDataSourceRagIndex( true: sets flag + enqueues mini-bootstrap for this DataSource only. false: sets flag + synchronously purges all chunks from this DataSource. + + Must be `async def` so `await startJob(...)` registers `_runJob` in the + main event loop. Sync route → worker thread → temporary loop closes + before the task runs → job stays stuck forever. 
""" try: from modules.interfaces.interfaceDbApp import getRootInterface @@ -152,7 +156,6 @@ def _updateDataSourceRagIndex( if ragIndexEnabled: from modules.serviceCenter.services.serviceBackgroundJobs import startJob - import asyncio connectionId = rec.get("connectionId") or rec.get("connection_id") or "" conn = rootIf.getUserConnectionById(connectionId) if connectionId else None @@ -160,20 +163,11 @@ def _updateDataSourceRagIndex( if conn: authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "") - async def _enqueue(): - await startJob( - "connection.bootstrap", - {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]}, - triggeredBy=str(context.user.id), - ) - try: - loop = asyncio.get_event_loop() - if loop.is_running(): - loop.create_task(_enqueue()) - else: - loop.run_until_complete(_enqueue()) - except RuntimeError: - asyncio.run(_enqueue()) + await startJob( + "connection.bootstrap", + {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": [sourceId]}, + triggeredBy=str(context.user.id), + ) else: from modules.interfaces.interfaceDbKnowledge import getInterface as getKnowledgeInterface purgeResult = getKnowledgeInterface(None).deleteFileContentIndexByDataSource(sourceId) diff --git a/modules/routes/routeRagInventory.py b/modules/routes/routeRagInventory.py index 37fb330b..074b5b85 100644 --- a/modules/routes/routeRagInventory.py +++ b/modules/routes/routeRagInventory.py @@ -39,20 +39,27 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L chunksByDs: Dict[str, int] = {} unassigned = 0 for idx in connIndexRows: - prov = (idx.get("provenance") if isinstance(idx, dict) else getattr(idx, "provenance", None)) or {} + struct = (idx.get("structure") if isinstance(idx, dict) else getattr(idx, "structure", None)) or {} + ingestion = struct.get("_ingestion") or {} if isinstance(struct, dict) else {} + prov = ingestion.get("provenance") 
or {} if isinstance(ingestion, dict) else {} dsIdRef = prov.get("dataSourceId", "") if isinstance(prov, dict) else "" if dsIdRef: chunksByDs[dsIdRef] = chunksByDs.get(dsIdRef, 0) + 1 else: unassigned += 1 + seen: Dict[str, bool] = {} dsItems = [] for ds in dataSources: dsId = ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "") + dsPath = ds.get("path") if isinstance(ds, dict) else getattr(ds, "path", "") + if dsPath in seen: + continue + seen[dsPath] = True dsItems.append({ "id": dsId, "label": ds.get("label") if isinstance(ds, dict) else getattr(ds, "label", ""), - "path": ds.get("path") if isinstance(ds, dict) else getattr(ds, "path", ""), + "path": dsPath, "sourceType": ds.get("sourceType") if isinstance(ds, dict) else getattr(ds, "sourceType", ""), "ragIndexEnabled": ds.get("ragIndexEnabled") if isinstance(ds, dict) else getattr(ds, "ragIndexEnabled", False), "neutralize": ds.get("neutralize") if isinstance(ds, dict) else getattr(ds, "neutralize", False), @@ -60,20 +67,43 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L "chunkCount": chunksByDs.get(dsId, 0), }) - if unassigned > 0 and len(dsItems) == 1: - dsItems[0]["chunkCount"] += unassigned + if unassigned > 0 and len(dsItems) > 0: + perDs = unassigned // len(dsItems) + remainder = unassigned % len(dsItems) + for i, item in enumerate(dsItems): + item["chunkCount"] += perDs + (1 if i < remainder else 0) - jobs = jobService.listJobs(jobType="connection.bootstrap", limit=5) + # Pull a wider window than the previous 5 so the "last successful + # sync" is found even if a connection has many recent jobs queued. 
+ jobs = jobService.listJobs(jobType="connection.bootstrap", limit=50) connJobs = [j for j in jobs if (j.get("payload") or {}).get("connectionId") == connectionId] runningJobs = [ {"jobId": j["id"], "progress": j.get("progress", 0), "progressMessage": j.get("progressMessage", "")} for j in connJobs if j.get("status") in ("PENDING", "RUNNING") ] - lastError = None + lastError: Optional[Dict[str, Any]] = None + lastSuccess: Optional[Dict[str, Any]] = None for j in connJobs: - if j.get("status") == "ERROR": - lastError = {"jobId": j["id"], "errorMessage": j.get("errorMessage", "")} + status = j.get("status") + if status == "ERROR" and lastError is None: + lastError = { + "jobId": j["id"], + "errorMessage": j.get("errorMessage", ""), + "finishedAt": j.get("finishedAt"), + } + elif status == "SUCCESS" and lastSuccess is None: + result = j.get("result") or {} + lastSuccess = { + "jobId": j["id"], + "finishedAt": j.get("finishedAt"), + "indexed": result.get("indexed", 0), + "skippedDuplicate": result.get("skippedDuplicate", 0), + "skippedPolicy": result.get("skippedPolicy", 0), + "failed": result.get("failed", 0), + "durationMs": result.get("durationMs", 0), + } + if lastError and lastSuccess: break out.append({ @@ -86,6 +116,7 @@ def _buildConnectionInventory(connections, rootIf, knowledgeIf, jobService) -> L "totalChunks": connChunkTotal, "runningJobs": runningJobs, "lastError": lastError, + "lastSuccess": lastSuccess, }) return out @@ -182,7 +213,7 @@ def _getInventoryPlatform( @router.post("/reindex/{connectionId}") @limiter.limit("10/minute") -def _reindexConnection( +async def _reindexConnection( request: Request, connectionId: str, currentUser: User = Depends(getCurrentUser), @@ -190,12 +221,16 @@ def _reindexConnection( """Re-trigger bootstrap for a connection (re-index all ragIndexEnabled DataSources). Submits a new connection.bootstrap job, regardless of previous failures. 
+ + Must be `async def` so `await startJob(...)` registers the `_runJob` task + in FastAPI's main event loop. A sync route would land in the worker + threadpool and `asyncio.run` would tear down the temporary loop right + after `create_task`, leaving the job stuck in PENDING forever. """ try: from modules.interfaces.interfaceDbApp import getRootInterface from modules.serviceCenter.services.serviceBackgroundJobs import startJob from modules.datamodels.datamodelDataSource import DataSource - import asyncio rootIf = getRootInterface() conn = rootIf.getUserConnectionById(connectionId) @@ -213,23 +248,13 @@ def _reindexConnection( authority = conn.authority.value if hasattr(conn.authority, "value") else str(conn.authority or "") dsIds = [(ds.get("id") if isinstance(ds, dict) else getattr(ds, "id", "")) for ds in ragDs] - async def _enqueue(): - return await startJob( - "connection.bootstrap", - {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds}, - triggeredBy=str(currentUser.id), - ) - try: - loop = asyncio.get_event_loop() - if loop.is_running(): - future = asyncio.ensure_future(_enqueue()) - jobId = None - else: - jobId = loop.run_until_complete(_enqueue()) - except RuntimeError: - jobId = asyncio.run(_enqueue()) + jobId = await startJob( + "connection.bootstrap", + {"connectionId": connectionId, "authority": authority.lower(), "dataSourceIds": dsIds}, + triggeredBy=str(currentUser.id), + ) - logger.info("Reindex triggered for connection %s (%d DataSources)", connectionId, len(dsIds)) + logger.info("Reindex triggered for connection %s (%d DataSources, jobId=%s)", connectionId, len(dsIds), jobId) return {"status": "queued", "connectionId": connectionId, "dataSourceCount": len(dsIds), "jobId": jobId} except HTTPException: raise diff --git a/modules/serviceCenter/services/serviceAgent/agentLoop.py b/modules/serviceCenter/services/serviceAgent/agentLoop.py index c1571994..99f4dbd7 100644 --- 
a/modules/serviceCenter/services/serviceAgent/agentLoop.py +++ b/modules/serviceCenter/services/serviceAgent/agentLoop.py @@ -7,7 +7,7 @@ import logging import time import json import re -from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable +from typing import List, Dict, Any, Optional, AsyncGenerator, Callable, Awaitable, Tuple from modules.datamodels.datamodelAi import ( AiCallRequest, AiCallOptions, AiCallResponse, OperationTypeEnum @@ -360,12 +360,18 @@ async def runAgentLoop( state.totalToolCalls += len(results) for result in results: + validationCode = None + if isinstance(result.errorDetails, dict): + code = result.errorDetails.get("code") + if isinstance(code, str): + validationCode = code roundLog.toolCalls.append(ToolCallLog( toolName=result.toolName, args=next((tc.args for tc in toolCalls if tc.id == result.toolCallId), {}), success=result.success, durationMs=result.durationMs, error=result.error, + validationFailureCode=validationCode, resultData=result.data[:300] if result.data else "", )) if not result.success: @@ -443,6 +449,11 @@ async def runAgentLoop( trace.totalCostCHF = state.totalCostCHF trace.abortReason = state.abortReason + validationFailures, repairAttempts, successAfterRepair = _computeRepairCounters(trace.rounds) + trace.validationFailures = validationFailures + trace.repairAttempts = repairAttempts + trace.successAfterRepair = successAfterRepair + artifactSummary = _buildArtifactSummary(trace.rounds) yield AgentEvent( @@ -456,6 +467,9 @@ async def runAgentLoop( "status": state.status.value, "abortReason": state.abortReason, "artifacts": artifactSummary, + "validationFailures": validationFailures, + "repairAttempts": repairAttempts, + "successAfterRepair": successAfterRepair, } ) @@ -720,6 +734,41 @@ def classifyToolResult( return None +def _computeRepairCounters(rounds: List[AgentRoundLog]) -> Tuple[int, int, int]: + """Aggregate repair-loop telemetry across all rounds. 
+ + Returns ``(validationFailures, repairAttempts, successAfterRepair)``. + + * `validationFailures` -- total tool calls rejected by a pre-execute + validator (any round, counts every occurrence). + * `repairAttempts` -- tool calls in **later** rounds whose `toolName` + had been rejected in some **earlier** round. Multiple retries of the + same tool count multiple times. We intentionally do not count + sibling calls within the same round, since the LLM has not yet seen + the first one's result when emitting the second. + * `successAfterRepair` -- the subset of `repairAttempts` that passed + the validator (``validationFailureCode is None``). + """ + validationFailures = 0 + repairAttempts = 0 + successAfterRepair = 0 + rejectedTools: set = set() + + for roundLog in rounds: + rejectedFromPriorRounds = set(rejectedTools) + for tc in roundLog.toolCalls: + wasRejectedBefore = tc.toolName in rejectedFromPriorRounds + if tc.validationFailureCode is not None: + validationFailures += 1 + if wasRejectedBefore: + repairAttempts += 1 + rejectedTools.add(tc.toolName) + elif wasRejectedBefore: + repairAttempts += 1 + successAfterRepair += 1 + return validationFailures, repairAttempts, successAfterRepair + + _ARTIFACT_TOOLS = {"writeFile", "replaceInFile", "deleteFile", "renameFile", "copyFile", "createFolder", "deleteFolder", "renderDocument", "generateImage"} diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py index ed30538a..8aa83732 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_workspaceTools.py @@ -19,6 +19,20 @@ from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( logger = logging.getLogger(__name__) +_STALE_EXTRACTION_PATTERNS = ( + "requires the extract-msg package", + "extraction requires the", + "will be treated as binary", +) + + +def 
_isStaleExtractionResult(text: str) -> bool: + """Detect cached extraction results that are just error/warning placeholders.""" + if len(text) > 500: + return False + textLower = text.lower() + return any(p in textLower for p in _STALE_EXTRACTION_PATTERNS) + import uuid as _uuid @@ -62,15 +76,16 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): ] if textChunks: assembled = "\n\n".join(c["data"] for c in textChunks) - chunked = _applyOffsetLimit(assembled, offset, limit) - if chunked is not None: - return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked) - if len(assembled) > _MAX_TOOL_RESULT_CHARS: - assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]" - return ToolResult( - toolCallId="", toolName="readFile", success=True, - data=assembled, - ) + if not _isStaleExtractionResult(assembled): + chunked = _applyOffsetLimit(assembled, offset, limit) + if chunked is not None: + return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked) + if len(assembled) > _MAX_TOOL_RESULT_CHARS: + assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. 
Use offset/limit to read specific sections.]" + return ToolResult( + toolCallId="", toolName="readFile", success=True, + data=assembled, + ) elif fileStatus in ("processing", "embedding", "extracted"): return ToolResult( toolCallId="", toolName="readFile", success=True, @@ -101,12 +116,31 @@ def _registerWorkspaceTools(registry: ToolRegistry, services): isBinary = _looksLikeBinary(rawBytes) if isBinary: + extractionService = services.getService("extraction") if hasattr(services, "getService") else None + if extractionService: + try: + extracted = extractionService.extractContentFromBytes( + rawBytes, fileName, mimeType, documentId=fileId, + ) + textParts = [ + p.data for p in (extracted.parts or []) + if getattr(p, "contentType", "") != "image" and getattr(p, "data", None) + ] + if textParts: + assembled = "\n\n".join(textParts) + chunked = _applyOffsetLimit(assembled, offset, limit) + if chunked is not None: + return ToolResult(toolCallId="", toolName="readFile", success=True, data=chunked) + if len(assembled) > _MAX_TOOL_RESULT_CHARS: + assembled = assembled[:_MAX_TOOL_RESULT_CHARS] + f"\n\n[Truncated – showing first {_MAX_TOOL_RESULT_CHARS} chars of {len(assembled)}. Use offset/limit to read specific sections.]" + return ToolResult(toolCallId="", toolName="readFile", success=True, data=assembled) + except Exception as extractErr: + logger.warning("readFile: inline extraction failed for %s: %s", fileId, extractErr) return ToolResult( toolCallId="", toolName="readFile", success=True, data=( - f"[File '{fileName}' ({mimeType}) is not yet indexed " - f"(status: {fileStatus or 'unknown'}). Indexing runs automatically " - f"on upload. Please wait a few seconds and retry, or re-upload the file. " + f"[File '{fileName}' ({mimeType}) is binary and could not be extracted " + f"(status: {fileStatus or 'unknown'}). 
" f"For visual content use describeImage(fileId='{fileId}').]" ), ) diff --git a/modules/serviceCenter/services/serviceAgent/datamodelAgent.py b/modules/serviceCenter/services/serviceAgent/datamodelAgent.py index 889f31e8..c96265e4 100644 --- a/modules/serviceCenter/services/serviceAgent/datamodelAgent.py +++ b/modules/serviceCenter/services/serviceAgent/datamodelAgent.py @@ -79,6 +79,14 @@ class ToolResult(BaseModel): success: bool = True data: str = "" error: Optional[str] = None + errorDetails: Optional[Dict[str, Any]] = Field( + default=None, + description=( + "Structured, machine-readable error payload for the LLM (e.g. validation " + "repair hints with code/field/suggestion/hint). `error` remains the short " + "human-readable text for logs and audit." + ), + ) durationMs: int = 0 sideEvents: Optional[List[Dict[str, Any]]] = None @@ -141,6 +149,14 @@ class ToolCallLog(BaseModel): success: bool = True durationMs: int = 0 error: Optional[str] = None + validationFailureCode: Optional[str] = Field( + default=None, + description=( + "If the tool call was rejected by a pre-execute validator (e.g. " + "QueryValidator), the structured error code (e.g. FIELD_NOT_FOUND). " + "None when the call ran cleanly or failed for other reasons." + ), + ) resultData: str = Field(default="", description="Short result summary for artifact tracking") @@ -167,6 +183,24 @@ class AgentTrace(BaseModel): totalToolCalls: int = 0 totalCostCHF: float = 0.0 abortReason: Optional[str] = None + validationFailures: int = Field( + default=0, + description="Total tool calls rejected by a pre-execute validator across the run.", + ) + repairAttempts: int = Field( + default=0, + description=( + "Number of times the LLM retried a previously rejected tool (same toolName) " + "in a later round. Counted by `agentLoop` from per-round ToolCallLog entries." 
+ ), + ) + successAfterRepair: int = Field( + default=0, + description=( + "Number of repair attempts that produced a clean (validationFailureCode=None) " + "result. Combined with `repairAttempts` this gives the repair conversion rate." + ), + ) rounds: List[AgentRoundLog] = Field(default_factory=list) diff --git a/modules/serviceCenter/services/serviceAgent/datamodelOntology.py b/modules/serviceCenter/services/serviceAgent/datamodelOntology.py new file mode 100644 index 00000000..30e5b023 --- /dev/null +++ b/modules/serviceCenter/services/serviceAgent/datamodelOntology.py @@ -0,0 +1,203 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Ontology data model for feature data sub-agents. + +This module defines the data structures that describe a feature's data +ontology -- entities, relations, constraints, canonical query patterns -- +plus the validation error payload used by the QueryValidator. + +Phase 1 (Repair-Loop) only needs `QueryValidationError`, `Constraint`, +`ConstraintRule` and `ValidationErrorCode`; the richer `Entity`/`Relation`/ +`OntologyDescriptor` types are defined here so Phase 2 (Trustee ontology +pilot) can plug in without a second data-model change. + +See `wiki/c-work/2-build/2026-05-feature-data-agent-ontology-and-repair.md`. +""" + +from enum import Enum +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + + +class ValidationErrorCode(str, Enum): + """Stable codes for validator failures. + + The LLM sees these codes verbatim in `ToolResult.errorDetails["code"]` + and is expected to react to them deterministically (e.g. inspect the + schema via browseTable when FIELD_NOT_FOUND, drop the SUM when + INVALID_AGGREGATE_TARGET, add a period filter when MISSING_REQUIRED_FILTER). 
+ """ + FIELD_NOT_FOUND = "FIELD_NOT_FOUND" + INVALID_AGGREGATE_TARGET = "INVALID_AGGREGATE_TARGET" + WRONG_TABLE_FOR_PURPOSE = "WRONG_TABLE_FOR_PURPOSE" + TYPE_MISMATCH = "TYPE_MISMATCH" + OPERATOR_INCOMPATIBLE = "OPERATOR_INCOMPATIBLE" + MISSING_REQUIRED_FILTER = "MISSING_REQUIRED_FILTER" + ORDER_BY_INVALID = "ORDER_BY_INVALID" + + +class QueryValidationError(BaseModel): + """Structured pre-execute validation error. + + Serialized into `ToolResult.errorDetails` (machine-readable) and + summarized into `ToolResult.error` (short human-readable string). + """ + code: ValidationErrorCode + field: Optional[str] = Field( + default=None, + description="The offending field name (when applicable).", + ) + suggestion: Optional[str] = Field( + default=None, + description=( + "Best-effort suggestion (e.g. fuzzy-matched valid field name). " + "None when no useful suggestion exists." + ), + ) + hint: str = Field( + description="Short corrective hint, max ~80 chars. Surfaced to the LLM verbatim.", + max_length=160, + ) + + def toShortError(self) -> str: + """Build the short `error` string for logs/audit. + + Format: `: ` (or with field when present). + """ + if self.field: + return f"{self.code.value}: {self.field}: {self.hint}" + return f"{self.code.value}: {self.hint}" + + def toErrorDetails(self) -> Dict[str, Any]: + """Build the dict for `ToolResult.errorDetails`.""" + return { + "code": self.code.value, + "field": self.field, + "suggestion": self.suggestion, + "hint": self.hint, + } + + +class ConstraintRule(str, Enum): + """High-level rule kinds that can be attached to a field or table.""" + NEVER_AGGREGATE = "NEVER_AGGREGATE" + REQUIRES_FILTER_ON = "REQUIRES_FILTER_ON" + TYPE_MISMATCH_GUARD = "TYPE_MISMATCH_GUARD" + PREFERRED_TABLE_FOR_INTENT = "PREFERRED_TABLE_FOR_INTENT" + + +class Constraint(BaseModel): + """A single rule the validator and the prompt compiler both consume. 
+ + Phase 1 uses constraints declared inline by the validator (defaults + derived from naming conventions like ``*Balance`` / ``*Total``). + Phase 2 sources them from feature ontologies, replacing the + convention-based defaults. + """ + appliesTo: str = Field( + description=( + "Target identifier, format depends on rule: `
.` for " + "field-level constraints, `
` for table-level." + ), + ) + rule: ConstraintRule + message: str = Field( + description="Short hint forwarded to the LLM if the constraint fires.", + max_length=160, + ) + params: Dict[str, Any] = Field( + default_factory=dict, + description=( + "Rule-specific extras, e.g. {'requiredFields': ['periodYear', 'periodMonth']} " + "for REQUIRES_FILTER_ON." + ), + ) + + +class SemanticType(str, Enum): + """High-level semantic category an entity belongs to. + + Coarser than the underlying Pydantic type -- used so the prompt compiler + can group entities ("here are your ACCOUNT-like tables") without the LLM + having to read the full schema. + """ + ACCOUNT = "ACCOUNT" + BALANCE_SNAPSHOT = "BALANCE_SNAPSHOT" + TRANSACTION = "TRANSACTION" + DOCUMENT = "DOCUMENT" + PARTY = "PARTY" + PERIOD = "PERIOD" + OTHER = "OTHER" + + +class Cardinality(str, Enum): + ONE_TO_ONE = "ONE_TO_ONE" + ONE_TO_MANY = "ONE_TO_MANY" + MANY_TO_ONE = "MANY_TO_ONE" + MANY_TO_MANY = "MANY_TO_MANY" + + +class Invariant(BaseModel): + """Free-form invariant attached to an entity. + + Phase 1 leaves these as opaque text consumed by the prompt compiler. + Future phases may add a structured rule kind. + """ + description: str = Field(max_length=200) + + +class Entity(BaseModel): + """One semantic entity in the ontology (often backed by a Pydantic table).""" + name: str + pythonClass: Optional[str] = Field( + default=None, + description="MODEL_REGISTRY key when the entity is DB-backed (e.g. 'TrusteeDataAccountBalance').", + ) + semanticType: SemanticType = SemanticType.OTHER + parentEntity: Optional[str] = Field( + default=None, + description="Name of a broader entity this one specializes (e.g. 
'BankAccount' parentEntity 'Account').", + ) + description: str = "" + invariants: List[Invariant] = Field(default_factory=list) + + +class Relation(BaseModel): + fromEntity: str + toEntity: str + cardinality: Cardinality + via: Optional[str] = Field( + default=None, + description="FK-Feldname auf der fromEntity-Seite (z. B. 'journalEntryId').", + ) + + +class CanonicalQueryPattern(BaseModel): + """Tool-call skeleton for a recurring user intent. + + The prompt compiler renders these as worked examples so the LLM has a + template to mimic instead of inventing a query shape. + """ + intent: str = Field(description="Short label, e.g. 'BANK_BALANCE_AT_DATE'.") + description: str = Field(default="", description="Human-readable when to use this pattern.") + pattern: Dict[str, Any] = Field( + description="Tool-call shape with placeholders, e.g. {'tool': 'queryTable', 'tableName': '...', 'filters': [...]}", + ) + + +class OntologyDescriptor(BaseModel): + """Top-level container exported by `getAgentOntology()` per feature.""" + featureCode: str + entities: List[Entity] = Field(default_factory=list) + relations: List[Relation] = Field(default_factory=list) + constraints: List[Constraint] = Field(default_factory=list) + canonicalPatterns: List[CanonicalQueryPattern] = Field(default_factory=list) + + def constraintsForTable(self, tableName: str) -> List[Constraint]: + """Return constraints whose ``appliesTo`` targets the given table or one of its fields.""" + prefix = f"{tableName}." + return [ + c for c in self.constraints + if c.appliesTo == tableName or c.appliesTo.startswith(prefix) + ] diff --git a/modules/serviceCenter/services/serviceAgent/featureDataAgent.py b/modules/serviceCenter/services/serviceAgent/featureDataAgent.py index aa2d332d..51840575 100644 --- a/modules/serviceCenter/services/serviceAgent/featureDataAgent.py +++ b/modules/serviceCenter/services/serviceAgent/featureDataAgent.py @@ -15,6 +15,7 @@ invoked outside an agent loop (e.g. in tests). 
import json import logging +import os from typing import Any, Callable, Awaitable, Dict, List, Optional from modules.datamodels.datamodelAi import ( @@ -25,6 +26,10 @@ from modules.serviceCenter.services.serviceAgent.agentLoop import runAgentLoop from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( AgentConfig, AgentEvent, AgentEventTypeEnum, ToolResult, ) +from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( + QueryValidationError, +) +from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.featureDataProvider import FeatureDataProvider from modules.shared.i18nRegistry import resolveText @@ -83,7 +88,8 @@ async def runFeatureDataAgent( """ provider = FeatureDataProvider(dbConnector, neutralizeFields=neutralizeFields) - registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}) + validator = _buildValidatorForFeature(featureCode) + registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator) for tbl in selectedTables: meta = tbl.get("meta", {}) @@ -153,10 +159,19 @@ def _buildSubAgentTools( featureInstanceId: str, mandateId: str, tableFilters: Dict[str, Dict[str, str]] = None, + validator: Optional[QueryValidator] = None, ) -> ToolRegistry: - """Register browseTable and queryTable as sub-agent tools.""" + """Register browseTable and queryTable as sub-agent tools. + + The optional ``validator`` runs **before** the provider on every call. + When it returns a structured error, the tool result carries + ``errorDetails`` (machine-readable repair hint for the LLM) plus the + short ``error`` string for logs/audit. No provider call happens in that + case, so the database is never reached with a known-bad query. 
+ """ registry = ToolRegistry() _tableFilters = tableFilters or {} + _validator = validator or QueryValidator() def _recordFilterToList(tableName: str) -> Optional[List[Dict[str, Any]]]: """Convert a recordFilter dict to a list of {field, op, value} filter dicts.""" @@ -165,6 +180,14 @@ def _buildSubAgentTools( return None return [{"field": k, "op": "=", "value": v} for k, v in rf.items()] + def _validationToolResult(toolName: str, err: QueryValidationError) -> ToolResult: + return ToolResult( + toolCallId="", toolName=toolName, + success=False, + error=err.toShortError(), + errorDetails=err.toErrorDetails(), + ) + async def _browseTable(args: Dict[str, Any], context: Dict[str, Any]): tableName = args.get("tableName", "") limit = args.get("limit", 50) @@ -172,6 +195,9 @@ def _buildSubAgentTools( fields = args.get("fields") if not tableName: return ToolResult(toolCallId="", toolName="browseTable", success=False, error="tableName required") + validationErr = _validator.validateBrowseQuery(tableName, args) + if validationErr is not None: + return _validationToolResult("browseTable", validationErr) result = provider.browseTable( tableName=tableName, featureInstanceId=featureInstanceId, @@ -197,6 +223,9 @@ def _buildSubAgentTools( offset = args.get("offset", 0) if not tableName: return ToolResult(toolCallId="", toolName="queryTable", success=False, error="tableName required") + validationErr = _validator.validateQueryTable(tableName, args) + if validationErr is not None: + return _validationToolResult("queryTable", validationErr) result = provider.queryTable( tableName=tableName, featureInstanceId=featureInstanceId, @@ -220,12 +249,19 @@ def _buildSubAgentTools( aggregate = args.get("aggregate", "") field = args.get("field", "") groupBy = args.get("groupBy") + filters = args.get("filters") or [] if not tableName: return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="tableName required") if not aggregate: return ToolResult(toolCallId="", 
toolName="aggregateTable", success=False, error="aggregate required (SUM, COUNT, AVG, MIN, MAX)") if not field: return ToolResult(toolCallId="", toolName="aggregateTable", success=False, error="field required") + validationErr = _validator.validateAggregateQuery(tableName, args) + if validationErr is not None: + return _validationToolResult("aggregateTable", validationErr) + combinedFilters = list(filters) + recordFilters = _recordFilterToList(tableName) or [] + combinedFilters.extend(recordFilters) result = provider.aggregateTable( tableName=tableName, featureInstanceId=featureInstanceId, @@ -233,7 +269,7 @@ def _buildSubAgentTools( aggregate=aggregate, field=field, groupBy=groupBy, - extraFilters=_recordFilterToList(tableName), + extraFilters=combinedFilters or None, ) return ToolResult( toolCallId="", toolName="aggregateTable", @@ -246,8 +282,12 @@ def _buildSubAgentTools( "aggregateTable", _aggregateTable, description=( "Run an aggregate query on a feature data table. " - "Supports SUM, COUNT, AVG, MIN, MAX with optional GROUP BY. " - "Example: aggregateTable(tableName='TrusteeDataJournalLine', aggregate='SUM', field='debitAmount', groupBy='costCenter')" + "Supports SUM, COUNT, AVG, MIN, MAX with optional GROUP BY and filters. " + "Example: aggregateTable(tableName='TrusteeDataJournalLine', aggregate='SUM', " + "field='debitAmount', filters=[{'field':'accountNumber','op':'=','value':'5400'}]). " + "On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- " + "read errorDetails and correct the next call (e.g. drop the SUM, switch to queryTable with period filters, " + "or use the suggested field name)." ), parameters={ "type": "object", @@ -256,6 +296,22 @@ def _buildSubAgentTools( "aggregate": {"type": "string", "enum": ["SUM", "COUNT", "AVG", "MIN", "MAX"], "description": "Aggregate function"}, "field": {"type": "string", "description": "Field to aggregate (e.g. 
debitAmount, creditAmount)"}, "groupBy": {"type": "string", "description": "Optional field to group by (e.g. costCenter, accountNumber)"}, + "filters": { + "type": "array", + "items": { + "type": "object", + "properties": { + "field": {"type": "string"}, + "op": {"type": "string"}, + "value": {}, + }, + }, + "description": ( + "Optional filter conditions applied before the aggregate. Same shape as queryTable's " + "filters. Required whenever you want to aggregate only a subset (e.g. SUM debits on " + "ONE account, COUNT rows in ONE year)." + ), + }, }, "required": ["tableName", "aggregate", "field"], }, @@ -264,7 +320,11 @@ def _buildSubAgentTools( registry.register( "browseTable", _browseTable, - description="List rows from a feature data table with pagination.", + description=( + "List rows from a feature data table with pagination. " + "On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- " + "use errorDetails to correct the next call." + ), parameters={ "type": "object", "properties": { @@ -286,7 +346,10 @@ def _buildSubAgentTools( description=( "Query a feature data table with filters, field selection, and ordering. " "Filters: [{\"field\": \"status\", \"op\": \"=\", \"value\": \"active\"}]. " - "Operators: =, !=, >, <, >=, <=, LIKE, ILIKE, IS NULL, IS NOT NULL." + "Operators: =, !=, >, <, >=, <=, LIKE, ILIKE, IS NULL, IS NOT NULL. " + "On validation failure the tool returns success=False with errorDetails={code, field, suggestion, hint} -- " + "common codes: FIELD_NOT_FOUND (use the suggestion or call browseTable), OPERATOR_INCOMPATIBLE " + "(switch to a compatible operator for that field type), ORDER_BY_INVALID." ), parameters={ "type": "object", @@ -410,13 +473,94 @@ def _buildSchemaContext( "- Keep your answer SHORT. 
The caller is a machine, not a human.", ] - domainHints = _loadFeatureDomainHints(featureCode) - if domainHints: - parts.extend(["", domainHints.strip()]) + domainBlock = "" + if not _isOntologyDisabled(): + domainBlock = _loadFeatureOntologyBlock(featureCode) + if not domainBlock: + domainBlock = _loadFeatureDomainHints(featureCode) + if domainBlock: + parts.extend(["", domainBlock.strip()]) return "\n".join(parts) +def _isOntologyDisabled() -> bool: + """Eval-only escape hatch. + + Set ``POWERON_DISABLE_FEATURE_ONTOLOGY=1`` in the environment to force + ``_buildSchemaContext`` back onto the legacy ``getAgentDomainHints()`` + path. Used by the Phase 1.5 benchmark to measure ``baseline`` and + ``phase1`` accuracy WITHOUT the ontology-driven prompt block. Never + set this flag in production. + """ + return os.environ.get("POWERON_DISABLE_FEATURE_ONTOLOGY", "").strip() in ("1", "true", "TRUE", "yes") + + +def _buildValidatorForFeature(featureCode: str) -> QueryValidator: + """Construct a QueryValidator wired with the feature ontology (when present). + + Without an ontology the validator falls back to its convention-based + constraints (``*Balance`` / ``*Total`` are NEVER_AGGREGATE). With an + ontology the descriptor's constraints take precedence -- the validator + and the prompt block then share the same source of truth. 
+ """ + ontology = _loadFeatureOntology(featureCode) + return QueryValidator(ontology=ontology) + + +def _loadFeatureOntology(featureCode: str): + """Return the feature's OntologyDescriptor or None when no hook is exposed.""" + if not featureCode: + return None + try: + from modules.system.registry import loadFeatureMainModules + except Exception: + return None + + try: + mainModules = loadFeatureMainModules() or {} + except Exception as exc: + logger.debug("Ontology lookup: cannot load main modules (%s)", exc) + return None + + module = mainModules.get(featureCode) or mainModules.get(featureCode.lower()) + if module is None: + return None + hook = getattr(module, "getAgentOntology", None) + if not callable(hook): + return None + try: + return hook() + except Exception as exc: + logger.warning("Feature '%s' getAgentOntology() raised: %s", featureCode, exc) + return None + + +def _loadFeatureOntologyBlock(featureCode: str) -> str: + """Return the ontology-derived prompt block when the feature exposes one. + + Each feature can expose ``getAgentOntology() -> OntologyDescriptor`` in + its ``mainXxx.py``. When present, the descriptor is compiled via + :func:`ontologyToPromptCompiler.compileOntologyToPrompt` and the result + replaces the legacy ``getAgentDomainHints()`` text block. This keeps + one single source of truth for the validator AND the prompt. + + Failures are swallowed (missing hook, exceptions in compilation) so the + caller can fall back to the legacy domain-hints path. + """ + ontology = _loadFeatureOntology(featureCode) + if ontology is None: + return "" + try: + from modules.serviceCenter.services.serviceAgent.ontologyToPromptCompiler import ( + compileOntologyToPrompt, + ) + return compileOntologyToPrompt(ontology) + except Exception as exc: + logger.warning("Ontology compile failed for '%s': %s", featureCode, exc) + return "" + + def _loadFeatureDomainHints(featureCode: str) -> str: """Pull optional domain-specific hints from the feature's main module. 
diff --git a/modules/serviceCenter/services/serviceAgent/ontologyToPromptCompiler.py b/modules/serviceCenter/services/serviceAgent/ontologyToPromptCompiler.py new file mode 100644 index 00000000..5b162ed3 --- /dev/null +++ b/modules/serviceCenter/services/serviceAgent/ontologyToPromptCompiler.py @@ -0,0 +1,140 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Deterministic compiler: OntologyDescriptor -> sub-agent prompt block. + +Phase 2 replaces a feature's hand-written ``_AGENT_DOMAIN_HINTS`` text +with a structured :class:`OntologyDescriptor`. This compiler renders the +descriptor into a stable, terse Markdown-ish block that the sub-agent +appends to its system prompt -- the same source of truth the +:class:`QueryValidator` consults. + +The output is intentionally: +* short (every token costs every call) +* deterministic (no f-string ordering bugs, no Python dict iteration) +* free of internal jargon ('canonicalQueryPattern' is rendered as + 'CANONICAL PATTERN' for the LLM) +""" + +from __future__ import annotations + +from typing import Iterable, List + +from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( + CanonicalQueryPattern, + Constraint, + ConstraintRule, + Entity, + OntologyDescriptor, + Relation, +) + + +def compileOntologyToPrompt(ontology: OntologyDescriptor) -> str: + """Render *ontology* into a sub-agent prompt block. + + The output starts with a stable marker line (``DOMAIN ONTOLOGY (...)``) + so downstream tooling can find/replace it deterministically. 
+ """ + lines: List[str] = [] + lines.append(f"DOMAIN ONTOLOGY ({ontology.featureCode}):") + lines.append("") + lines.extend(_renderEntities(ontology.entities)) + relationLines = _renderRelations(ontology.relations) + if relationLines: + lines.append("") + lines.extend(relationLines) + constraintLines = _renderConstraints(ontology.constraints) + if constraintLines: + lines.append("") + lines.extend(constraintLines) + patternLines = _renderPatterns(ontology.canonicalPatterns) + if patternLines: + lines.append("") + lines.extend(patternLines) + return "\n".join(lines).rstrip() + "\n" + + +def _renderEntities(entities: Iterable[Entity]) -> List[str]: + out: List[str] = ["ENTITIES:"] + for e in entities: + head = f"- {e.name}" + if e.parentEntity: + head += f" (specializes {e.parentEntity})" + if e.pythonClass: + head += f" [table: {e.pythonClass}]" + out.append(head) + if e.description: + out.append(f" {e.description}") + for inv in e.invariants: + out.append(f" * {inv.description}") + return out + + +def _renderRelations(relations: Iterable[Relation]) -> List[str]: + rels = list(relations) + if not rels: + return [] + out: List[str] = ["RELATIONS:"] + for r in rels: + line = f"- {r.fromEntity} -> {r.toEntity} ({r.cardinality.value}" + if r.via: + line += f" via {r.via}" + line += ")" + out.append(line) + return out + + +def _renderConstraints(constraints: Iterable[Constraint]) -> List[str]: + cons = list(constraints) + if not cons: + return [] + out: List[str] = ["CONSTRAINTS (validator-enforced):"] + for c in cons: + rule = _ruleLabel(c.rule) + line = f"- {rule} on {c.appliesTo}: {c.message}" + params = c.params or {} + required = params.get("requiredFields") + if isinstance(required, list) and required: + line += f" (required filters: {', '.join(required)})" + intents = params.get("intents") + if isinstance(intents, list) and intents: + line += f" (intents: {', '.join(intents)})" + out.append(line) + return out + + +def _ruleLabel(rule: ConstraintRule) -> str: + 
return rule.value.replace("_", " ").lower() + + +def _renderPatterns(patterns: Iterable[CanonicalQueryPattern]) -> List[str]: + pats = list(patterns) + if not pats: + return [] + out: List[str] = ["CANONICAL QUERY PATTERNS (mimic these tool calls):"] + for i, p in enumerate(pats, start=1): + out.append(f"{i}) intent={p.intent}: {p.description}") + out.append(f" call: {_renderPatternCall(p.pattern)}") + extra = p.pattern.get("_postProcessing") if isinstance(p.pattern, dict) else None + if isinstance(extra, str): + out.append(f" note: {extra}") + return out + + +def _renderPatternCall(pattern: dict) -> str: + """Render the pattern as a compact one-line tool call signature.""" + tool = pattern.get("tool", "?") + parts: List[str] = [] + for key in ("tableName", "aggregate", "field", "groupBy", "orderBy"): + if key in pattern and pattern[key] is not None and not str(key).startswith("_"): + parts.append(f"{key}={pattern[key]!r}") + if "fields" in pattern and pattern["fields"]: + parts.append(f"fields={pattern['fields']}") + if "filters" in pattern and pattern["filters"]: + compact = ", ".join( + f"{f.get('field')}{f.get('op','=')}{f.get('value')!r}" + for f in pattern["filters"] + if isinstance(f, dict) + ) + parts.append(f"filters=[{compact}]") + return f"{tool}({', '.join(parts)})" diff --git a/modules/serviceCenter/services/serviceAgent/queryValidator.py b/modules/serviceCenter/services/serviceAgent/queryValidator.py new file mode 100644 index 00000000..2dbbd57e --- /dev/null +++ b/modules/serviceCenter/services/serviceAgent/queryValidator.py @@ -0,0 +1,311 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Pre-execute query validator for the Feature Data Sub-Agent. + +Sits between the LLM tool call and `FeatureDataProvider`. 
Catches the four +high-impact hallucination classes deterministically so the LLM gets an +actionable repair hint instead of a raw SQL exception: + +* invented field names -> FIELD_NOT_FOUND (+ fuzzy suggestion) +* operator/type mismatches -> OPERATOR_INCOMPATIBLE +* SUM/AVG on already-aggregated -> INVALID_AGGREGATE_TARGET + balance/total columns +* orderBy on invented fields -> ORDER_BY_INVALID + +The validator reads the canonical schema from +`modules.datamodels.datamodelBase.MODEL_REGISTRY`. When an +`OntologyDescriptor` is provided (Phase 2), its constraints override the +convention-based defaults (e.g. NEVER_AGGREGATE on closingBalance). +""" + +from __future__ import annotations + +import difflib +import logging +import re +import typing +from typing import Any, Dict, List, Optional, Tuple + +from modules.datamodels.datamodelBase import MODEL_REGISTRY +from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( + Constraint, + ConstraintRule, + OntologyDescriptor, + QueryValidationError, + ValidationErrorCode, +) + +logger = logging.getLogger(__name__) + + +_STRING_ONLY_OPERATORS = {"LIKE", "ILIKE"} +_COMPARISON_OPERATORS = {">", "<", ">=", "<="} +_VALUELESS_OPERATORS = {"IS NULL", "IS NOT NULL"} +_AGGREGATES_THAT_SUM = {"SUM", "AVG"} +_AGGREGATE_BLACKLIST_SUFFIXES_DEFAULT: Tuple[str, ...] = ("Balance", "Total") + + +class QueryValidator: + """Validate sub-agent tool arguments against the schema (+ optional ontology). + + Stateless per call -- holding only the optional ontology. Each + `validateXxx` method returns ``None`` on success or a + :class:`QueryValidationError` to be surfaced to the LLM. 
+ """ + + def __init__(self, ontology: Optional[OntologyDescriptor] = None): + self._ontology = ontology + + # ------------------------------------------------------------------ + # public API: one method per sub-agent tool + # ------------------------------------------------------------------ + + def validateBrowseQuery( + self, tableName: str, args: Dict[str, Any] + ) -> Optional[QueryValidationError]: + """Validate browseTable arguments. + + Phase 1 scope: only `fields` (whitelist) is LLM-driven; `limit`/`offset` + are sanitized by the tool wrapper. + """ + modelFields = _getModelFields(tableName) + if modelFields is None: + return None + + fieldsErr = self._validateFieldList(args.get("fields"), modelFields) + if fieldsErr is not None: + return fieldsErr + return None + + def validateQueryTable( + self, tableName: str, args: Dict[str, Any] + ) -> Optional[QueryValidationError]: + """Validate queryTable arguments (filters + fields + orderBy).""" + modelFields = _getModelFields(tableName) + if modelFields is None: + return None + + fieldsErr = self._validateFieldList(args.get("fields"), modelFields) + if fieldsErr is not None: + return fieldsErr + + for f in args.get("filters") or []: + filterErr = self._validateFilter(f, modelFields) + if filterErr is not None: + return filterErr + + orderBy = args.get("orderBy") + if orderBy is not None and not _isPlainNone(orderBy): + if orderBy not in modelFields: + return QueryValidationError( + code=ValidationErrorCode.ORDER_BY_INVALID, + field=orderBy, + suggestion=_suggestFieldName(orderBy, modelFields), + hint="orderBy must be a real field of this table.", + ) + return None + + def validateAggregateQuery( + self, tableName: str, args: Dict[str, Any] + ) -> Optional[QueryValidationError]: + """Validate aggregateTable arguments. + + Catches the highest-impact hallucination in the codebase: + ``SUM(closingBalance)`` (and friends) across periods -- closing + balances are already per-period, summing them produces nonsense. 
+ """ + modelFields = _getModelFields(tableName) + if modelFields is None: + return None + + field = args.get("field") + aggregate = (args.get("aggregate") or "").upper() + + if not field: + return None # tool wrapper rejects empty field already + + if field not in modelFields: + return QueryValidationError( + code=ValidationErrorCode.FIELD_NOT_FOUND, + field=field, + suggestion=_suggestFieldName(field, modelFields), + hint="Use browseTable to inspect this table's columns.", + ) + + if aggregate in _AGGREGATES_THAT_SUM and self._isAggregateBlacklisted(tableName, field): + return QueryValidationError( + code=ValidationErrorCode.INVALID_AGGREGATE_TARGET, + field=field, + suggestion=None, + hint=( + f"{field} is already aggregated per period; do not {aggregate} it " + "across rows. Use queryTable with period filters instead." + ), + ) + + if aggregate in _AGGREGATES_THAT_SUM and not _isNumericAnnotation(modelFields[field]): + return QueryValidationError( + code=ValidationErrorCode.TYPE_MISMATCH, + field=field, + suggestion=None, + hint=f"{aggregate} requires a numeric field; {field} is not numeric.", + ) + + groupBy = args.get("groupBy") + if groupBy is not None and not _isPlainNone(groupBy): + if groupBy not in modelFields: + return QueryValidationError( + code=ValidationErrorCode.FIELD_NOT_FOUND, + field=groupBy, + suggestion=_suggestFieldName(groupBy, modelFields), + hint="groupBy must be a real field of this table.", + ) + + # filters validation matches queryTable so the LLM gets consistent + # repair hints regardless of which tool it picked. 
+ for f in args.get("filters") or []: + filterErr = self._validateFilter(f, modelFields) + if filterErr is not None: + return filterErr + return None + + # ------------------------------------------------------------------ + # internals + # ------------------------------------------------------------------ + + def _validateFieldList( + self, fields: Optional[List[str]], modelFields: Dict[str, Any] + ) -> Optional[QueryValidationError]: + if not fields: + return None + for f in fields: + if not isinstance(f, str): + continue + if f not in modelFields: + return QueryValidationError( + code=ValidationErrorCode.FIELD_NOT_FOUND, + field=f, + suggestion=_suggestFieldName(f, modelFields), + hint="Use browseTable to inspect this table's columns.", + ) + return None + + def _validateFilter( + self, filterEntry: Any, modelFields: Dict[str, Any] + ) -> Optional[QueryValidationError]: + if not isinstance(filterEntry, dict): + return None + field = filterEntry.get("field") + op = (filterEntry.get("op") or "=").upper() + + if not isinstance(field, str) or not field: + return None # tool wrapper passes these straight through + + if field not in modelFields: + return QueryValidationError( + code=ValidationErrorCode.FIELD_NOT_FOUND, + field=field, + suggestion=_suggestFieldName(field, modelFields), + hint="Use browseTable to inspect this table's columns.", + ) + + annotation = modelFields[field] + + if op in _STRING_ONLY_OPERATORS and not _isStringAnnotation(annotation): + return QueryValidationError( + code=ValidationErrorCode.OPERATOR_INCOMPATIBLE, + field=field, + suggestion=None, + hint=f"{op} only works on string fields; {field} is not a string.", + ) + + if op in _COMPARISON_OPERATORS and not _isComparableAnnotation(annotation): + return QueryValidationError( + code=ValidationErrorCode.OPERATOR_INCOMPATIBLE, + field=field, + suggestion=None, + hint=f"{op} requires a numeric or date field; {field} is not comparable.", + ) + return None + + def _isAggregateBlacklisted(self, 
tableName: str, fieldName: str) -> bool: + """Check whether a field is marked NEVER_AGGREGATE. + + Phase 2 (ontology present): consult the descriptor. + Phase 1 fallback: naming convention (``*Balance`` / ``*Total``). + """ + if self._ontology is not None: + target = f"{tableName}.{fieldName}" + for c in self._ontology.constraintsForTable(tableName): + if c.rule == ConstraintRule.NEVER_AGGREGATE and c.appliesTo == target: + return True + + for suffix in _AGGREGATE_BLACKLIST_SUFFIXES_DEFAULT: + if fieldName.endswith(suffix): + return True + return False + + +# ------------------------------------------------------------------ +# helpers +# ------------------------------------------------------------------ + +def _getModelFields(tableName: str) -> Optional[Dict[str, Any]]: + """Return ``{fieldName: annotation}`` for a registered Pydantic table model. + + None when the table is not in MODEL_REGISTRY (e.g. pure UDB tables in + early-startup contexts). The validator is a best-effort layer -- when + the schema is unknown we let the request through and rely on the + downstream SQL layer for safety. 
+ """ + modelClass = MODEL_REGISTRY.get(tableName) + if modelClass is None: + return None + return { + name: info.annotation for name, info in modelClass.model_fields.items() + } + + +def _suggestFieldName(badName: str, modelFields: Dict[str, Any]) -> Optional[str]: + """Return the closest valid field name, or None if nothing reasonable.""" + if not badName or not modelFields: + return None + matches = difflib.get_close_matches(badName, list(modelFields.keys()), n=1, cutoff=0.6) + return matches[0] if matches else None + + +def _isPlainNone(value: Any) -> bool: + """LLMs sometimes pass the literal string 'None' -- treat both as None.""" + return value is None or (isinstance(value, str) and value.strip().lower() == "none") + + +def _unwrapAnnotation(annotation: Any) -> Tuple[Any, ...]: + """Flatten Optional/Union annotations into their constituent types.""" + origin = typing.get_origin(annotation) + if origin is None: + return (annotation,) + return tuple(a for a in typing.get_args(annotation) if a is not type(None)) + + +def _isStringAnnotation(annotation: Any) -> bool: + return any(a is str for a in _unwrapAnnotation(annotation)) + + +def _isNumericAnnotation(annotation: Any) -> bool: + numericTypes = (int, float) + return any(a in numericTypes for a in _unwrapAnnotation(annotation)) + + +def _isComparableAnnotation(annotation: Any) -> bool: + """Numeric types are the comparable shape we see in feature tables. + + Booleans count as int in Python's type hierarchy but the comparison + operators ``>``/``<`` on bool columns are almost never meaningful, so we + treat bool as non-comparable for validator purposes. 
+ """ + for a in _unwrapAnnotation(annotation): + if a is bool: + continue + if a in (int, float): + return True + return False diff --git a/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py b/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py index c2e16506..2fbe9c34 100644 --- a/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py +++ b/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py @@ -98,14 +98,17 @@ class _VirtualFS: def _makeReadFile(services): """Create a readFile(fileId) closure bound to the current services context.""" - def readFile(fileId: str) -> str: + def readFile(fileId: str, encoding: str = "utf-8") -> str: mgmt = getattr(services, 'interfaceDbComponent', None) if services else None if not mgmt: raise RuntimeError("readFile: no file store available in this session") data = mgmt.getFileData(str(fileId)) if data is None: raise FileNotFoundError(f"File '{fileId}' not found in workspace") - return data.decode("utf-8") + try: + return data.decode(encoding) + except (UnicodeDecodeError, LookupError): + return data.decode("utf-8", errors="replace") return readFile diff --git a/modules/serviceCenter/services/serviceAi/subAiCallLooping.py b/modules/serviceCenter/services/serviceAi/subAiCallLooping.py index 4285de51..3ef22535 100644 --- a/modules/serviceCenter/services/serviceAi/subAiCallLooping.py +++ b/modules/serviceCenter/services/serviceAi/subAiCallLooping.py @@ -60,6 +60,7 @@ from modules.shared.jsonContinuation import getContexts from modules.shared.jsonUtils import buildContinuationContext, tryParseJson from modules.shared.jsonUtils import closeJsonStructures from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText +from modules.shared.jsonUtils import extractJsonString, repairBrokenJson logger = logging.getLogger(__name__) @@ -447,7 +448,6 @@ class AiCallLooper: extracted = extractJsonString(contexts.completePart) parsed, parseErr, _ = tryParseJson(extracted) if parseErr is not None: - 
from modules.shared.jsonUtils import repairBrokenJson repaired = repairBrokenJson(extracted) if repaired: parsed = repaired @@ -470,9 +470,10 @@ class AiCallLooper: return useCase.finalResultHandler( result, normalized, extracted, debugPrefix, self.services ) - except Exception as e: + except (json.JSONDecodeError, KeyError, TypeError) as e: logger.warning( - f"Iteration {iteration}: completePart not serializable after getContexts success: {e}" + f"Iteration {iteration}: completePart not serializable after getContexts success: " + f"{type(e).__name__}: {e}" ) mergeFailCount += 1 if mergeFailCount >= MAX_MERGE_FAILS: @@ -491,6 +492,15 @@ class AiCallLooper: ) self.services.chat.progressLogFinish(iterationOperationId, True) continue + except Exception as e: + logger.error( + f"Iteration {iteration}: unexpected error during completePart processing " + f"(re-raising, NOT a pipeline-mismatch retry): {type(e).__name__}: {e}", + exc_info=True, + ) + if iterationOperationId: + self.services.chat.progressLogFinish(iterationOperationId, False) + raise elif contexts.jsonParsingSuccess and contexts.overlapContext != "": # JSON parseable but has cut point - CONTINUE to next iteration diff --git a/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py b/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py index 66ca4708..e27dae58 100644 --- a/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py +++ b/modules/serviceCenter/services/serviceBackgroundJobs/mainBackgroundJobService.py @@ -34,7 +34,7 @@ import time from datetime import datetime, timezone from typing import Any, Awaitable, Callable, Dict, List, Optional -from modules.connectors.connectorDbPostgre import DatabaseConnector +from modules.connectors.connectorDbPostgre import DatabaseConnector, getCachedConnector from modules.shared.configuration import APP_CONFIG from modules.shared.dbRegistry import registerDatabase from 
modules.datamodels.datamodelBackgroundJob import ( @@ -104,7 +104,13 @@ def registerJobHandler(jobType: str, handler: JobHandler) -> None: def _getDb() -> DatabaseConnector: - return DatabaseConnector( + """Return the shared cached connector for the jobs DB. + + Reuses the same connector across all job CRUD calls instead of opening a + fresh psycopg2 connection (and re-running `_create_database_if_not_exists` + + `_create_tables` + `_initializeSystemTable`) on every operation. + """ + return getCachedConnector( dbDatabase=JOBS_DATABASE, dbHost=APP_CONFIG.get("DB_HOST", "localhost"), dbPort=int(APP_CONFIG.get("DB_PORT", "5432")), @@ -290,12 +296,12 @@ def cancelJobsByConnection(connectionId: str, *, jobType: str = "connection.boot def recoverInterruptedJobs() -> int: - """Flip any RUNNING jobs to ERROR and re-queue bootstrap jobs (called at worker boot). + """Flip any RUNNING jobs to ERROR (called at worker boot). A RUNNING job in the DB after process restart means the previous worker died mid-execution; the asyncio task is gone and the job will never - finish on its own. For connection.bootstrap jobs, a fresh job is - automatically re-queued so the user doesn't have to manually retry. + finish on its own. The daily scheduler or manual "Neu indexieren" + button handles retry — no automatic re-queue to avoid infinite loops. 
""" db = _getDb() try: @@ -304,34 +310,70 @@ def recoverInterruptedJobs() -> int: logger.warning("recoverInterruptedJobs: failed to scan RUNNING jobs: %s", ex) return 0 count = 0 - requeued = 0 for row in rows: try: _markError(row["id"], "Interrupted by worker restart") count += 1 except Exception as ex: logger.warning("recoverInterruptedJobs: could not mark %s as ERROR: %s", row.get("id"), ex) - continue - - if row.get("jobType") == "connection.bootstrap": - payload = row.get("payload") or {} - if payload.get("connectionId"): - try: - newJob = BackgroundJob( - jobType="connection.bootstrap", - payload=payload, - triggeredBy="recovery.requeue", - ) - record = db.recordCreate(BackgroundJob, _serialiseDatetimes(newJob.model_dump())) - asyncio.create_task(_runJob(record["id"])) - requeued += 1 - logger.info( - "recoverInterruptedJobs: re-queued bootstrap for connectionId=%s (new jobId=%s)", - payload["connectionId"], record["id"], - ) - except Exception as reqEx: - logger.warning("recoverInterruptedJobs: re-queue failed for %s: %s", row.get("id"), reqEx) - if count: - logger.warning("Recovered %d interrupted background job(s) after restart (re-queued %d)", count, requeued) + logger.warning("Recovered %d interrupted background job(s) after restart", count) return count + + +_ZOMBIE_MAX_AGE_SECONDS = 30 * 60 + + +def killZombieJobs(maxAgeSeconds: int = _ZOMBIE_MAX_AGE_SECONDS) -> int: + """Kill RUNNING jobs that have not been updated within `maxAgeSeconds`. + + Detects walkers that are stuck in a sync call without progress updates. + A live job updates progress at least every few seconds via JobProgressCallback. + Anything older than maxAgeSeconds without finishing is considered hung. 
+ """ + db = _getDb() + try: + rows = db.getRecordset(BackgroundJob, recordFilter={"status": BackgroundJobStatusEnum.RUNNING.value}) + except Exception as ex: + logger.warning("killZombieJobs: failed to scan RUNNING jobs: %s", ex) + return 0 + now = time.time() + threshold = now - maxAgeSeconds + count = 0 + for row in rows: + started = row.get("startedAt") or row.get("createdAt") + if not started or started > threshold: + continue + ageMin = (now - started) / 60 + try: + _markError(row["id"], f"Zombie killed (stuck >{maxAgeSeconds // 60}min, no progress)") + count += 1 + payload = row.get("payload") or {} + logger.warning( + "killZombieJobs: killed %s (type=%s connId=%s ageMin=%.1f)", + row["id"], row.get("jobType"), payload.get("connectionId", "")[:12], ageMin, + ) + except Exception as ex: + logger.warning("killZombieJobs: could not kill %s: %s", row.get("id"), ex) + return count + + +def registerZombieKillerScheduler(*, intervalMinutes: int = 5) -> None: + """Register a recurring cron job that kills stuck RUNNING jobs. + + Idempotent. Runs every `intervalMinutes` minutes. 
+ """ + try: + from modules.shared.eventManagement import eventManager + + async def _runKiller(): + killZombieJobs() + + eventManager.registerCron( + jobId="background_jobs.zombie_killer", + func=_runKiller, + cronKwargs={"minute": f"*/{intervalMinutes}"}, + ) + logger.info("Zombie-killer scheduler registered (every %d min)", intervalMinutes) + except Exception as ex: + logger.warning("Zombie-killer scheduler registration failed (non-critical): %s", ex) diff --git a/modules/serviceCenter/services/serviceChat/mainServiceChat.py b/modules/serviceCenter/services/serviceChat/mainServiceChat.py index 7852360c..2ca61d7e 100644 --- a/modules/serviceCenter/services/serviceChat/mainServiceChat.py +++ b/modules/serviceCenter/services/serviceChat/mainServiceChat.py @@ -532,8 +532,16 @@ class ChatService: self, connectionId: str, sourceType: str, path: str, label: str, featureInstanceId: str = None, displayPath: str = None, ) -> Dict[str, Any]: - """Create a new external data source reference.""" + """Create a new external data source reference. + + Returns existing record if connectionId + path already exists (upsert semantics). 
+ """ from modules.datamodels.datamodelDataSource import DataSource + existing = self.interfaceDbApp.db.getRecordset( + DataSource, recordFilter={"connectionId": connectionId, "path": path} + ) + if existing: + return existing[0] if isinstance(existing[0], dict) else existing[0].model_dump() ds = DataSource( connectionId=connectionId, sourceType=sourceType, diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py index 0e2d251f..c86aed86 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorIngestConsumer.py @@ -132,10 +132,10 @@ _SOURCE_TYPE_MAP = { "gmail": ("gmailFolder",), }, "clickup": { - "clickup": ("clickupList",), + "clickup": ("clickupList", "clickup"), }, "infomaniak": { - "kdrive": ("kdriveFolder",), + "kdrive": ("kdriveFolder", "infomaniak"), }, } @@ -225,7 +225,7 @@ async def _bootstrapJobHandler( bootstrapOutlook, ) - progressCb(10, "sharepoint + outlook") + progressCb(0, "Synchronisierung läuft...") spDs = _filterDs("sharepoint") olDs = _filterDs("outlook") async def _noopResult(): @@ -251,7 +251,7 @@ async def _bootstrapJobHandler( bootstrapGmail, ) - progressCb(10, "drive + gmail") + progressCb(0, "Synchronisierung läuft...") gdDs = _filterDs("drive") gmDs = _filterDs("gmail") async def _noopResult(): @@ -274,7 +274,7 @@ async def _bootstrapJobHandler( bootstrapClickup, ) - progressCb(10, "clickup tasks") + progressCb(0, "Synchronisierung läuft...") cuDs = _filterDs("clickup") cuResult = await bootstrapClickup(connectionId=connectionId, progressCb=progressCb, dataSources=cuDs) if cuDs else {"skipped": True, "reason": "no_datasources"} return { @@ -283,6 +283,20 @@ async def _bootstrapJobHandler( "clickup": _normalize(cuResult, "clickup"), } + if authority == "infomaniak": + from 
modules.serviceCenter.services.serviceKnowledge.subConnectorSyncKdrive import ( + bootstrapKdrive, + ) + + progressCb(0, "Synchronisierung läuft...") + kdDs = _filterDs("kdrive") + kdResult = await bootstrapKdrive(connectionId=connectionId, progressCb=progressCb, dataSources=kdDs) if kdDs else {"skipped": True, "reason": "no_datasources"} + return { + "connectionId": connectionId, + "authority": authority, + "kdrive": _normalize(kdResult, "kdrive"), + } + logger.info( "ingestion.connection.bootstrap.skipped reason=unsupported_authority authority=%s connectionId=%s", authority, connectionId, diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py index 7acbaa19..8bfa2628 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncClickup.py @@ -25,6 +25,12 @@ from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Optional +from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + WalkerTimeout, + ingestWithTimeout, + logItemStart, +) + logger = logging.getLogger(__name__) MAX_TASKS_DEFAULT = 500 @@ -449,36 +455,44 @@ async def _ingestTask( name = task.get("name") or f"Task {taskId}" syntheticId = _syntheticTaskId(connectionId, taskId) fileName = f"{name[:80].strip() or taskId}.task.json" + logItemStart("clickup", f"{teamId}/{taskId}") contentObjects = _buildContentObjects(task, limits) try: - handle = await knowledgeService.requestIngestion( - IngestionJob( - sourceKind="clickup_task", - sourceId=syntheticId, - fileName=fileName, - mimeType="application/vnd.clickup.task+json", - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - contentVersion=revision or None, - neutralize=limits.neutralize, - provenance={ - "connectionId": connectionId, - 
"dataSourceId": dataSourceId, - "authority": "clickup", - "service": "clickup", - "externalItemId": taskId, - "teamId": teamId, - "listId": ((task.get("list") or {}).get("id")), - "spaceId": ((task.get("space") or {}).get("id")), - "url": task.get("url"), - "status": ((task.get("status") or {}).get("status")), - "tier": limits.clickupScope, - }, - ) + handle = await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="clickup_task", + sourceId=syntheticId, + fileName=fileName, + mimeType="application/vnd.clickup.task+json", + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + contentVersion=revision or None, + neutralize=limits.neutralize, + provenance={ + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "clickup", + "service": "clickup", + "externalItemId": taskId, + "teamId": teamId, + "listId": ((task.get("list") or {}).get("id")), + "spaceId": ((task.get("space") or {}).get("id")), + "url": task.get("url"), + "status": ((task.get("status") or {}).get("status")), + "tier": limits.clickupScope, + }, + ) + ), + label=taskId, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.error("clickup ingestion %s failed: %s", taskId, exc, exc_info=True) result.failed += 1 @@ -493,18 +507,16 @@ async def _ingestTask( result.failed += 1 processed = result.indexed + result.skippedDuplicate - if progressCb is not None and processed % 50 == 0: + if progressCb is not None and processed % 5 == 0: if hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): return try: - progressCb( - min(90, 10 + int(80 * processed / max(1, limits.maxTasks))), - f"clickup processed={processed}", - ) + progressCb(0, f"{processed} Tasks verarbeitet, {result.indexed} indexiert") except Exception: pass - logger.info( - "ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d", + if processed % 50 == 0: + 
logger.info( + "ingestion.connection.bootstrap.progress part=clickup processed=%d skippedDup=%d failed=%d", processed, result.skippedDuplicate, result.failed, extra={ "event": "ingestion.connection.bootstrap.progress", diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py index 398b9af9..5dd1bd8b 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGdrive.py @@ -21,6 +21,13 @@ from datetime import datetime, timedelta, timezone from typing import Any, Callable, Dict, List, Optional from modules.datamodels.datamodelExtraction import ExtractionOptions +from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + WalkerTimeout, + downloadWithTimeout, + extractWithTimeout, + ingestWithTimeout, + logItemStart, +) logger = logging.getLogger(__name__) @@ -342,9 +349,15 @@ async def _ingestOne( syntheticFileId = _syntheticFileId(connectionId, externalItemId) fileName = getattr(entry, "name", "") or externalItemId + declaredSize = int(getattr(entry, "size", 0) or 0) or None + logItemStart("gdrive", entryPath, sizeBytes=declaredSize, mime=mimeType) try: - downloaded = await adapter.download(entryPath) + downloaded = await downloadWithTimeout(adapter.download(entryPath), label=entryPath) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.warning("gdrive download %s failed: %s", entryPath, exc) result.failed += 1 @@ -368,10 +381,16 @@ async def _ingestOne( result.bytesProcessed += len(fileBytes) try: - extracted = runExtractionFn( + extracted = await extractWithTimeout( + runExtractionFn, fileBytes, fileName, mimeType, ExtractionOptions(mergeStrategy=None), + label=entryPath, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return 
except Exception as exc: logger.warning("gdrive extraction %s failed: %s", entryPath, exc) result.failed += 1 @@ -393,20 +412,27 @@ async def _ingestOne( "tier": "body", } try: - handle = await knowledgeService.requestIngestion( - IngestionJob( - sourceKind="gdrive_item", - sourceId=syntheticFileId, - fileName=fileName, - mimeType=mimeType, - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - contentVersion=revision, - neutralize=limits.neutralize, - provenance=provenance, - ) + handle = await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="gdrive_item", + sourceId=syntheticFileId, + fileName=fileName, + mimeType=mimeType, + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + contentVersion=revision, + neutralize=limits.neutralize, + provenance=provenance, + ) + ), + label=entryPath, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.error("gdrive ingestion %s failed: %s", entryPath, exc, exc_info=True) result.failed += 1 @@ -422,13 +448,10 @@ async def _ingestOne( if handle.error: result.errors.append(f"ingest({entryPath}): {handle.error}") - if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: - processed = result.indexed + result.skippedDuplicate + processed = result.indexed + result.skippedDuplicate + if progressCb is not None and processed % 5 == 0: try: - progressCb( - min(90, 10 + int(80 * processed / max(1, limits.maxItems))), - f"gdrive processed={processed}", - ) + progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert") except Exception: pass logger.info( diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py index f5c345c6..3130e942 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py +++ 
b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncGmail.py @@ -24,6 +24,11 @@ from datetime import datetime, timedelta, timezone from typing import Any, Callable, Dict, List, Optional from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody +from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + WalkerTimeout, + ingestWithTimeout, + logItemStart, +) logger = logging.getLogger(__name__) @@ -399,34 +404,42 @@ async def _ingestMessage( subject = headers.get("subject") or "(no subject)" syntheticId = _syntheticMessageId(connectionId, messageId) fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml" + logItemStart("gmail", f"{labelId}/{messageId}", mime="message/rfc822") contentObjects = _buildContentObjects( message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth ) try: - handle = await knowledgeService.requestIngestion( - IngestionJob( - sourceKind="gmail_message", - sourceId=syntheticId, - fileName=fileName, - mimeType="message/rfc822", - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - contentVersion=str(revision) if revision else None, - neutralize=limits.neutralize, - provenance={ - "connectionId": connectionId, - "dataSourceId": dataSourceId, - "authority": "google", - "service": "gmail", - "externalItemId": messageId, - "label": labelId, - "threadId": message.get("threadId"), - "tier": limits.mailContentDepth, - }, - ) + handle = await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="gmail_message", + sourceId=syntheticId, + fileName=fileName, + mimeType="message/rfc822", + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + contentVersion=str(revision) if revision else None, + neutralize=limits.neutralize, + provenance={ + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "google", + "service": "gmail", + "externalItemId": messageId, + "label": 
labelId, + "threadId": message.get("threadId"), + "tier": limits.mailContentDepth, + }, + ) + ), + label=messageId, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.error("gmail ingestion %s failed: %s", messageId, exc, exc_info=True) result.failed += 1 @@ -458,18 +471,16 @@ async def _ingestMessage( logger.warning("gmail attachments %s failed: %s", messageId, exc) result.errors.append(f"attachments({messageId}): {exc}") - if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: - processed = result.indexed + result.skippedDuplicate + processed = result.indexed + result.skippedDuplicate + if progressCb is not None and processed % 5 == 0: try: - progressCb( - min(90, 10 + int(80 * processed / max(1, limits.maxMessages))), - f"gmail processed={processed}", - ) + progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert") except Exception: pass - logger.info( - "ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d", - processed, result.skippedDuplicate, result.failed, + if processed % 50 == 0: + logger.info( + "ingestion.connection.bootstrap.progress part=gmail processed=%d skippedDup=%d failed=%d", + processed, result.skippedDuplicate, result.failed, extra={ "event": "ingestion.connection.bootstrap.progress", "part": "gmail", @@ -546,13 +557,26 @@ async def _ingestAttachments( fileName = stub["filename"] mimeType = stub["mimeType"] syntheticId = _syntheticAttachmentId(connectionId, messageId, stub["attachmentId"]) + attLabel = f"{messageId}/att:{stub['attachmentId']}/{fileName}" + logItemStart("gmail-attachment", attLabel, sizeBytes=stub.get("size") or None, mime=mimeType) - try: - extracted = runExtraction( + from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + extractWithTimeout as _extractWithTimeout, + ) + + def _runAttExtraction(): + return runExtraction( extractorRegistry, 
chunkerRegistry, rawBytes, fileName, mimeType, ExtractionOptions(mergeStrategy=None), ) + + try: + extracted = await _extractWithTimeout(_runAttExtraction, label=attLabel) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + continue except Exception as exc: logger.warning("gmail attachment extract %s failed: %s", stub["attachmentId"], exc) result.failed += 1 @@ -584,27 +608,33 @@ async def _ingestAttachments( continue try: - await knowledgeService.requestIngestion( - IngestionJob( - sourceKind="gmail_attachment", - sourceId=syntheticId, - fileName=fileName, - mimeType=mimeType, - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - provenance={ - "connectionId": connectionId, - "dataSourceId": dataSourceId, - "authority": "google", - "service": "gmail", - "parentId": parentSyntheticId, - "externalItemId": stub["attachmentId"], - "parentMessageId": messageId, - }, - ) + await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="gmail_attachment", + sourceId=syntheticId, + fileName=fileName, + mimeType=mimeType, + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + provenance={ + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "google", + "service": "gmail", + "parentId": parentSyntheticId, + "externalItemId": stub["attachmentId"], + "parentMessageId": messageId, + }, + ) + ), + label=attLabel, ) result.attachmentsIndexed += 1 + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) except Exception as exc: logger.warning("gmail attachment ingest %s failed: %s", stub["attachmentId"], exc) result.failed += 1 diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncKdrive.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncKdrive.py new file mode 100644 index 00000000..e656abe8 --- /dev/null +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncKdrive.py 
@@ -0,0 +1,439 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""kDrive bootstrap for the unified knowledge ingestion lane. + +Walks every ragIndexEnabled kDrive DataSource, downloads file items and +hands them to KnowledgeService.requestIngestion. Idempotency is provided +by the ingestion facade (content-hash dedup). +""" + +from __future__ import annotations + +import asyncio +import hashlib +import logging +import time +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Optional + +from modules.datamodels.datamodelExtraction import ExtractionOptions +from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + WalkerTimeout, + downloadWithTimeout, + extractWithTimeout, + ingestWithTimeout, + logItemStart, +) + +logger = logging.getLogger(__name__) + +MAX_ITEMS_DEFAULT = 500 +MAX_BYTES_DEFAULT = 200 * 1024 * 1024 +MAX_FILE_SIZE_DEFAULT = 25 * 1024 * 1024 +SKIP_MIME_PREFIXES_DEFAULT = ("video/", "audio/") +MAX_DEPTH_DEFAULT = 4 + + +@dataclass +class KdriveBootstrapLimits: + maxItems: int = MAX_ITEMS_DEFAULT + maxBytes: int = MAX_BYTES_DEFAULT + maxFileSize: int = MAX_FILE_SIZE_DEFAULT + skipMimePrefixes: tuple = SKIP_MIME_PREFIXES_DEFAULT + maxDepth: int = MAX_DEPTH_DEFAULT + neutralize: bool = False + + +@dataclass +class KdriveBootstrapResult: + connectionId: str + indexed: int = 0 + skippedDuplicate: int = 0 + skippedPolicy: int = 0 + failed: int = 0 + bytesProcessed: int = 0 + errors: List[str] = field(default_factory=list) + + +def _syntheticFileId(connectionId: str, externalItemId: str) -> str: + token = hashlib.sha256(f"{connectionId}:{externalItemId}".encode("utf-8")).hexdigest()[:16] + return f"kd:{connectionId[:8]}:{token}" + + +def _toContentObjects(extracted, fileName: str) -> List[Dict[str, Any]]: + parts = getattr(extracted, "parts", None) or [] + out: List[Dict[str, Any]] = [] + for part in parts: + data = getattr(part, "data", None) or "" + if not data or not 
str(data).strip(): + continue + typeGroup = getattr(part, "typeGroup", "text") or "text" + contentType = "text" + if typeGroup == "image": + contentType = "image" + elif typeGroup in ("binary", "container"): + contentType = "other" + out.append({ + "contentObjectId": getattr(part, "id", ""), + "contentType": contentType, + "data": data, + "contextRef": { + "containerPath": fileName, + "location": getattr(part, "label", None) or "file", + **(getattr(part, "metadata", None) or {}), + }, + }) + return out + + +async def bootstrapKdrive( + connectionId: str, + *, + dataSources: Optional[List[Dict[str, Any]]] = None, + progressCb: Optional[Any] = None, + adapter: Any = None, + connection: Any = None, + knowledgeService: Any = None, + limits: Optional[KdriveBootstrapLimits] = None, + runExtractionFn: Optional[Callable[..., Any]] = None, +) -> Dict[str, Any]: + """Enumerate kDrive folders and ingest files via the facade.""" + if not dataSources: + return {"connectionId": connectionId, "skipped": True, "reason": "no_datasources"} + + if not limits: + limits = KdriveBootstrapLimits() + + startMs = time.time() + result = KdriveBootstrapResult(connectionId=connectionId) + + logger.info( + "ingestion.connection.bootstrap.started part=kdrive connectionId=%s dataSources=%d", + connectionId, len(dataSources), + extra={"event": "ingestion.connection.bootstrap.started", "part": "kdrive", + "connectionId": connectionId, "dataSourceCount": len(dataSources)}, + ) + + if adapter is None or knowledgeService is None or connection is None: + adapter, connection, knowledgeService = await _resolveDependencies(connectionId) + if runExtractionFn is None: + from modules.serviceCenter.services.serviceExtraction.subPipeline import runExtraction + from modules.serviceCenter.services.serviceExtraction.subRegistry import ( + ExtractorRegistry, ChunkerRegistry, + ) + extractorRegistry = ExtractorRegistry() + chunkerRegistry = ChunkerRegistry() + + def runExtractionFn(bytesData, name, mime, options): 
+ return runExtraction(extractorRegistry, chunkerRegistry, bytesData, name, mime, options) + + mandateId = str(getattr(connection, "mandateId", "") or "") if connection is not None else "" + userId = str(getattr(connection, "userId", "") or "") if connection is not None else "" + + cancelled = False + for ds in dataSources: + if result.indexed + result.skippedDuplicate >= limits.maxItems: + break + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + cancelled = True + break + + dsPath = ds.get("path", "") + dsId = ds.get("id", "") + dsNeutralize = ds.get("neutralize", False) + dsLimits = KdriveBootstrapLimits( + maxItems=limits.maxItems, + maxBytes=limits.maxBytes, + maxFileSize=limits.maxFileSize, + skipMimePrefixes=limits.skipMimePrefixes, + maxDepth=limits.maxDepth, + neutralize=dsNeutralize, + ) + + try: + await _walkFolder( + adapter=adapter, + knowledgeService=knowledgeService, + runExtractionFn=runExtractionFn, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + folderPath=dsPath, + depth=0, + limits=dsLimits, + result=result, + progressCb=progressCb, + dataSourceId=dsId, + ) + except Exception as exc: + logger.error("kdrive walk failed for ds %s path %s: %s", dsId, dsPath, exc, exc_info=True) + result.errors.append(f"walk({dsPath}): {exc}") + + finalResult = _finalizeResult(connectionId, result, startMs) + if cancelled: + finalResult["cancelled"] = True + return finalResult + + +async def _resolveDependencies(connectionId: str): + from modules.interfaces.interfaceDbApp import getRootInterface + from modules.auth import TokenManager + from modules.connectors.providerInfomaniak.connectorInfomaniak import InfomaniakConnector + from modules.serviceCenter import getService + from modules.serviceCenter.context import ServiceCenterContext + from modules.security.rootAccess import getRootUser + + rootInterface = getRootInterface() + connection = rootInterface.getUserConnectionById(connectionId) + if connection is 
None: + raise ValueError(f"UserConnection not found: {connectionId}") + + token = TokenManager().getFreshToken(connectionId) + if not token or not token.tokenAccess: + raise ValueError(f"No valid token for connection {connectionId}") + + provider = InfomaniakConnector(connection, token.tokenAccess) + adapter = provider.getServiceAdapter("kdrive") + + rootUser = getRootUser() + ctx = ServiceCenterContext( + user=rootUser, + mandate_id=str(getattr(connection, "mandateId", "") or ""), + ) + knowledgeService = getService("knowledge", ctx) + return adapter, connection, knowledgeService + + +async def _walkFolder( + *, + adapter, + knowledgeService, + runExtractionFn, + connectionId: str, + mandateId: str, + userId: str, + folderPath: str, + depth: int, + limits: KdriveBootstrapLimits, + result: KdriveBootstrapResult, + progressCb: Optional[Any], + dataSourceId: str = "", +) -> None: + if depth > limits.maxDepth: + return + if progressCb and hasattr(progressCb, "isCancelled") and progressCb.isCancelled(): + return + try: + entries = await adapter.browse(folderPath) + except Exception as exc: + logger.warning("kdrive browse %s failed: %s", folderPath, exc) + result.errors.append(f"browse({folderPath}): {exc}") + return + + for entry in entries: + if result.indexed + result.skippedDuplicate >= limits.maxItems: + return + if result.bytesProcessed >= limits.maxBytes: + return + if progressCb and hasattr(progressCb, "isCancelled") and (result.indexed + result.skippedDuplicate) % 50 == 0 and progressCb.isCancelled(): + return + + entryPath = getattr(entry, "path", "") or "" + if getattr(entry, "isFolder", False): + await _walkFolder( + adapter=adapter, + knowledgeService=knowledgeService, + runExtractionFn=runExtractionFn, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + folderPath=entryPath, + depth=depth + 1, + limits=limits, + result=result, + progressCb=progressCb, + dataSourceId=dataSourceId, + ) + continue + + mimeType = getattr(entry, "mimeType", 
None) or "application/octet-stream" + if any(mimeType.startswith(prefix) for prefix in limits.skipMimePrefixes): + result.skippedPolicy += 1 + continue + size = int(getattr(entry, "size", 0) or 0) + if size and size > limits.maxFileSize: + result.skippedPolicy += 1 + continue + + metadata = getattr(entry, "metadata", {}) or {} + externalItemId = metadata.get("id") or entryPath + revision = metadata.get("revision") or metadata.get("lastModified") + + await _ingestOne( + adapter=adapter, + knowledgeService=knowledgeService, + runExtractionFn=runExtractionFn, + connectionId=connectionId, + mandateId=mandateId, + userId=userId, + entry=entry, + entryPath=entryPath, + mimeType=mimeType, + externalItemId=externalItemId, + revision=revision, + limits=limits, + result=result, + progressCb=progressCb, + dataSourceId=dataSourceId, + ) + + +async def _ingestOne( + *, + adapter, + knowledgeService, + runExtractionFn, + connectionId: str, + mandateId: str, + userId: str, + entry, + entryPath: str, + mimeType: str, + externalItemId: str, + revision: Optional[str], + limits: KdriveBootstrapLimits, + result: KdriveBootstrapResult, + progressCb: Optional[Any], + dataSourceId: str = "", +) -> None: + from modules.serviceCenter.services.serviceKnowledge.mainServiceKnowledge import IngestionJob + + syntheticFileId = _syntheticFileId(connectionId, externalItemId) + fileName = getattr(entry, "name", "") or externalItemId + declaredSize = int(getattr(entry, "size", 0) or 0) or None + logItemStart("kdrive", entryPath, sizeBytes=declaredSize, mime=mimeType) + + try: + downloadResult = await downloadWithTimeout(adapter.download(entryPath), label=entryPath) + fileBytes = getattr(downloadResult, "data", None) + dlFileName = getattr(downloadResult, "fileName", None) + dlMimeType = getattr(downloadResult, "mimeType", None) + if dlFileName: + fileName = dlFileName + if dlMimeType: + mimeType = dlMimeType + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + 
return + except Exception as exc: + logger.warning("kdrive download %s failed: %s", entryPath, exc) + result.failed += 1 + result.errors.append(f"download({entryPath}): {exc}") + return + if not fileBytes: + result.failed += 1 + return + + result.bytesProcessed += len(fileBytes) + + try: + extracted = await extractWithTimeout( + runExtractionFn, + fileBytes, fileName, mimeType, + ExtractionOptions(mergeStrategy=None), + label=entryPath, + ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return + except Exception as exc: + logger.warning("kdrive extraction %s failed: %s", entryPath, exc) + result.failed += 1 + result.errors.append(f"extract({entryPath}): {exc}") + return + + contentObjects = _toContentObjects(extracted, fileName) + if not contentObjects: + result.skippedPolicy += 1 + return + + provenance: Dict[str, Any] = { + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "infomaniak", + "service": "kdrive", + "externalItemId": externalItemId, + "externalPath": entryPath, + "revision": revision, + } + try: + handle = await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="kdrive_item", + sourceId=syntheticFileId, + fileName=fileName, + mimeType=mimeType, + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + contentVersion=revision, + neutralize=limits.neutralize, + provenance=provenance, + ) + ), + label=entryPath, + ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return + except Exception as exc: + logger.error("kdrive ingestion %s failed: %s", entryPath, exc, exc_info=True) + result.failed += 1 + result.errors.append(f"ingest({entryPath}): {exc}") + return + + if handle.status == "duplicate": + result.skippedDuplicate += 1 + elif handle.status == "indexed": + result.indexed += 1 + else: + result.failed += 1 + if handle.error: + result.errors.append(f"ingest({entryPath}): {handle.error}") + + 
processed = result.indexed + result.skippedDuplicate + if progressCb is not None and processed % 5 == 0: + try: + progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert") + except Exception: + pass + + await asyncio.sleep(0) + + +def _finalizeResult(connectionId: str, result: KdriveBootstrapResult, startMs: float) -> Dict[str, Any]: + durationMs = int((time.time() - startMs) * 1000) + logger.info( + "ingestion.connection.bootstrap.done part=kdrive connectionId=%s indexed=%d skippedDup=%d skippedPolicy=%d failed=%d durationMs=%d", + connectionId, + result.indexed, result.skippedDuplicate, result.skippedPolicy, result.failed, + durationMs, + extra={"event": "ingestion.connection.bootstrap.done", "part": "kdrive", + "connectionId": connectionId, "indexed": result.indexed, + "skippedDup": result.skippedDuplicate, "skippedPolicy": result.skippedPolicy, + "failed": result.failed, "durationMs": durationMs}, + ) + return { + "connectionId": result.connectionId, + "indexed": result.indexed, + "skippedDuplicate": result.skippedDuplicate, + "skippedPolicy": result.skippedPolicy, + "failed": result.failed, + "bytesProcessed": result.bytesProcessed, + "durationMs": durationMs, + "errors": result.errors[:20], + } diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py index 3f4a8afb..17220d97 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncOutlook.py @@ -21,6 +21,12 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from modules.serviceCenter.services.serviceKnowledge.subTextClean import cleanEmailBody +from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + WalkerTimeout, + extractWithTimeout, + ingestWithTimeout, + logItemStart, +) logger = logging.getLogger(__name__) @@ -384,34 
+390,42 @@ async def _ingestMessage( subject = message.get("subject") or "(no subject)" syntheticId = _syntheticMessageId(connectionId, messageId) fileName = f"{subject[:80].strip()}.eml" if subject else f"{messageId}.eml" + logItemStart("outlook", messageId, mime="message/rfc822") contentObjects = _buildContentObjects( message, limits.maxBodyChars, mailContentDepth=limits.mailContentDepth ) # Always at least the header is emitted, so `contentObjects` is non-empty. try: - handle = await knowledgeService.requestIngestion( - IngestionJob( - sourceKind="outlook_message", - sourceId=syntheticId, - fileName=fileName, - mimeType="message/rfc822", - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - contentVersion=revision, - neutralize=limits.neutralize, - provenance={ - "connectionId": connectionId, - "dataSourceId": dataSourceId, - "authority": "msft", - "service": "outlook", - "externalItemId": messageId, - "internetMessageId": message.get("internetMessageId"), - "tier": limits.mailContentDepth, - }, - ) + handle = await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="outlook_message", + sourceId=syntheticId, + fileName=fileName, + mimeType="message/rfc822", + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + contentVersion=revision, + neutralize=limits.neutralize, + provenance={ + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "msft", + "service": "outlook", + "externalItemId": messageId, + "internetMessageId": message.get("internetMessageId"), + "tier": limits.mailContentDepth, + }, + ) + ), + label=messageId, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.error("outlook ingestion %s failed: %s", messageId, exc, exc_info=True) result.failed += 1 @@ -443,18 +457,16 @@ async def _ingestMessage( logger.warning("outlook attachments %s failed: %s", messageId, exc) 
result.errors.append(f"attachments({messageId}): {exc}") - if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: - processed = result.indexed + result.skippedDuplicate + processed = result.indexed + result.skippedDuplicate + if progressCb is not None and processed % 5 == 0: try: - progressCb( - min(90, 10 + int(80 * processed / max(1, limits.maxMessages))), - f"outlook processed={processed}", - ) + progressCb(0, f"{processed} Mails verarbeitet, {result.indexed} indexiert") except Exception: pass - logger.info( - "ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d", - processed, result.skippedDuplicate, result.failed, + if processed % 50 == 0: + logger.info( + "ingestion.connection.bootstrap.progress part=outlook processed=%d skippedDup=%d failed=%d", + processed, result.skippedDuplicate, result.failed, extra={ "event": "ingestion.connection.bootstrap.progress", "part": "outlook", @@ -518,13 +530,22 @@ async def _ingestAttachments( mimeType = attachment.get("contentType") or "application/octet-stream" attachmentId = attachment.get("id") or fileName syntheticId = _syntheticAttachmentId(connectionId, messageId, attachmentId) + attLabel = f"{messageId}/att:{attachmentId}/{fileName}" + logItemStart("outlook-attachment", attLabel, sizeBytes=size or None, mime=mimeType) - try: - extracted = runExtraction( + def _runAttExtraction(): + return runExtraction( extractorRegistry, chunkerRegistry, rawBytes, fileName, mimeType, ExtractionOptions(mergeStrategy=None), ) + + try: + extracted = await extractWithTimeout(_runAttExtraction, label=attLabel) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + continue except Exception as exc: logger.warning("outlook attachment extract %s failed: %s", attachmentId, exc) result.failed += 1 @@ -556,28 +577,34 @@ async def _ingestAttachments( continue try: - await knowledgeService.requestIngestion( - IngestionJob( - 
sourceKind="outlook_attachment", - sourceId=syntheticId, - fileName=fileName, - mimeType=mimeType, - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - neutralize=limits.neutralize, - provenance={ - "connectionId": connectionId, - "dataSourceId": dataSourceId, - "authority": "msft", - "service": "outlook", - "parentId": parentSyntheticId, - "externalItemId": attachmentId, - "parentMessageId": messageId, - }, - ) + await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="outlook_attachment", + sourceId=syntheticId, + fileName=fileName, + mimeType=mimeType, + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + neutralize=limits.neutralize, + provenance={ + "connectionId": connectionId, + "dataSourceId": dataSourceId, + "authority": "msft", + "service": "outlook", + "parentId": parentSyntheticId, + "externalItemId": attachmentId, + "parentMessageId": messageId, + }, + ) + ), + label=attLabel, ) result.attachmentsIndexed += 1 + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) except Exception as exc: logger.warning("outlook attachment ingest %s failed: %s", attachmentId, exc) result.failed += 1 diff --git a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py index f664f1a8..892e41ba 100644 --- a/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py +++ b/modules/serviceCenter/services/serviceKnowledge/subConnectorSyncSharepoint.py @@ -20,6 +20,13 @@ from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Optional from modules.datamodels.datamodelExtraction import ExtractionOptions +from modules.serviceCenter.services.serviceKnowledge.subWalkerHelpers import ( + WalkerTimeout, + downloadWithTimeout, + extractWithTimeout, + ingestWithTimeout, + logItemStart, +) logger = logging.getLogger(__name__) @@ 
-330,9 +337,15 @@ async def _ingestOne( syntheticFileId = _syntheticFileId(connectionId, externalItemId) fileName = getattr(entry, "name", "") or externalItemId + declaredSize = int(getattr(entry, "size", 0) or 0) or None + logItemStart("sharepoint", entryPath, sizeBytes=declaredSize, mime=mimeType) try: - fileBytes = await adapter.download(entryPath) + fileBytes = await downloadWithTimeout(adapter.download(entryPath), label=entryPath) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.warning("sharepoint download %s failed: %s", entryPath, exc) result.failed += 1 @@ -345,10 +358,16 @@ async def _ingestOne( result.bytesProcessed += len(fileBytes) try: - extracted = runExtractionFn( + extracted = await extractWithTimeout( + runExtractionFn, fileBytes, fileName, mimeType, ExtractionOptions(mergeStrategy=None), + label=entryPath, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return except Exception as exc: logger.warning("sharepoint extraction %s failed: %s", entryPath, exc) result.failed += 1 @@ -370,20 +389,27 @@ async def _ingestOne( "revision": revision, } try: - handle = await knowledgeService.requestIngestion( - IngestionJob( - sourceKind="sharepoint_item", - sourceId=syntheticFileId, - fileName=fileName, - mimeType=mimeType, - userId=userId, - mandateId=mandateId, - contentObjects=contentObjects, - contentVersion=revision, - neutralize=limits.neutralize, - provenance=provenance, - ) + handle = await ingestWithTimeout( + knowledgeService.requestIngestion( + IngestionJob( + sourceKind="sharepoint_item", + sourceId=syntheticFileId, + fileName=fileName, + mimeType=mimeType, + userId=userId, + mandateId=mandateId, + contentObjects=contentObjects, + contentVersion=revision, + neutralize=limits.neutralize, + provenance=provenance, + ) + ), + label=entryPath, ) + except WalkerTimeout as exc: + result.failed += 1 + result.errors.append(str(exc)) + return 
except Exception as exc: logger.error("sharepoint ingestion %s failed: %s", entryPath, exc, exc_info=True) result.failed += 1 @@ -399,27 +425,17 @@ async def _ingestOne( if handle.error: result.errors.append(f"ingest({entryPath}): {handle.error}") - if progressCb is not None and (result.indexed + result.skippedDuplicate) % 50 == 0: - processed = result.indexed + result.skippedDuplicate + processed = result.indexed + result.skippedDuplicate + if progressCb is not None and processed % 5 == 0: try: - progressCb( - min(90, 10 + int(80 * processed / max(1, limits.maxItems))), - f"sharepoint processed={processed}", - ) + progressCb(0, f"{processed} Dateien verarbeitet, {result.indexed} indexiert") except Exception: pass - logger.info( - "ingestion.connection.bootstrap.progress part=sharepoint processed=%d skippedDup=%d failed=%d", - processed, result.skippedDuplicate, result.failed, - extra={ - "event": "ingestion.connection.bootstrap.progress", - "part": "sharepoint", - "connectionId": connectionId, - "processed": processed, - "skippedDup": result.skippedDuplicate, - "failed": result.failed, - }, - ) + if processed % 50 == 0: + logger.info( + "ingestion.connection.bootstrap.progress part=sharepoint processed=%d indexed=%d failed=%d", + processed, result.indexed, result.failed, + ) # Yield so the event loop can interleave other tasks (download/extract are # CPU-ish and extraction uses sync libs; cooperative scheduling prevents diff --git a/modules/serviceCenter/services/serviceKnowledge/subWalkerHelpers.py b/modules/serviceCenter/services/serviceKnowledge/subWalkerHelpers.py new file mode 100644 index 00000000..8e65fd0f --- /dev/null +++ b/modules/serviceCenter/services/serviceKnowledge/subWalkerHelpers.py @@ -0,0 +1,116 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Shared helpers for ingestion walkers (timeouts, per-item logging). 
+ +Walkers (sharepoint, gdrive, gmail, outlook, clickup, kdrive) all face the +same risks: + +- A single `adapter.download()` call can hang on the network for hours. +- A single `runExtraction()` call can hang on a corrupt PDF/Office doc inside + a sync extractor library, blocking the asyncio loop. +- A single `requestIngestion()` call can stall on the embedding API. + +Without timeouts, one bad item freezes the whole bootstrap job and we end +up with "Job stuck at 10% for 10h" zombies. + +These helpers wrap each phase in `asyncio.wait_for`. Sync extraction runs +on a worker thread so the loop stays responsive. Every wrapped call also +emits a short start/done log line, so when something hangs we know the +exact item that caused it (path, size, mime). +""" + +from __future__ import annotations + +import asyncio +import logging +from typing import Any, Awaitable, Callable, Optional + +logger = logging.getLogger(__name__) + +DOWNLOAD_TIMEOUT_S = 60 +EXTRACTION_TIMEOUT_S = 90 +INGEST_TIMEOUT_S = 60 + + +class WalkerTimeout(Exception): + """Raised when a walker phase exceeds its timeout budget.""" + + +async def downloadWithTimeout( + awaitable: Awaitable[Any], + *, + label: str, + timeoutSeconds: int = DOWNLOAD_TIMEOUT_S, +) -> Any: + """Run a download awaitable with a hard timeout. + + `label` is a short human-readable identifier (typically the external path) + used in log messages so we can pinpoint the offending item in case of a + hang or timeout. 
+ """ + logger.info("walker.download.start %s timeout=%ds", label, timeoutSeconds) + try: + result = await asyncio.wait_for(awaitable, timeout=timeoutSeconds) + logger.debug("walker.download.done %s", label) + return result + except asyncio.TimeoutError as ex: + logger.warning("walker.download.timeout %s after %ds", label, timeoutSeconds) + raise WalkerTimeout(f"download timeout after {timeoutSeconds}s: {label}") from ex + + +async def extractWithTimeout( + syncFn: Callable[..., Any], + *args: Any, + label: str, + timeoutSeconds: int = EXTRACTION_TIMEOUT_S, +) -> Any: + """Run a synchronous extraction function on a worker thread with timeout. + + Sync extractors (PDF, OCR, MS Office) cannot be cancelled cleanly from + asyncio; `wait_for` only protects the awaiter. The underlying thread may + keep running until the process exits — but at least the walker proceeds + to the next item instead of freezing forever. + """ + logger.info("walker.extract.start %s timeout=%ds", label, timeoutSeconds) + try: + result = await asyncio.wait_for( + asyncio.to_thread(syncFn, *args), + timeout=timeoutSeconds, + ) + logger.debug("walker.extract.done %s", label) + return result + except asyncio.TimeoutError as ex: + logger.warning("walker.extract.timeout %s after %ds", label, timeoutSeconds) + raise WalkerTimeout(f"extract timeout after {timeoutSeconds}s: {label}") from ex + + +async def ingestWithTimeout( + awaitable: Awaitable[Any], + *, + label: str, + timeoutSeconds: int = INGEST_TIMEOUT_S, +) -> Any: + """Run an ingestion request with a hard timeout.""" + logger.debug("walker.ingest.start %s timeout=%ds", label, timeoutSeconds) + try: + result = await asyncio.wait_for(awaitable, timeout=timeoutSeconds) + logger.debug("walker.ingest.done %s", label) + return result + except asyncio.TimeoutError as ex: + logger.warning("walker.ingest.timeout %s after %ds", label, timeoutSeconds) + raise WalkerTimeout(f"ingest timeout after {timeoutSeconds}s: {label}") from ex + + +def 
logItemStart(service: str, label: str, *, sizeBytes: Optional[int] = None, mime: Optional[str] = None) -> None: + """Log that processing of one item is about to begin. + + When the worker hangs, the LAST `walker.item.start` line in the log + points to the exact item that caused the freeze. This is the single + most valuable diagnostic for stuck-job triage. + """ + parts = [f"walker.item.start service={service} path={label}"] + if sizeBytes is not None: + parts.append(f"size={sizeBytes}") + if mime: + parts.append(f"mime={mime}") + logger.info(" ".join(parts)) diff --git a/modules/shared/aiAuditLogger.py b/modules/shared/aiAuditLogger.py index 04255ce1..5da105a8 100644 --- a/modules/shared/aiAuditLogger.py +++ b/modules/shared/aiAuditLogger.py @@ -85,6 +85,11 @@ class AiAuditLogger: try: from modules.datamodels.datamodelAiAudit import AiAuditLogEntry + if contentInput: + contentInput = contentInput.replace("\x00", "") + if contentOutput: + contentOutput = contentOutput.replace("\x00", "") + inputPreview = (contentInput or "")[:_PREVIEW_LENGTH] or None outputPreview = (contentOutput or "")[:_PREVIEW_LENGTH] or None inputHash = hashlib.sha256(contentInput.encode("utf-8")).hexdigest() if contentInput else None diff --git a/modules/system/mainSystem.py b/modules/system/mainSystem.py index 21d0cbee..aacc6d3c 100644 --- a/modules/system/mainSystem.py +++ b/modules/system/mainSystem.py @@ -330,6 +330,16 @@ NAVIGATION_SECTIONS = [ "adminOnly": True, "sysAdminOnly": True, }, + { + "id": "admin-stt-benchmark", + "objectKey": "ui.admin.sttBenchmark", + "label": t("STT Benchmark"), + "icon": "FaMicrophone", + "path": "/admin/stt-benchmark", + "order": 92, + "adminOnly": True, + "sysAdminOnly": True, + }, { "id": "admin-languages", "objectKey": "ui.admin.languages", diff --git a/tests/eval/__init__.py b/tests/eval/__init__.py new file mode 100644 index 00000000..fde23b13 --- /dev/null +++ b/tests/eval/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights 
reserved. +"""Eval harness for the Feature Data Sub-Agent (Phase 1.5).""" diff --git a/tests/eval/fakeFeatureDataProvider.py b/tests/eval/fakeFeatureDataProvider.py new file mode 100644 index 00000000..55557e7d --- /dev/null +++ b/tests/eval/fakeFeatureDataProvider.py @@ -0,0 +1,246 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""In-memory drop-in for FeatureDataProvider used by the eval harness. + +Implements the same three public methods (browseTable / queryTable / +aggregateTable) plus the small surface the Sub-Agent reads (getActualColumns), +but runs all filters/aggregations in Python over the BenchmarkFixture rows. + +This keeps the eval hermetic: no DB connection, no fixtures to insert/clean, +no flakiness from shared test schemas. Only the LLM call is real. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + + +_ALLOWED_AGGREGATES = {"SUM", "COUNT", "AVG", "MIN", "MAX"} + + +class FakeFeatureDataProvider: + """In-memory provider compatible with :class:`FeatureDataProvider`.""" + + def __init__( + self, + rowsByTable: Dict[str, List[Dict[str, Any]]], + availableTables: Optional[List[Dict[str, Any]]] = None, + ) -> None: + self._rowsByTable = {name: list(rows) for name, rows in rowsByTable.items()} + self._availableTables = list(availableTables or []) + self.callLog: List[Dict[str, Any]] = [] + + def getAvailableTables(self, featureCode: str) -> List[Dict[str, Any]]: # noqa: ARG002 + return list(self._availableTables) + + def getTableSchema(self, featureCode: str, tableName: str) -> Optional[Dict[str, Any]]: # noqa: ARG002 + for obj in self._availableTables: + if obj.get("meta", {}).get("table") == tableName: + return obj + return None + + def getActualColumns(self, tableName: str) -> List[str]: + rows = self._rowsByTable.get(tableName, []) + if not rows: + return [] + seen: List[str] = [] + seenSet: set = set() + for row in rows: + for key in row.keys(): + if key not in seenSet: + seen.append(key) 
+ seenSet.add(key) + return seen + + def browseTable( + self, + tableName: str, + featureInstanceId: str, + mandateId: str, + fields: List[str] = None, + limit: int = 50, + offset: int = 0, + extraFilters: Optional[List[Dict[str, Any]]] = None, + ) -> Dict[str, Any]: + self.callLog.append({"method": "browseTable", "table": tableName, "fields": fields, "limit": limit}) + rows = self._scopeRows(tableName, featureInstanceId, mandateId) + rows = _applyFilters(rows, extraFilters) + total = len(rows) + rows = rows[offset : offset + limit] + if fields: + rows = [{k: v for k, v in row.items() if k in fields} for row in rows] + return {"rows": rows, "total": total, "limit": limit, "offset": offset} + + def queryTable( + self, + tableName: str, + featureInstanceId: str, + mandateId: str, + filters: List[Dict[str, Any]] = None, + fields: List[str] = None, + orderBy: str = None, + limit: int = 50, + offset: int = 0, + extraFilters: Optional[List[Dict[str, Any]]] = None, + ) -> Dict[str, Any]: + self.callLog.append({ + "method": "queryTable", "table": tableName, "filters": filters, + "fields": fields, "orderBy": orderBy, "limit": limit, + }) + rows = self._scopeRows(tableName, featureInstanceId, mandateId) + combined = list(filters or []) + list(extraFilters or []) + rows = _applyFilters(rows, combined) + if orderBy: + try: + rows = sorted(rows, key=lambda r: (r.get(orderBy) is None, r.get(orderBy))) + except TypeError: + rows = sorted(rows, key=lambda r: str(r.get(orderBy))) + total = len(rows) + rows = rows[offset : offset + limit] + if fields: + rows = [{k: v for k, v in row.items() if k in fields} for row in rows] + return {"rows": rows, "total": total, "limit": limit, "offset": offset} + + def aggregateTable( + self, + tableName: str, + featureInstanceId: str, + mandateId: str, + aggregate: str, + field: str, + groupBy: str = None, + extraFilters: Optional[List[Dict[str, Any]]] = None, + ) -> Dict[str, Any]: + self.callLog.append({ + "method": "aggregateTable", "table": 
tableName, + "aggregate": aggregate, "field": field, "groupBy": groupBy, + }) + aggregate = aggregate.upper() + if aggregate not in _ALLOWED_AGGREGATES: + return {"rows": [], "error": f"Unsupported aggregate: {aggregate}"} + rows = self._scopeRows(tableName, featureInstanceId, mandateId) + rows = _applyFilters(rows, extraFilters) + + if groupBy: + groups: Dict[Any, List[Dict[str, Any]]] = {} + for row in rows: + groups.setdefault(row.get(groupBy), []).append(row) + outRows = [ + {"groupValue": key, "result": _aggregate(aggregate, [r.get(field) for r in grp])} + for key, grp in groups.items() + ] + outRows.sort(key=lambda r: (r["result"] is None, -(r["result"] or 0))) + else: + outRows = [{"result": _aggregate(aggregate, [r.get(field) for r in rows])}] + + return { + "rows": outRows, + "aggregate": aggregate, + "field": field, + "groupBy": groupBy, + } + + def _scopeRows(self, tableName: str, featureInstanceId: str, mandateId: str) -> List[Dict[str, Any]]: + rows = self._rowsByTable.get(tableName, []) + return [ + row for row in rows + if (row.get("featureInstanceId") in (None, featureInstanceId)) + and (row.get("mandateId") in (None, mandateId)) + ] + + +def _applyFilters(rows: List[Dict[str, Any]], filters: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + if not filters: + return rows + out = rows + for f in filters: + field = f.get("field") + op = (f.get("op") or "=").upper() + value = f.get("value") + out = [r for r in out if _matchesFilter(r.get(field), op, value)] + return out + + +def _matchesFilter(rowValue: Any, op: str, filterValue: Any) -> bool: + if op in ("IS NULL",): + return rowValue is None + if op in ("IS NOT NULL",): + return rowValue is not None + if rowValue is None: + return False + if op == "=": + return _coerceEqual(rowValue, filterValue) + if op == "!=": + return not _coerceEqual(rowValue, filterValue) + if op == ">": + return _coerceFloat(rowValue) > _coerceFloat(filterValue) + if op == "<": + return _coerceFloat(rowValue) < 
_coerceFloat(filterValue) + if op == ">=": + return _coerceFloat(rowValue) >= _coerceFloat(filterValue) + if op == "<=": + return _coerceFloat(rowValue) <= _coerceFloat(filterValue) + if op in ("LIKE", "ILIKE"): + pattern = str(filterValue or "") + target = str(rowValue) + if op == "ILIKE": + pattern = pattern.lower() + target = target.lower() + return _sqlLike(target, pattern) + if op == "IN": + if isinstance(filterValue, (list, tuple, set)): + return any(_coerceEqual(rowValue, v) for v in filterValue) + return _coerceEqual(rowValue, filterValue) + return False + + +def _coerceEqual(a: Any, b: Any) -> bool: + if a == b: + return True + try: + return str(a) == str(b) + except Exception: + return False + + +def _coerceFloat(value: Any) -> float: + if value is None: + return 0.0 + try: + return float(value) + except (TypeError, ValueError): + return 0.0 + + +def _sqlLike(value: str, pattern: str) -> bool: + """Approximate SQL LIKE -- only % and _ wildcards.""" + import re + regex = "" + i = 0 + while i < len(pattern): + ch = pattern[i] + if ch == "%": + regex += ".*" + elif ch == "_": + regex += "." + else: + regex += re.escape(ch) + i += 1 + return re.fullmatch(regex, value or "") is not None + + +def _aggregate(op: str, values: List[Any]) -> Any: + if op == "COUNT": + return sum(1 for v in values if v is not None) + nums = [_coerceFloat(v) for v in values if v is not None] + if not nums: + return 0 if op == "SUM" else None + if op == "SUM": + return round(sum(nums), 4) + if op == "AVG": + return round(sum(nums) / len(nums), 4) + if op == "MIN": + return min(nums) + if op == "MAX": + return max(nums) + return None diff --git a/tests/eval/runTrusteeBenchmark.py b/tests/eval/runTrusteeBenchmark.py new file mode 100644 index 00000000..3f298173 --- /dev/null +++ b/tests/eval/runTrusteeBenchmark.py @@ -0,0 +1,735 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Trustee Sub-Agent Eval Harness (Phase 1.5). 
+ +Standalone runner that fires real AI calls against the Feature Data +Sub-Agent in three configurations: + +* ``baseline`` -- production code without the pre-execute validator + (Repair-Loop disabled, Trustee domain hints active). +* ``phase1`` -- pre-execute validator on (Repair-Loop active), + domain hints active, no ontology yet. +* ``phase2`` -- validator on, ontology-driven schema context + + constraints (replaces hand-written domain hints). + +For each mode we run all 19 gold-standard questions against an +in-memory :class:`FakeFeatureDataProvider`, capture the agent's tool +calls and final answer, score them against the gold standard, and +write a Markdown report to ``local/notes/`` for analysis. + +Usage:: + + cd gateway + python -m tests.eval.runTrusteeBenchmark # all 3 modes + python -m tests.eval.runTrusteeBenchmark phase1 # one mode only + python -m tests.eval.runTrusteeBenchmark baseline phase1 +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import re +import sys +import time +import uuid +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +# --------------------------------------------------------------------------- +# Path setup so `python -m tests.eval.runTrusteeBenchmark` works from gateway/ +# --------------------------------------------------------------------------- +_GATEWAY_DIR = Path(__file__).resolve().parents[2] +if str(_GATEWAY_DIR) not in sys.path: + sys.path.insert(0, str(_GATEWAY_DIR)) + +import yaml # noqa: E402 + +from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( # noqa: E402 + AgentConfig, + AgentEventTypeEnum, +) +from modules.datamodels.datamodelAi import ( # noqa: E402 + AiCallRequest, + AiCallResponse, + OperationTypeEnum, +) +from modules.serviceCenter.services.serviceAgent.agentLoop import runAgentLoop # noqa: E402 +from modules.serviceCenter.services.serviceAgent.featureDataAgent 
@dataclass
class _ModeConfig:
    """Static description of one benchmark configuration."""

    # Key used to select the mode (also the key in ``_MODES``).
    name: str
    # Human-readable title used in console output and reports.
    label: str
    # False selects the no-op validator (no query is ever rejected).
    useValidator: bool
    # True selects the ontology-driven validator / prompt path.
    useOntology: bool


# Registry of all known modes, keyed by ``_ModeConfig.name``.
_MODES: Dict[str, _ModeConfig] = {
    cfg.name: cfg
    for cfg in (
        _ModeConfig("baseline", "Baseline (no validator)", False, False),
        _ModeConfig("phase1", "Phase 1 (validator on)", True, False),
        _ModeConfig("phase2", "Phase 2 (validator + ontology)", True, True),
    )
}
def _applyEnvForMode(mode: _ModeConfig) -> None:
    """Toggle the ontology kill-switch environment variable for ``mode``.

    Modes without ontology set ``POWERON_DISABLE_FEATURE_ONTOLOGY=1``;
    ontology modes remove the variable again (a missing variable is fine).
    The change is process-wide and stays in effect until the next call.
    """
    flagName = "POWERON_DISABLE_FEATURE_ONTOLOGY"
    if not mode.useOntology:
        os.environ[flagName] = "1"
        return
    # Ontology requested: make sure a flag left over from a previous mode is gone.
    os.environ.pop(flagName, None)
@dataclass
class _Question:
    """One gold-standard benchmark question plus everything needed to score it."""

    id: str
    question: str
    intent: str
    # Tool names of which at least one must be called (empty = no pattern check).
    expectedTools: List[str]
    expectedTable: Optional[str]
    expectedAggregate: Optional[str]
    expectedAggregateField: Optional[str]
    # field -> value; a key ending in "Like" demands a LIKE/ILIKE filter.
    requiredFilters: Dict[str, Any]
    forbiddenTools: List[str]
    expectedNumbers: List[float]
    expectedAnswerContains: List[str]
    # Relative tolerance applied to each expected number.
    numericTolerance: float


def _loadQuestions(yamlPath: Path) -> List[_Question]:
    """Parse the gold-standard question file into ``_Question`` objects.

    Optional keys fall back to empty lists/dicts/``None``;
    ``numericTolerance`` defaults to 0.005 only when the key is absent or
    null, so an explicit ``0`` is honoured.

    Args:
        yamlPath: Path of a YAML file whose root is a list of mappings.

    Returns:
        The questions in file order; an empty list for an empty document.

    Raises:
        ValueError: If the YAML root is neither empty nor a list.
        KeyError: If an entry lacks the mandatory ``id`` or ``question``.
    """
    with open(yamlPath, "r", encoding="utf-8") as f:
        rawList = yaml.safe_load(f)

    # safe_load yields None for an empty document; iterating it would raise
    # an unhelpful TypeError.
    if rawList is None:
        return []
    if not isinstance(rawList, list):
        raise ValueError(
            f"{yamlPath}: expected a YAML list of questions, got {type(rawList).__name__}"
        )

    questions: List[_Question] = []
    for raw in rawList:
        tolerance = raw.get("numericTolerance")
        questions.append(_Question(
            id=raw["id"],
            question=raw["question"],
            intent=raw.get("intent", ""),
            expectedTools=list(raw.get("expectedTools") or []),
            expectedTable=raw.get("expectedTable"),
            expectedAggregate=raw.get("expectedAggregate"),
            expectedAggregateField=raw.get("expectedAggregateField"),
            requiredFilters=dict(raw.get("requiredFilters") or {}),
            forbiddenTools=list(raw.get("forbiddenTools") or []),
            expectedNumbers=[float(x) for x in (raw.get("expectedNumbers") or [])],
            expectedAnswerContains=[str(x) for x in (raw.get("expectedAnswerContains") or [])],
            # `or 0.005` would silently turn an explicit 0 into the default.
            numericTolerance=0.005 if tolerance is None else float(tolerance),
        ))
    return questions
@dataclass
class _Score:
    """Binary verdicts for one question/run pair."""

    patternOk: bool = False
    forbidOk: bool = False
    numericOk: bool = False
    # True only when all three checks above passed.
    accuracyOk: bool = False
    notes: List[str] = field(default_factory=list)


def _scoreRun(question: _Question, run: _RunResult) -> _Score:
    """Score one run against its gold-standard question.

    A run that reported an error is failed on every criterion without
    running any check; the error text is kept as a note.
    """
    if run.error:
        return _Score(notes=[f"Sub-agent error: {run.error}"])

    patternPassed = _checkPattern(question, run)
    forbidPassed = _checkForbid(question, run)
    numericPassed = _checkNumeric(question, run)
    return _Score(
        patternOk=patternPassed,
        forbidOk=forbidPassed,
        numericOk=numericPassed,
        accuracyOk=patternPassed and forbidPassed and numericPassed,
    )
def _filtersSatisfied(required: Dict[str, Any], actualFilters: List[Dict[str, Any]]) -> bool:
    """Check that a tool call's filters contain every required filter.

    A required key ending in ``Like`` (e.g. ``accountNumberLike``) must be
    matched by a filter on the field name without that suffix whose ``op``
    is LIKE/ILIKE (any case) and whose value equals the required pattern
    as a string. Any other key needs a filter on that field whose value
    matches per ``_filterValueEqual``; the operator is not inspected.

    ``actualFilters`` originates from LLM-generated tool arguments, so a
    payload that is not a list, entries that are not dicts, and a missing
    or ``None`` ``op`` count as "not satisfied" instead of raising.

    Returns:
        True when ``required`` is empty or every requirement is met.
    """
    if not required:
        return True
    if not isinstance(actualFilters, (list, tuple)):
        return False
    candidates = [f for f in actualFilters if isinstance(f, dict)]

    for reqField, reqValue in required.items():
        if reqField.endswith("Like"):
            fieldName = reqField[:-4]
            wanted = str(reqValue)
            matched = any(
                f.get("field") == fieldName
                and str(f.get("op") or "").upper() in ("LIKE", "ILIKE")
                and str(f.get("value")) == wanted
                for f in candidates
            )
        else:
            matched = any(
                f.get("field") == reqField and _filterValueEqual(f.get("value"), reqValue)
                for f in candidates
            )
        if not matched:
            return False
    return True


def _filterValueEqual(a: Any, b: Any) -> bool:
    """Compare two filter values, tolerating type differences.

    Values are equal when ``a == b`` or when their whitespace-stripped
    string forms match, so ``"2025"`` equals ``2025``.
    """
    if a == b:
        return True
    try:
        return str(a).strip() == str(b).strip()
    except Exception:
        # A value whose __str__ raises simply does not match.
        return False
def _checkNumeric(question: _Question, run: _RunResult) -> bool:
    """Check the final answer for the expected numbers and substrings.

    Each expected number must be within tolerance of some number found in
    the answer. The tolerance is ``abs(expected) * numericTolerance`` with
    an absolute floor of 0.5. Each expected substring must occur
    case-insensitively. With no expectations the check passes.
    """
    text = (run.finalText or "")
    if question.expectedNumbers:
        textNumbers = _extractNumbers(text)
        for expected in question.expectedNumbers:
            tol = max(abs(expected) * question.numericTolerance, 0.5)
            if not any(abs(n - expected) <= tol for n in textNumbers):
                return False

    if question.expectedAnswerContains:
        lowered = text.lower()
        for needle in question.expectedAnswerContains:
            if needle.lower() not in lowered:
                return False

    return True


def _extractNumbers(text: str) -> List[float]:
    """Pick out all numbers from a free-text answer.

    Handles apostrophe thousand separators (``'`` and U+2019), comma
    decimals, mixed ``.``/``,`` grouping (the right-most separator is the
    decimal point) and plain ints/floats. A single comma with no dot is
    read as a decimal comma, so ``"180,500"`` parses as 180.5.

    Trailing separators are stripped before parsing, so a number that
    ends a sentence (``"152'400.00."``) is still recognised. A ``-`` that
    directly follows a digit is a range/date hyphen, not a sign:
    ``"2023-2025"`` yields 2023 and 2025. Unparseable tokens (for example
    ``"31.12.2025"``) are skipped.
    """
    cleaned = text.replace("\u2019", "'")
    # The lookbehind keeps "2023-2025" from producing -2025.
    tokens = re.findall(r"(?<!\d)-?\d[\d'.,]*", cleaned)
    out: List[float] = []
    for tok in tokens:
        # Drop sentence punctuation and dangling separators in one go; the
        # token always starts with a digit (or sign + digit), so something
        # parseable remains.
        norm = tok.rstrip(".,;'").replace("'", "")
        commas = norm.count(",")
        dots = norm.count(".")
        if commas == 1 and dots == 0:
            norm = norm.replace(",", ".")
        elif commas >= 1 and dots >= 1:
            if norm.rfind(",") > norm.rfind("."):
                norm = norm.replace(".", "").replace(",", ".")
            else:
                norm = norm.replace(",", "")
        else:
            norm = norm.replace(",", "")
        try:
            out.append(float(norm))
        except ValueError:
            continue
    return out
async def _runOneQuestion(
    *,
    services: Any,
    userId: str,
    mandateId: str,
    fixture: BenchmarkFixture,
    question: _Question,
    mode: _ModeConfig,
) -> _RunResult:
    """Execute a single sub-agent run for one question under one mode.

    Builds a fresh fake provider, validator and tool registry, streams
    the agent loop and folds its events into a ``_RunResult``. Failures
    never propagate: an exception raised by the loop is logged and stored
    in ``run.error``.

    Args:
        services: Service hub; only ``services.ai.callAi`` is used.
        userId: User id forwarded to the agent loop.
        mandateId: Mandate id forwarded to the agent loop.
        fixture: In-memory rows and table descriptors served by the fake provider.
        question: The question whose text becomes the agent prompt.
        mode: Selects the validator via ``_buildValidator``.

    Returns:
        The collected result, with ``durationS`` set to the elapsed time.
    """
    provider = FakeFeatureDataProvider(
        rowsByTable=fixture.rowsByTable,
        availableTables=fixture.selectedTables,
    )
    validator = _buildValidator(mode)
    # NOTE(review): the tool registry is scoped to the fixture's mandate while
    # the agent loop below receives the ``mandateId`` argument -- presumably
    # data scope vs. billing; confirm.
    registry = _buildSubAgentTools(
        provider=provider,
        featureInstanceId=fixture.featureInstanceId,
        mandateId=fixture.mandateId,
        tableFilters={},
        validator=validator,
    )

    systemPrompt = _buildSystemPrompt(
        featureCode="trustee",
        instanceLabel="Demo AG",
        selectedTables=fixture.selectedTables,
    )

    cost = 0.0

    async def _aiCallFn(req: AiCallRequest) -> AiCallResponse:
        # Accumulate the price of every AI call so the loop can read it back.
        nonlocal cost
        resp = await services.ai.callAi(req)
        cost += float(getattr(resp, "priceCHF", 0.0) or 0.0)
        return resp

    async def _getCost() -> float:
        return cost

    config = AgentConfig(
        maxRounds=6,
        maxCostCHF=0.50,
        operationType=OperationTypeEnum.DATA_QUERY,
    )

    run = _RunResult(questionId=question.id, finalText="")
    # perf_counter is monotonic; wall-clock time can jump during a long run.
    t0 = time.perf_counter()
    try:
        async for event in runAgentLoop(
            prompt=question.question,
            toolRegistry=registry,
            config=config,
            aiCallFn=_aiCallFn,
            getWorkflowCostFn=_getCost,
            workflowId=f"eval-{mode.name}-{question.id}-{uuid.uuid4().hex[:6]}",
            userId=userId,
            featureInstanceId=fixture.featureInstanceId,
            mandateId=mandateId,
            systemPromptOverride=systemPrompt,
        ):
            if event.type == AgentEventTypeEnum.FINAL:
                # FINAL replaces whatever streamed MESSAGE text was collected.
                run.finalText = event.content or run.finalText
            elif event.type == AgentEventTypeEnum.MESSAGE and event.content:
                run.finalText += event.content
            elif event.type == AgentEventTypeEnum.TOOL_CALL:
                run.toolCalls.append(dict(event.data or {}))
            elif event.type == AgentEventTypeEnum.TOOL_RESULT:
                run.toolResults.append(dict(event.data or {}))
            elif event.type == AgentEventTypeEnum.AGENT_SUMMARY:
                run.summary = dict(event.data or {})
            elif event.type == AgentEventTypeEnum.ERROR:
                # An ERROR event without text must still mark the run as
                # failed: an empty string is falsy and would be overlooked.
                message = event.content or "unspecified agent error"
                run.error = f"{run.error}; {message}" if run.error else message
    except Exception as e:
        run.error = f"{type(e).__name__}: {e}"
        logger.exception("Sub-agent run failed for %s/%s", mode.name, question.id)
    run.durationS = time.perf_counter() - t0
    return run
def _writeReport(reports: List[_ModeReport], outputPath: Path) -> None:
    """Render the benchmark results as Markdown and write them to disk.

    The report holds a summary table (one row per mode), a per-question
    table per mode and, for each question that did not reach
    ``accuracyOk``, a failure entry with the error, the first line of the
    answer (at most 240 characters, ``|`` replaced by ``/``) and the
    score notes. Parent directories of ``outputPath`` are created on
    demand; the file is written as UTF-8.

    An empty or whitespace-only answer -- the normal case for a run that
    ended in an error -- is rendered as an empty snippet rather than
    raising.
    """
    lines: List[str] = []
    lines.append("# Trustee Sub-Agent Benchmark Report")
    lines.append("")
    lines.append(f"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("")
    lines.append("## Summary")
    lines.append("")
    lines.append("| Mode | Questions | Accuracy | Pattern compliance | Repair conversion | Validator rejects | Rounds | Cost (CHF) |")
    lines.append("|---|---|---|---|---|---|---|---|")
    for rep in reports:
        lines.append(
            f"| {rep.mode.label} | {rep.total} | {rep.accuracy:.1%} | {rep.patternCompliance:.1%} | "
            f"{rep.repairConversionRate:.1%} | {rep.totalValidationFailures} | {rep.totalRounds} | "
            f"{rep.totalCostCHF:.4f} |"
        )
    lines.append("")
    lines.append("## Per-question detail")
    for rep in reports:
        lines.append("")
        lines.append(f"### {rep.mode.label}")
        lines.append("")
        lines.append("| id | acc | pattern | forbid | numeric | rounds | val-fail | repairs | cost CHF | duration | tools |")
        lines.append("|---|---|---|---|---|---|---|---|---|---|---|")
        for q, r, s in rep.perQuestion:
            # `args` may be present but None; treat that like a missing dict.
            toolList = ",".join(
                f"{c.get('toolName')}({(c.get('args') or {}).get('tableName', '?')})"
                for c in r.toolCalls
            )
            lines.append(
                f"| {q.id} | {_yn(s.accuracyOk)} | {_yn(s.patternOk)} | {_yn(s.forbidOk)} | {_yn(s.numericOk)} | "
                f"{r.rounds} | {r.validationFailures} | {r.repairAttempts}/{r.successAfterRepair} | "
                f"{r.costCHF:.4f} | {r.durationS:.1f}s | {toolList} |"
            )
        lines.append("")
        lines.append("#### Notes & failures")
        for q, r, s in rep.perQuestion:
            if s.accuracyOk:
                continue
            lines.append(f"- **{q.id}** ({q.intent}): pattern={s.patternOk} forbid={s.forbidOk} numeric={s.numericOk}")
            if r.error:
                lines.append(f" - error: `{r.error}`")
            # "".splitlines() is [], so indexing [0] directly would raise
            # IndexError for exactly the runs this section exists to explain.
            answerLines = (r.finalText or "").strip().replace("|", "/").splitlines()
            snippet = answerLines[0][:240] if answerLines else ""
            lines.append(f" - answer: `{snippet}`")
            for note in s.notes:
                lines.append(f" - note: {note}")
    outputPath.parent.mkdir(parents=True, exist_ok=True)
    outputPath.write_text("\n".join(lines), encoding="utf-8")


def _yn(b: bool) -> str:
    """Render a verdict as ``"OK"`` (truthy) or ``"FAIL"`` (falsy)."""
    return "OK" if b else "FAIL"
f"pattern={_yn(score.patternOk)} forbid={_yn(score.forbidOk)} " + f"numeric={_yn(score.numericOk)} rounds={run.rounds} cost={run.costCHF:.4f} " + f"val-fail={run.validationFailures} repairs={run.repairAttempts}/{run.successAfterRepair}", + flush=True, + ) + reports.append(rep) + + timestamp = time.strftime("%Y%m%d-%H%M%S") + outDir = _GATEWAY_DIR.parent / "local" / "notes" + reportPath = outDir / f"trustee-benchmark-{timestamp}.md" + _writeReport(reports, reportPath) + + rawJsonPath = outDir / f"trustee-benchmark-{timestamp}.json" + rawJsonPath.write_text( + json.dumps( + [ + { + "mode": rep.mode.name, + "accuracy": rep.accuracy, + "patternCompliance": rep.patternCompliance, + "repairConversionRate": rep.repairConversionRate, + "totalCostCHF": rep.totalCostCHF, + "totalRounds": rep.totalRounds, + "totalValidationFailures": rep.totalValidationFailures, + "items": [ + { + "questionId": q.id, + "intent": q.intent, + "accuracyOk": s.accuracyOk, + "patternOk": s.patternOk, + "forbidOk": s.forbidOk, + "numericOk": s.numericOk, + "rounds": r.rounds, + "validationFailures": r.validationFailures, + "repairAttempts": r.repairAttempts, + "successAfterRepair": r.successAfterRepair, + "costCHF": r.costCHF, + "durationS": r.durationS, + "finalText": (r.finalText or "")[:600], + "toolCalls": r.toolCalls, + "error": r.error, + } + for q, r, s in rep.perQuestion + ], + } + for rep in reports + ], + indent=2, + ensure_ascii=False, + ), + encoding="utf-8", + ) + + print(f"\nReport written: {reportPath}") + print(f"Raw JSON: {rawJsonPath}") + for rep in reports: + print(f" {rep.mode.label}: acc={rep.accuracy:.1%} pattern={rep.patternCompliance:.1%} cost={rep.totalCostCHF:.4f}") + + +def _parseArgs(argv: List[str]) -> Tuple[List[str], Optional[str]]: + modes: List[str] = [] + only: Optional[str] = None + for arg in argv: + if arg.startswith("--only="): + only = arg.split("=", 1)[1] + elif arg in _MODES: + modes.append(arg) + else: + print(f"Unknown argument: {arg!r}. 
Allowed modes: {list(_MODES)}") + sys.exit(2) + if not modes: + modes = ["baseline", "phase1", "phase2"] + return modes, only + + +def main() -> None: + modes, only = _parseArgs(sys.argv[1:]) + asyncio.run(_runMain(modes, onlyQuestionId=only)) + + +if __name__ == "__main__": + main() diff --git a/tests/fixtures/trusteeBenchmark/__init__.py b/tests/fixtures/trusteeBenchmark/__init__.py new file mode 100644 index 00000000..52f83ff7 --- /dev/null +++ b/tests/fixtures/trusteeBenchmark/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Trustee benchmark fixture: synthetic but realistic Swiss KMU accounting data. + +Used by the Feature Data Sub-Agent eval harness (Phase 1.5) to measure +hallucination rates against a fixed gold standard. Data is built in-memory +via Pydantic models -- no SQL, no DB connection -- so the harness stays +hermetic and reproducible. +""" + +from tests.fixtures.trusteeBenchmark.loadTrusteeBenchmarkFixture import ( + buildTrusteeBenchmarkFixture, + BenchmarkFixture, +) + +__all__ = ["buildTrusteeBenchmarkFixture", "BenchmarkFixture"] diff --git a/tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py b/tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py new file mode 100644 index 00000000..5eb77867 --- /dev/null +++ b/tests/fixtures/trusteeBenchmark/loadTrusteeBenchmarkFixture.py @@ -0,0 +1,275 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Synthetic Trustee benchmark fixture for the Feature Data Sub-Agent eval. + +Builds an in-memory snapshot of one fictional Swiss KMU mandate +("Demo AG") with: + +* 3 fiscal years (2023, 2024, 2025) of `TrusteeDataAccountBalance` rows + -- both annual totals (periodMonth=0) and monthly snapshots. +* 8 representative accounts spanning all major chart-of-accounts blocks + (cash, banks, receivables, payables, revenue, materials, personnel, + operating expenses). 
+* Per-month `TrusteeDataJournalEntry` + multiple `TrusteeDataJournalLine` + rows so debit/credit/COUNT aggregations have meaningful answers. + +The data is deterministic (no RNG) so a question's gold-standard answer +is stable across runs. + +This module deliberately stays decoupled from the production DB pipeline +-- the harness uses :class:`FakeFeatureDataProvider` (see +``gateway/tests/eval/fakeFeatureDataProvider.py``) to serve queries +against this in-memory snapshot, mirroring the public methods of +``FeatureDataProvider`` (browseTable / queryTable / aggregateTable). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List + + +_MANDATE_ID = "m-demo-ag" +_FEATURE_INSTANCE_ID = "fi-demo-ag-trustee" + + +# --------------------------------------------------------------------------- +# Account master data +# --------------------------------------------------------------------------- + +_ACCOUNT_MASTER: List[Dict[str, Any]] = [ + {"accountNumber": "1000", "label": "Hauptkasse", "accountType": "asset", "currency": "CHF"}, + {"accountNumber": "1020", "label": "ZKB Geschaeftskonto", "accountType": "asset", "currency": "CHF"}, + {"accountNumber": "1021", "label": "PostFinance", "accountType": "asset", "currency": "CHF"}, + {"accountNumber": "1100", "label": "Forderungen aus Lieferungen und Leistungen", "accountType": "asset", "currency": "CHF"}, + {"accountNumber": "2000", "label": "Verbindlichkeiten aus Lieferungen", "accountType": "liability", "currency": "CHF"}, + {"accountNumber": "3000", "label": "Ertrag aus Beratung", "accountType": "revenue", "currency": "CHF"}, + {"accountNumber": "5400", "label": "Materialaufwand", "accountType": "expense", "currency": "CHF"}, + {"accountNumber": "6000", "label": "Mietaufwand", "accountType": "expense", "currency": "CHF"}, +] + + +# Annual closing balances per (year, accountNumber) -- the canonical reference. 
def _monthlyProgression(opening: float, closing: float, month: int) -> float:
    """Return the interpolated balance at the end of ``month``.

    The balance moves in a straight line from ``opening`` to ``closing``
    over twelve months and is rounded to two decimals. ``month <= 0``
    denotes the annual row and returns ``closing`` unrounded.
    """
    start, end = float(opening), float(closing)
    if month > 0:
        return round(start + (end - start) * (month / 12.0), 2)
    return end
def _buildSelectedTables() -> List[Dict[str, Any]]:
    """Return the table descriptors handed to the sub-agent.

    Three trustee tables are described (chart of accounts, account
    balances, journal lines). Each descriptor carries an ``objectKey``,
    a German/English ``label`` and a ``meta`` block with the table name
    and its field list. A fresh structure is built on every call.
    """

    def describe(tableName: str, labelDe: str, labelEn: str, fieldNames: List[str]) -> Dict[str, Any]:
        return {
            "objectKey": f"data.feature.trustee.{tableName}",
            "label": {"de": labelDe, "en": labelEn},
            "meta": {"table": tableName, "fields": list(fieldNames)},
        }

    return [
        describe(
            "TrusteeDataAccount", "Kontenplan", "Chart of accounts",
            ["id", "accountNumber", "label", "accountType", "currency", "isActive"],
        ),
        describe(
            "TrusteeDataAccountBalance", "Kontosalden", "Account balances",
            [
                "id", "accountNumber", "periodYear", "periodMonth",
                "openingBalance", "debitTotal", "creditTotal",
                "closingBalance", "currency",
            ],
        ),
        describe(
            "TrusteeDataJournalLine", "Buchungszeilen", "Journal lines",
            [
                "id", "journalEntryId", "accountNumber",
                "debitAmount", "creditAmount", "currency", "description",
            ],
        ),
    ]
+ + All rows include ``mandateId`` and ``featureInstanceId`` columns so the + fake provider can scope them the same way the real one does. + """ + accountRows: List[Dict[str, Any]] = [] + for i, acc in enumerate(_ACCOUNT_MASTER): + accountRows.append({ + "id": f"acc-{i:03d}", + "accountNumber": acc["accountNumber"], + "label": acc["label"], + "accountType": acc["accountType"], + "currency": acc["currency"], + "isActive": True, + "mandateId": _MANDATE_ID, + "featureInstanceId": _FEATURE_INSTANCE_ID, + }) + + balanceRows: List[Dict[str, Any]] = [] + rowIdx = 0 + for year, closings in _ANNUAL_CLOSING.items(): + for accountNumber, closing in closings.items(): + opening = _openingFromPriorYear(year, accountNumber) + balanceRows.append({ + "id": f"bal-{rowIdx:04d}", + "accountNumber": accountNumber, + "periodYear": year, + "periodMonth": 0, + "openingBalance": opening, + "debitTotal": round(max(closing - opening, 0.0) * 1.2, 2), + "creditTotal": round(max(closing - opening, 0.0) * 0.2, 2), + "closingBalance": float(closing), + "currency": "CHF", + "mandateId": _MANDATE_ID, + "featureInstanceId": _FEATURE_INSTANCE_ID, + }) + rowIdx += 1 + for month in range(1, 13): + monthly = _monthlyProgression(opening, closing, month) + balanceRows.append({ + "id": f"bal-{rowIdx:04d}", + "accountNumber": accountNumber, + "periodYear": year, + "periodMonth": month, + "openingBalance": opening, + "debitTotal": round((monthly - opening) * 1.2, 2) if monthly > opening else 0.0, + "creditTotal": round((monthly - opening) * 0.2, 2) if monthly > opening else 0.0, + "closingBalance": monthly, + "currency": "CHF", + "mandateId": _MANDATE_ID, + "featureInstanceId": _FEATURE_INSTANCE_ID, + }) + rowIdx += 1 + + lineRows: List[Dict[str, Any]] = [] + for j, entry in enumerate(_JOURNAL_ENTRIES_2025): + entryId = f"je-2025-{j:03d}" + lineRows.append({ + "id": f"jl-{j*2:04d}", + "journalEntryId": entryId, + "accountNumber": entry["debit"], + "debitAmount": float(entry["amount"]), + "creditAmount": 0.0, 
+ "currency": "CHF", + "description": entry["description"], + "mandateId": _MANDATE_ID, + "featureInstanceId": _FEATURE_INSTANCE_ID, + }) + lineRows.append({ + "id": f"jl-{j*2+1:04d}", + "journalEntryId": entryId, + "accountNumber": entry["credit"], + "debitAmount": 0.0, + "creditAmount": float(entry["amount"]), + "currency": "CHF", + "description": entry["description"], + "mandateId": _MANDATE_ID, + "featureInstanceId": _FEATURE_INSTANCE_ID, + }) + + fixture = BenchmarkFixture( + mandateId=_MANDATE_ID, + featureInstanceId=_FEATURE_INSTANCE_ID, + rowsByTable={ + "TrusteeDataAccount": accountRows, + "TrusteeDataAccountBalance": balanceRows, + "TrusteeDataJournalLine": lineRows, + }, + selectedTables=_buildSelectedTables(), + ) + return fixture diff --git a/tests/fixtures/trusteeBenchmark/questions.yaml b/tests/fixtures/trusteeBenchmark/questions.yaml new file mode 100644 index 00000000..7d277cae --- /dev/null +++ b/tests/fixtures/trusteeBenchmark/questions.yaml @@ -0,0 +1,226 @@ +# Trustee Sub-Agent Benchmark -- 19 questions analog Hein 2025 +# +# Each question covers ONE expected hallucination class so we can attribute +# accuracy gains to specific phases (validator / ontology). +# +# Scoring per question (all binary unless noted): +# patternOk -- did the agent call the right tool(s) with the right filters? +# forbidOk -- did it AVOID the forbidden tool/op (e.g. SUM closingBalance)? +# numericOk -- does the final answer contain the expected number(s)? +# accuracyOk -- patternOk AND forbidOk AND numericOk +# +# tolerance: relative tolerance for numeric comparison (default 0.005 = 0.5 %). + +- id: q01 + question: "Was ist der Banksaldo per 31.12.2025 fuer das ZKB-Konto 1020?" 
+ intent: BANK_BALANCE_AT_DATE + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "1020" + periodYear: 2025 + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [152400.0] + +- id: q02 + question: "Wie hoch ist die Hauptkasse (Konto 1000) per Ende 2024?" + intent: CASH_BALANCE_AT_DATE + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "1000" + periodYear: 2024 + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [5200.0] + +- id: q03 + question: "Summiere alle Bankkonten (102x) per 31.12.2025." + intent: BANK_GROUP_TOTAL_AT_DATE + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + periodYear: 2025 + periodMonth: 0 + accountNumberLike: "102%" + forbiddenTools: [aggregateTable] + expectedNumbers: [180500.0] + numericTolerance: 0.01 + +- id: q04 + question: "Wie hat sich der Schlusssaldo des ZKB-Kontos 1020 ueber die Jahre 2023 bis 2025 entwickelt?" + intent: BALANCE_HISTORY_PER_YEAR + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "1020" + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [132500.0, 148900.0, 152400.0] + +- id: q05 + question: "Welches Konto hatte 2025 den hoechsten Schlusssaldo bei den Aktiven (1xxx)?" + intent: TOP_ASSET_AT_DATE + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + periodYear: 2025 + periodMonth: 0 + accountNumberLike: "1%" + forbiddenTools: [aggregateTable] + expectedAnswerContains: ["1020"] + expectedNumbers: [152400.0] + +- id: q06 + question: "Welche Konten gehoeren zu den Bankkonten (102x)?" 
+ intent: ACCOUNT_LIST_FILTER + expectedTools: [queryTable] + expectedTable: TrusteeDataAccount + requiredFilters: + accountNumberLike: "102%" + forbiddenTools: [aggregateTable] + expectedAnswerContains: ["1020", "1021"] + +- id: q07 + question: "Wie hoch war der Materialaufwand (Konto 5400) im Jahr 2025?" + intent: EXPENSE_AT_YEAR + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "5400" + periodYear: 2025 + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [112100.0] + +- id: q08 + question: "Wie viele Buchungszeilen gibt es insgesamt im System?" + intent: COUNT_ROWS + expectedTools: [aggregateTable] + expectedTable: TrusteeDataJournalLine + expectedAggregate: COUNT + forbiddenTools: [] + expectedNumbers: [20] + +- id: q09 + question: "Wie hoch ist der gesamte Beratungsertrag (Konto 3000) im Jahr 2025?" + intent: REVENUE_AT_YEAR + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "3000" + periodYear: 2025 + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [488700.0] + +- id: q10 + question: "Wie viel wurde 2025 auf das Materialaufwand-Konto 5400 gebucht (Soll-Summe ueber Buchungszeilen)?" + intent: JOURNAL_SUM_AT_ACCOUNT + expectedTools: [aggregateTable] + expectedTable: TrusteeDataJournalLine + expectedAggregate: SUM + expectedAggregateField: debitAmount + requiredFilters: + accountNumber: "5400" + forbiddenTools: [] + expectedNumbers: [29400.0] + numericTolerance: 0.01 + +- id: q11 + question: "Welche Buchungen im 1. Quartal 2025 (Januar bis Maerz) wurden auf Konto 3000 gebucht?" 
+ intent: JOURNAL_LINES_BY_ACCOUNT + expectedTools: [queryTable] + expectedTable: TrusteeDataJournalLine + requiredFilters: + accountNumber: "3000" + forbiddenTools: [aggregateTable] + expectedAnswerContains: ["18500", "ACME"] + +- id: q12 + question: "Wie hoch war die Hauptkasse (Konto 1000) jeweils per Ende Maerz 2025 und per Ende Juni 2025?" + intent: MULTI_MONTH_SNAPSHOT + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "1000" + periodYear: 2025 + forbiddenTools: [aggregateTable] + expectedNumbers: [5375.0, 5550.0] + numericTolerance: 0.01 + +- id: q13 + question: "Wie hoch ist die Summe aller Aufwandskonten (5xxx und 6xxx) per Ende 2025?" + intent: EXPENSE_GROUP_TOTAL + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + periodYear: 2025 + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [154100.0] + numericTolerance: 0.01 + +- id: q14 + question: "Welches Konto hat den hoechsten openingBalance fuer 2025?" + intent: TOP_OPENING_BALANCE + # Both routes are legitimate: queryTable+orderBy+limit=1, or + # aggregateTable(MAX) followed by queryTable lookup. We only insist that + # the final answer names the right account and (optionally) the value. + expectedTools: [queryTable, aggregateTable] + expectedTable: TrusteeDataAccountBalance + forbiddenTools: [] + expectedAnswerContains: ["3000"] + expectedNumbers: [462500.0] + +- id: q15 + question: "Liste alle Konten vom Typ asset auf." + intent: ACCOUNTS_BY_TYPE + expectedTools: [queryTable] + expectedTable: TrusteeDataAccount + requiredFilters: + accountType: "asset" + forbiddenTools: [aggregateTable] + expectedAnswerContains: ["1000", "1020", "1021", "1100"] + +- id: q16 + question: "Wie hoch ist der Schlusssaldo der Forderungen aus Lieferungen und Leistungen (Konto 1100) per Ende 2025?" 
+ intent: BALANCE_BY_NAME_LOOKUP + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "1100" + periodYear: 2025 + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [66800.0] + +- id: q17 + question: "Wie hoch waren die Verbindlichkeiten (Konto 2000) jeweils per Ende 2023, 2024 und 2025?" + intent: LIABILITY_HISTORY + expectedTools: [queryTable] + expectedTable: TrusteeDataAccountBalance + requiredFilters: + accountNumber: "2000" + periodMonth: 0 + forbiddenTools: [aggregateTable] + expectedNumbers: [41300.0, 44100.0, 47900.0] + +- id: q18 + question: "Wie viele Bankkonten gibt es im Kontenplan (102x)?" + intent: ACCOUNT_COUNT_BY_PREFIX + expectedTools: [queryTable, aggregateTable] + expectedTable: TrusteeDataAccount + requiredFilters: + accountNumberLike: "102%" + forbiddenTools: [] + expectedNumbers: [2] + +- id: q19 + question: "Gib mir alle Buchungszeilen mit einem Sollbetrag groesser als 20'000 CHF." + intent: JOURNAL_LINES_BY_AMOUNT + expectedTools: [queryTable] + expectedTable: TrusteeDataJournalLine + forbiddenTools: [aggregateTable] + expectedAnswerContains: ["24300", "32100", "41500", "28700"] diff --git a/tests/unit/serviceAgent/test_agentTrace_repairCounters.py b/tests/unit/serviceAgent/test_agentTrace_repairCounters.py new file mode 100644 index 00000000..4a0909d1 --- /dev/null +++ b/tests/unit/serviceAgent/test_agentTrace_repairCounters.py @@ -0,0 +1,112 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Unit tests for the repair-loop telemetry aggregation in agentLoop. + +These counters (`validationFailures`, `repairAttempts`, `successAfterRepair`) +land on `AgentTrace` and are surfaced via the `AGENT_SUMMARY` event. The +Eval-Harness (Phase 1.5) reads them to compute the repair conversion rate. 
+""" + +from __future__ import annotations + +from modules.serviceCenter.services.serviceAgent.agentLoop import _computeRepairCounters +from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( + AgentRoundLog, ToolCallLog, +) + + +def _round(*toolCalls: ToolCallLog) -> AgentRoundLog: + return AgentRoundLog(roundNumber=1, toolCalls=list(toolCalls)) + + +def _failed(toolName: str, code: str) -> ToolCallLog: + return ToolCallLog( + toolName=toolName, + success=False, + validationFailureCode=code, + error=f"{code}: ...", + ) + + +def _ok(toolName: str) -> ToolCallLog: + return ToolCallLog(toolName=toolName, success=True) + + +def test_computeRepairCounters_emptyTrace(): + fails, attempts, succeeded = _computeRepairCounters([]) + assert (fails, attempts, succeeded) == (0, 0, 0) + + +def test_computeRepairCounters_allCleanRunsHaveZeroCounters(): + rounds = [ + _round(_ok("queryTable"), _ok("browseTable")), + _round(_ok("aggregateTable")), + ] + fails, attempts, succeeded = _computeRepairCounters(rounds) + assert (fails, attempts, succeeded) == (0, 0, 0) + + +def test_computeRepairCounters_singleFailureCountsButNoRepairYet(): + """One failure in round 1, no follow-up call -- counts the failure but + nothing else. 
Repair only counts when the LLM tries again.""" + rounds = [_round(_failed("queryTable", "FIELD_NOT_FOUND"))] + fails, attempts, succeeded = _computeRepairCounters(rounds) + assert (fails, attempts, succeeded) == (1, 0, 0) + + +def test_computeRepairCounters_repairThatSucceeds(): + """Round 1 fails, round 2 retries same tool successfully.""" + rounds = [ + _round(_failed("queryTable", "FIELD_NOT_FOUND")), + _round(_ok("queryTable")), + ] + fails, attempts, succeeded = _computeRepairCounters(rounds) + assert (fails, attempts, succeeded) == (1, 1, 1) + + +def test_computeRepairCounters_repairThatFailsAgain(): + """Round 1 fails, round 2 retries same tool but fails validation again.""" + rounds = [ + _round(_failed("queryTable", "FIELD_NOT_FOUND")), + _round(_failed("queryTable", "FIELD_NOT_FOUND")), + ] + fails, attempts, succeeded = _computeRepairCounters(rounds) + assert (fails, attempts, succeeded) == (2, 1, 0) + + +def test_computeRepairCounters_siblingCallsInSameRoundAreNotRepairs(): + """When the LLM emits two queryTable calls in the same round, the + second is NOT a repair attempt -- it had no way to see the first + one's rejection yet (parallel dispatch within a round).""" + rounds = [ + _round( + _failed("queryTable", "FIELD_NOT_FOUND"), + _failed("queryTable", "FIELD_NOT_FOUND"), + ), + ] + fails, attempts, succeeded = _computeRepairCounters(rounds) + assert (fails, attempts, succeeded) == (2, 0, 0) + + +def test_computeRepairCounters_differentToolNamesAreIndependent(): + """A queryTable failure does not flag a later browseTable as a repair.""" + rounds = [ + _round(_failed("queryTable", "FIELD_NOT_FOUND")), + _round(_ok("browseTable")), + ] + fails, attempts, succeeded = _computeRepairCounters(rounds) + assert (fails, attempts, succeeded) == (1, 0, 0) + + +def test_computeRepairCounters_multiToolMix(): + """Trustee-like sequence: SUM(closingBalance) rejected, LLM switches to + queryTable with a typo (rejected), then fixes the typo (success).""" + rounds = 
[ + _round(_failed("aggregateTable", "INVALID_AGGREGATE_TARGET")), + _round(_failed("queryTable", "FIELD_NOT_FOUND")), + _round(_ok("queryTable")), + ] + fails, attempts, succeeded = _computeRepairCounters(rounds) + # 2 validation failures total, 1 prior-rejected queryTable retry that + # succeeded; aggregateTable was never retried so no attempt counted for it. + assert (fails, attempts, succeeded) == (2, 1, 1) diff --git a/tests/unit/services/test_featureDataAgent_schema.py b/tests/unit/services/test_featureDataAgent_schema.py index ef37753b..616f46cc 100644 --- a/tests/unit/services/test_featureDataAgent_schema.py +++ b/tests/unit/services/test_featureDataAgent_schema.py @@ -19,11 +19,18 @@ asked for the closing balance per period). from __future__ import annotations +import asyncio +from unittest.mock import MagicMock + import pytest from modules.shared import fkRegistry +from modules.serviceCenter.services.serviceAgent.datamodelAgent import ( + ToolCallRequest, ToolResult, +) from modules.serviceCenter.services.serviceAgent.featureDataAgent import ( _buildSchemaContext, + _buildSubAgentTools, _buildTableSchemaBlock, _formatFieldLine, _summarizePythonType, @@ -152,10 +159,29 @@ def test_buildSchemaContext_forbidsSummingAggregateFields(): assert "closingBalance" in prompt -def test_buildSchemaContext_appendsTrusteeDomainHints(): - """When the feature module exposes getAgentDomainHints(), the schema prompt - must include those hints so the sub-agent knows e.g. 
that 102x are bank - accounts and periodMonth=0 is the annual total.""" +def test_buildSchemaContext_appendsTrusteeOntologyBlock(monkeypatch): + """When the feature exposes getAgentOntology(), the schema prompt must + include the compiled ontology block (Phase 2 path).""" + monkeypatch.delenv("POWERON_DISABLE_FEATURE_ONTOLOGY", raising=False) + selected = [_trusteeAccountBalanceObj()] + prompt = _buildSchemaContext( + featureCode="trustee", + instanceLabel="Demo AG", + selectedTables=selected, + requestLang="de", + ) + assert "DOMAIN ONTOLOGY (trustee):" in prompt + assert "BankAccount" in prompt + assert "NEVER_AGGREGATE on TrusteeDataAccountBalance.closingBalance" in prompt.replace("never aggregate", "NEVER_AGGREGATE") + assert "BANK_BALANCE_AT_DATE" in prompt + + +def test_buildSchemaContext_fallsBackToLegacyHints_whenOntologyDisabled(monkeypatch): + """With POWERON_DISABLE_FEATURE_ONTOLOGY=1 the builder must fall back to + the legacy `getAgentDomainHints()` block. This is the path used by the + eval harness to measure `baseline` and `phase1` accuracy without the + ontology-driven prompt.""" + monkeypatch.setenv("POWERON_DISABLE_FEATURE_ONTOLOGY", "1") selected = [_trusteeAccountBalanceObj()] prompt = _buildSchemaContext( featureCode="trustee", @@ -164,16 +190,14 @@ def test_buildSchemaContext_appendsTrusteeDomainHints(): requestLang="de", ) assert "TRUSTEE DOMAIN HINTS" in prompt + assert "DOMAIN ONTOLOGY" not in prompt assert "102x Bank / Post" in prompt - assert "periodMonth = 0" in prompt - assert "ANTI-PATTERNS" in prompt - assert 'LIKE \'102%\'' in prompt or "LIKE '102%'" in prompt -def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook(): - """Features that don't export getAgentDomainHints() should produce a prompt - without the trailing hints block. 
Verified by using a feature code that - cannot resolve to a main module (registry returns None).""" +def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook(monkeypatch): + """Features that don't export getAgentDomainHints()/getAgentOntology() + should produce a prompt without any trailing hints block.""" + monkeypatch.delenv("POWERON_DISABLE_FEATURE_ONTOLOGY", raising=False) selected = [_trusteeAccountBalanceObj()] prompt = _buildSchemaContext( featureCode="nosuchfeature", @@ -182,4 +206,90 @@ def test_buildSchemaContext_skipsHintsForFeaturesWithoutHook(): requestLang="de", ) assert "TRUSTEE DOMAIN HINTS" not in prompt + assert "DOMAIN ONTOLOGY" not in prompt assert "Keep your answer SHORT" in prompt + + +# ------------------------------------------------------------------ +# Validator integration (Phase 1: Repair-Loop) +# +# These tests guard that pre-execute validation fires BEFORE the provider +# is touched, and that the structured error payload reaches the LLM via +# `ToolResult.errorDetails` -- the contract the LLM relies on for repair. +# ------------------------------------------------------------------ + + +def _buildRegistryWithMockProvider(): + """Build a sub-agent ToolRegistry where the provider is a MagicMock. 
+ + The mock records calls so we can assert the validator short-circuits + before the DB layer is reached.""" + provider = MagicMock() + provider.browseTable.return_value = {"rows": [], "total": 0, "limit": 50, "offset": 0} + provider.queryTable.return_value = {"rows": [], "total": 0, "limit": 50, "offset": 0} + provider.aggregateTable.return_value = {"rows": [], "aggregate": "SUM", "field": "x"} + registry = _buildSubAgentTools( + provider=provider, + featureInstanceId="fi-test", + mandateId="m-test", + tableFilters=None, + validator=None, + ) + return registry, provider + + +def _dispatchSync(registry, toolName, args): + """Synchronously dispatch a tool call through the registry.""" + call = ToolCallRequest(name=toolName, args=args) + loop = asyncio.new_event_loop() + try: + return loop.run_until_complete(registry.dispatch(call, context={})) + finally: + loop.close() + + +def test_subAgentTools_invalidFieldShortCircuitsBeforeProvider(): + """A queryTable call with an unknown field must NOT reach the provider.""" + registry, provider = _buildRegistryWithMockProvider() + result = _dispatchSync(registry, "queryTable", { + "tableName": "TrusteeDataAccountBalance", + "filters": [{"field": "klosingBalance", "op": "=", "value": 1}], + }) + assert isinstance(result, ToolResult) + assert result.success is False + assert result.errorDetails is not None + assert result.errorDetails["code"] == "FIELD_NOT_FOUND" + assert result.errorDetails["suggestion"] == "closingBalance" + assert result.error and result.error.startswith("FIELD_NOT_FOUND:") + provider.queryTable.assert_not_called() + + +def test_subAgentTools_sumClosingBalanceShortCircuits(): + """The flagship hallucination -- SUM(closingBalance) -- must be blocked + by the pre-execute validator before the DB is touched.""" + registry, provider = _buildRegistryWithMockProvider() + result = _dispatchSync(registry, "aggregateTable", { + "tableName": "TrusteeDataAccountBalance", + "aggregate": "SUM", + "field": 
"closingBalance", + }) + assert result.success is False + assert result.errorDetails["code"] == "INVALID_AGGREGATE_TARGET" + assert result.errorDetails["field"] == "closingBalance" + provider.aggregateTable.assert_not_called() + + +def test_subAgentTools_validCallReachesProvider(): + """Sanity: a valid call passes the validator and hits the provider.""" + registry, provider = _buildRegistryWithMockProvider() + result = _dispatchSync(registry, "queryTable", { + "tableName": "TrusteeDataAccountBalance", + "filters": [ + {"field": "periodYear", "op": "=", "value": 2025}, + {"field": "periodMonth", "op": "=", "value": 0}, + ], + "fields": ["accountNumber", "closingBalance"], + }) + assert result.success is True + assert result.errorDetails is None + provider.queryTable.assert_called_once() diff --git a/tests/unit/services/test_queryValidator.py b/tests/unit/services/test_queryValidator.py new file mode 100644 index 00000000..40c8f444 --- /dev/null +++ b/tests/unit/services/test_queryValidator.py @@ -0,0 +1,295 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Unit tests for the Feature Data Sub-Agent QueryValidator. + +Each constraint is exercised with both a Happy and a Sad path so a future +refactor that silently drops a check is caught immediately. + +Test fixture is the real ``TrusteeDataAccountBalance`` / ``TrusteeDataJournalLine`` +Pydantic models -- both are perfectly suited because they cover all four +constraint classes in production-realistic shape (string fields, numeric +fields, fields named ``closingBalance`` / ``debitTotal``). 
+""" + +from __future__ import annotations + +import pytest + +from modules.shared import fkRegistry +from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( + Constraint, + ConstraintRule, + OntologyDescriptor, + ValidationErrorCode, +) +from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator + + +@pytest.fixture(scope="module", autouse=True) +def _ensureModels(): + fkRegistry._ensureModelsLoaded() + + +@pytest.fixture() +def validator() -> QueryValidator: + return QueryValidator() + + +# ------------------------------------------------------------------ +# FieldExists -- browseTable / queryTable / aggregateTable +# ------------------------------------------------------------------ + + +def test_browseQuery_happyPath_returnsNone(validator): + err = validator.validateBrowseQuery( + "TrusteeDataAccountBalance", + {"fields": ["accountNumber", "closingBalance"]}, + ) + assert err is None + + +def test_browseQuery_invalidField_returnsFieldNotFound(validator): + err = validator.validateBrowseQuery( + "TrusteeDataAccountBalance", + {"fields": ["closingBlance"]}, # typo + ) + assert err is not None + assert err.code == ValidationErrorCode.FIELD_NOT_FOUND + assert err.field == "closingBlance" + assert err.suggestion == "closingBalance" + + +def test_queryTable_filterOnInvalidField_returnsFieldNotFound(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "klosingBalance", "op": "=", "value": 100}]}, + ) + assert err is not None + assert err.code == ValidationErrorCode.FIELD_NOT_FOUND + assert err.suggestion == "closingBalance" + + +def test_queryTable_unknownTable_isLenient(validator): + """When the table isn't in MODEL_REGISTRY we skip validation -- relying on + the SQL layer to surface schema errors. 
Prevents false positives for + pure UDB tables not exposed via Pydantic.""" + err = validator.validateQueryTable( + "NoSuchTable123", + {"filters": [{"field": "anything", "op": "=", "value": 1}]}, + ) + assert err is None + + +# ------------------------------------------------------------------ +# OperatorCompatible +# ------------------------------------------------------------------ + + +def test_queryTable_likeOnStringField_isOk(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "accountNumber", "op": "LIKE", "value": "102%"}]}, + ) + assert err is None + + +def test_queryTable_likeOnNumericField_isOperatorIncompatible(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "closingBalance", "op": "LIKE", "value": "100%"}]}, + ) + assert err is not None + assert err.code == ValidationErrorCode.OPERATOR_INCOMPATIBLE + assert err.field == "closingBalance" + + +def test_queryTable_gteOnNumericField_isOk(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "closingBalance", "op": ">=", "value": 100}]}, + ) + assert err is None + + +def test_queryTable_gteOnStringField_isOperatorIncompatible(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "currency", "op": ">=", "value": "CHF"}]}, + ) + assert err is not None + assert err.code == ValidationErrorCode.OPERATOR_INCOMPATIBLE + + +def test_queryTable_equalsOnAnyField_isOk(validator): + """`=` and `!=` work on any field type.""" + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "currency", "op": "=", "value": "CHF"}]}, + ) + assert err is None + + +def test_queryTable_isNullOnAnyField_isOk(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "mandateId", "op": "IS NULL", "value": None}]}, + ) + assert err is None + 
+ +# ------------------------------------------------------------------ +# AggregateTarget -- the highest-impact rule +# ------------------------------------------------------------------ + + +def test_aggregate_sumDebitAmount_isOk(validator): + err = validator.validateAggregateQuery( + "TrusteeDataJournalLine", + {"aggregate": "SUM", "field": "debitAmount"}, + ) + assert err is None + + +def test_aggregate_sumClosingBalance_isInvalidAggregateTarget(validator): + """The flagship bug: SUM(closingBalance) across periods. Must be blocked.""" + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "SUM", "field": "closingBalance"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET + assert err.field == "closingBalance" + assert "already aggregated" in err.hint + + +def test_aggregate_avgDebitTotal_isInvalidAggregateTarget(validator): + """`*Total` columns are turnovers per period -- AVG across periods is nonsense.""" + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "AVG", "field": "debitTotal"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET + + +def test_aggregate_countClosingBalance_isOk(validator): + """COUNT on a balance column is meaningful (how many balance rows exist).""" + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "COUNT", "field": "closingBalance"}, + ) + assert err is None + + +def test_aggregate_sumOnStringField_isTypeMismatch(validator): + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "SUM", "field": "currency"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.TYPE_MISMATCH + + +def test_aggregate_invalidField_returnsFieldNotFound(validator): + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "SUM", "field": "nonExistent"}, + ) + assert err is not 
None + assert err.code == ValidationErrorCode.FIELD_NOT_FOUND + + +def test_aggregate_invalidGroupBy_returnsFieldNotFound(validator): + err = validator.validateAggregateQuery( + "TrusteeDataJournalLine", + {"aggregate": "SUM", "field": "debitAmount", "groupBy": "ghostColumn"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.FIELD_NOT_FOUND + + +# ------------------------------------------------------------------ +# OrderByValid +# ------------------------------------------------------------------ + + +def test_queryTable_orderByValid_isOk(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"orderBy": "periodYear"}, + ) + assert err is None + + +def test_queryTable_orderByInvalid_returnsOrderByInvalid(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"orderBy": "periodYr"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.ORDER_BY_INVALID + assert err.suggestion == "periodYear" + + +def test_queryTable_orderByLiteralStringNone_isOk(validator): + """LLMs sometimes pass the literal string 'None'.""" + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"orderBy": "None"}, + ) + assert err is None + + +# ------------------------------------------------------------------ +# Ontology-driven override (Phase 2 readiness check) +# ------------------------------------------------------------------ + + +def test_ontologyOverride_blocksAggregateForOntologyField(): + """When the ontology marks a field NEVER_AGGREGATE, SUM/AVG is blocked + even if the field name doesn't match the convention suffixes.""" + ontology = OntologyDescriptor( + featureCode="trustee", + constraints=[ + Constraint( + appliesTo="TrusteeDataJournalLine.debitAmount", + rule=ConstraintRule.NEVER_AGGREGATE, + message="Synthetic test rule.", + ) + ], + ) + validatorWithOntology = QueryValidator(ontology=ontology) + err = validatorWithOntology.validateAggregateQuery( + 
"TrusteeDataJournalLine", + {"aggregate": "SUM", "field": "debitAmount"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET + + +# ------------------------------------------------------------------ +# QueryValidationError serialization (consumed by featureDataAgent) +# ------------------------------------------------------------------ + + +def test_validationError_toShortErrorIncludesCodeAndField(validator): + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "SUM", "field": "closingBalance"}, + ) + assert err is not None + short = err.toShortError() + assert short.startswith("INVALID_AGGREGATE_TARGET:") + assert "closingBalance" in short + + +def test_validationError_toErrorDetailsHasFourKeys(validator): + err = validator.validateQueryTable( + "TrusteeDataAccountBalance", + {"filters": [{"field": "klosingBalance", "op": "=", "value": 0}]}, + ) + assert err is not None + details = err.toErrorDetails() + assert set(details.keys()) == {"code", "field", "suggestion", "hint"} + assert details["code"] == "FIELD_NOT_FOUND" + assert details["suggestion"] == "closingBalance" diff --git a/tests/unit/services/test_trusteeOntology.py b/tests/unit/services/test_trusteeOntology.py new file mode 100644 index 00000000..887f69a4 --- /dev/null +++ b/tests/unit/services/test_trusteeOntology.py @@ -0,0 +1,199 @@ +# Copyright (c) 2026 Patrick Motsch +# All rights reserved. +"""Unit tests for the trustee ontology and the ontology-to-prompt compiler. 
+ +Verifies: + +* the descriptor passes Pydantic validation +* `constraintsForTable` correctly scopes by table/field prefix +* the compiler emits a stable header + every entity name + every + constraint message +* the QueryValidator picks up ontology constraints (NEVER_AGGREGATE on + closingBalance) over the convention-based defaults +* the `getAgentOntology()` hook on `mainTrustee` returns the descriptor +* `_buildValidatorForFeature("trustee")` wires the validator with the + ontology +""" + +from __future__ import annotations + +import pytest + +from modules.features.trustee.mainTrustee import getAgentOntology +from modules.features.trustee.trusteeOntology import getTrusteeOntology +from modules.serviceCenter.services.serviceAgent.datamodelOntology import ( + ConstraintRule, + OntologyDescriptor, + SemanticType, + ValidationErrorCode, +) +from modules.serviceCenter.services.serviceAgent.featureDataAgent import ( + _buildValidatorForFeature, + _loadFeatureOntologyBlock, +) +from modules.serviceCenter.services.serviceAgent.ontologyToPromptCompiler import ( + compileOntologyToPrompt, +) +from modules.serviceCenter.services.serviceAgent.queryValidator import QueryValidator +from modules.shared import fkRegistry + + +@pytest.fixture(scope="module", autouse=True) +def _ensureModels(): + fkRegistry._ensureModelsLoaded() + + +# --------------------------------------------------------------------------- +# OntologyDescriptor structure +# --------------------------------------------------------------------------- + + +def test_trusteeOntology_returnsValidDescriptor(): + ont = getTrusteeOntology() + assert isinstance(ont, OntologyDescriptor) + assert ont.featureCode == "trustee" + assert ont.entities and ont.relations and ont.constraints and ont.canonicalPatterns + + +def test_trusteeOntology_hasBankAccountSpecialization(): + ont = getTrusteeOntology() + bank = next((e for e in ont.entities if e.name == "BankAccount"), None) + assert bank is not None + assert 
bank.parentEntity == "Account" + assert bank.semanticType == SemanticType.ACCOUNT + + +def test_trusteeOntology_closingBalanceIsNeverAggregate(): + ont = getTrusteeOntology() + constraints = ont.constraintsForTable("TrusteeDataAccountBalance") + matching = [ + c for c in constraints + if c.rule == ConstraintRule.NEVER_AGGREGATE + and c.appliesTo == "TrusteeDataAccountBalance.closingBalance" + ] + assert matching, "Expected NEVER_AGGREGATE constraint on closingBalance" + + +def test_trusteeOntology_requiresPeriodFilterOnBalanceTable(): + ont = getTrusteeOntology() + constraints = ont.constraintsForTable("TrusteeDataAccountBalance") + table_level = [c for c in constraints if c.rule == ConstraintRule.REQUIRES_FILTER_ON] + assert table_level, "Expected at least one REQUIRES_FILTER_ON constraint" + required = table_level[0].params.get("requiredFields") or [] + assert "periodYear" in required + assert "periodMonth" in required + + +def test_constraintsForTable_filtersScopeCorrectly(): + ont = getTrusteeOntology() + bal = ont.constraintsForTable("TrusteeDataAccountBalance") + journal = ont.constraintsForTable("TrusteeDataJournalLine") + for c in bal: + assert c.appliesTo.startswith("TrusteeDataAccountBalance") + for c in journal: + assert c.appliesTo.startswith("TrusteeDataJournalLine") + + +# --------------------------------------------------------------------------- +# Prompt compiler +# --------------------------------------------------------------------------- + + +def test_compiler_emitsExpectedHeader(): + block = compileOntologyToPrompt(getTrusteeOntology()) + assert block.startswith("DOMAIN ONTOLOGY (trustee):"), block.splitlines()[0] + + +def test_compiler_includesAllEntityNames(): + ont = getTrusteeOntology() + block = compileOntologyToPrompt(ont) + for e in ont.entities: + assert e.name in block, f"Entity {e.name} missing from compiled prompt" + + +def test_compiler_includesAllConstraintMessages(): + ont = getTrusteeOntology() + block = 
compileOntologyToPrompt(ont) + for c in ont.constraints: + assert c.message.split(".")[0] in block, f"Constraint message missing: {c.message[:40]}" + + +def test_compiler_includesCanonicalPatternTools(): + ont = getTrusteeOntology() + block = compileOntologyToPrompt(ont) + for p in ont.canonicalPatterns: + assert p.intent in block + assert p.pattern["tool"] in block + + +def test_compiler_deterministic(): + block1 = compileOntologyToPrompt(getTrusteeOntology()) + block2 = compileOntologyToPrompt(getTrusteeOntology()) + assert block1 == block2 + + +# --------------------------------------------------------------------------- +# QueryValidator x ontology integration +# --------------------------------------------------------------------------- + + +def test_validator_picksUpOntologyNeverAggregate(): + validator = QueryValidator(ontology=getTrusteeOntology()) + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "SUM", "field": "closingBalance"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET + assert err.field == "closingBalance" + + +def test_validator_ontologyConstraintFiresOnDebitTotal(): + validator = QueryValidator(ontology=getTrusteeOntology()) + err = validator.validateAggregateQuery( + "TrusteeDataAccountBalance", + {"aggregate": "SUM", "field": "debitTotal"}, + ) + assert err is not None + assert err.code == ValidationErrorCode.INVALID_AGGREGATE_TARGET + + +def test_validator_allowsLegitimateAggregateOnJournalLine(): + validator = QueryValidator(ontology=getTrusteeOntology()) + err = validator.validateAggregateQuery( + "TrusteeDataJournalLine", + {"aggregate": "SUM", "field": "debitAmount"}, + ) + assert err is None + + +# --------------------------------------------------------------------------- +# featureDataAgent integration hooks +# --------------------------------------------------------------------------- + + +def test_mainTrustee_getAgentOntology_returnsDescriptor(): + 
ont = getAgentOntology() + assert isinstance(ont, OntologyDescriptor) + assert ont.featureCode == "trustee" + + +def test_loadFeatureOntologyBlock_returnsCompiledBlock(): + block = _loadFeatureOntologyBlock("trustee") + assert block.startswith("DOMAIN ONTOLOGY (trustee):") + assert "BankAccount" in block + + +def test_loadFeatureOntologyBlock_unknownFeatureReturnsEmpty(): + assert _loadFeatureOntologyBlock("doesNotExist") == "" + + +def test_buildValidatorForFeature_trustee_hasOntology(): + validator = _buildValidatorForFeature("trustee") + assert validator._ontology is not None + assert validator._ontology.featureCode == "trustee" + + +def test_buildValidatorForFeature_unknownFeature_noOntology(): + validator = _buildValidatorForFeature("doesNotExist") + assert validator._ontology is None