fixes private model and udb scoping sources
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 24s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped

This commit is contained in:
ValueOn AG 2026-06-03 09:37:03 +02:00
parent 24899b0cf2
commit d61e29bcac
90 changed files with 2323 additions and 9697 deletions

29
app.py
View file

@ -282,7 +282,7 @@ initLogging()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
instanceLabel = APP_CONFIG.get("APP_ENV_LABEL") instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
# Pre-warm AI connectors on process load (before lifespan). Critical for chatbot latency. # Pre-warm AI connectors on process load (before lifespan). Critical for AI/agent latency.
try: try:
import modules.aicore.aicoreModelRegistry # noqa: F401 import modules.aicore.aicoreModelRegistry # noqa: F401
logger.info("AI connectors pre-warm (app load) triggered") logger.info("AI connectors pre-warm (app load) triggered")
@ -428,10 +428,20 @@ async def lifespan(app: FastAPI):
# --- Shutdown sequence (protected against CancelledError) --- # --- Shutdown sequence (protected against CancelledError) ---
try: try:
# 1. Stop scheduler first (removes all pending cron/interval jobs) # 1. Signal DB layer to abort in-flight borrow waits immediately.
# This MUST happen first so that sync worker threads stuck in
# _acquireConn (30 s poll loop) bail out within one backoff tick
# instead of blocking process exit for the full borrow timeout.
try:
from modules.connectors.connectorDbPostgre import closeAllPools
closeAllPools()
except Exception as e:
logger.warning(f"Closing DB connection pools failed: {e}")
# 2. Stop scheduler (removes all pending cron/interval jobs)
eventManager.stop() eventManager.stop()
# 2. Stop Feature Containers (Plug&Play) # 3. Stop Feature Containers (Plug&Play)
try: try:
mainModules = loadFeatureMainModules() mainModules = loadFeatureMainModules()
for featureName, module in mainModules.items(): for featureName, module in mainModules.items():
@ -444,14 +454,6 @@ async def lifespan(app: FastAPI):
except Exception as e: except Exception as e:
logger.warning(f"Could not shutdown feature containers: {e}") logger.warning(f"Could not shutdown feature containers: {e}")
# 3. Close all PostgreSQL connection pools (LAST -- features may still
# issue DB calls during their onStop hooks)
try:
from modules.connectors.connectorDbPostgre import closeAllPools
closeAllPools()
except Exception as e:
logger.warning(f"Closing DB connection pools failed: {e}")
logger.info("Application has been shut down") logger.info("Application has been shut down")
except asyncio.CancelledError: except asyncio.CancelledError:
@ -628,8 +630,7 @@ app.include_router(connectionsRouter)
from modules.routes.routeRagInventory import router as ragInventoryRouter from modules.routes.routeRagInventory import router as ragInventoryRouter
app.include_router(ragInventoryRouter) app.include_router(ragInventoryRouter)
from modules.routes.routeAdminSttBenchmark import router as sttBenchmarkRouter
app.include_router(sttBenchmarkRouter)
from modules.routes.routeTableViews import router as tableViewsRouter from modules.routes.routeTableViews import router as tableViewsRouter
app.include_router(tableViewsRouter) app.include_router(tableViewsRouter)
@ -745,4 +746,4 @@ if __name__ == "__main__":
], check=True) ], check=True)
except ImportError: except ImportError:
import uvicorn import uvicorn
uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, timeout_graceful_shutdown=5) uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, timeout_graceful_shutdown=2)

View file

@ -1,309 +0,0 @@
# Aufwandsschätzung Althaus Bot v2 -- Unabhängige Analyse
**Projekt:** Althaus Bot v2 -- Weiterentwicklung & neue Use Cases
**Kunde:** W. Althaus AG, Aarwangen
**Erstellt:** 13. April 2026
**Basis:** Code-Analyse Gateway-Repository + Offerte v2 vom 14.04.2026
**Methodik:** Bottom-Up-Schätzung auf Basis der bestehenden Implementierung, Dreipunktschätzung (Min / Mitte / Max)
---
## 1. Ist-Zustand der Implementierung
### 1.1 Architekturübersicht
```
┌─────────────────────────────────────────────────────────────────┐
│ React Frontend (SSE-Streaming, Chat-UI) │
└──────────────────────────┬──────────────────────────────────────┘
│ /api/chatbot/*
┌──────────────────────────▼──────────────────────────────────────┐
│ Gateway (Python/FastAPI) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Chatbot Feature (modules/features/chatbot/) │ │
│ │ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐ │ │
│ │ │ Planner │→ │ SQL Plan │→ │ Parse & │→ │Formul. │ │ │
│ │ │ Node │ │ Node │ │ Execute │ │ Node │ │ │
│ │ └────┬────┘ └──────────┘ └────┬─────┘ └────────┘ │ │
│ │ │ │ │ │
│ │ ├→ Tavily (Web Search) │ │ │
│ │ └→ Direct Answer │ │ │
│ └──────────────────────────────────┼──────────────────────┘ │
│ │ │
│ ┌──────────────────────────────────▼──────────────────────┐ │
│ │ PreprocessorConnector (HTTP POST → Azure SQL API) │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ KnowledgeService (pgvector/RAG) -- NICHT IM CHATBOT │ │
│ │ Produktiv im AgentService + CommCoach │ │
│ └─────────────────────────────────────────────────────────┘ │
└──────────────────────────────────────────────────────────────────┘
┌──────────────────────────▼──────────────────────────────────────┐
│ Azure Preprocessing Server (deployed, ERP-Daten deaktiviert) │
│ Tabellen: Artikel, Einkaufspreis, Lagerplatz, Lagerplatz_Art. │
│ Repo: github.com/valueonag/gateway_preprocessing │
└─────────────────────────────────────────────────────────────────┘
```
### 1.2 Vorhandene Komponenten (Wiederverwendung)
| Komponente | Datei / Modul | Status | Wiederverwendbar für |
|---|---|---|---|
| LangGraph-Workflow | `chatbot/chatbot.py` | Produktiv (deaktiviert) | Alle Positionen -- Grundgerüst |
| PreprocessorConnector | `connectors/connectorPreprocessor.py` | Produktiv (deaktiviert) | Pos. 1, 2, 3, 4 -- SQL-Abfragen |
| ChatbotConfig | `chatbot/config.py` | Produktiv | Alle -- Konfiguration pro Instanz |
| Streaming-Bridge | `chatbot/service.py` | Produktiv | Alle -- SSE ans Frontend |
| ChatbotDocument | `chatbot/interfaceFeatureChatbot.py` | Implementiert | Pos. 1.4, 2.1, 2.5 -- File-Handling |
| KnowledgeService/RAG | `serviceCenter/services/serviceKnowledge/` | Produktiv (AgentService) | Pos. 5 -- Wiki-Integration |
| Automation-Template | `automation/subAutomationTemplates.py` | Produktiv | Pos. 6 -- Preprocessor-Updates |
| SQL-Sanitize | `chatbot.py` → `_sanitize_sql_typos` | Produktiv | Pos. 1.1 -- Gesperrte Artikel |
| Markdown-Tabellen | `chatbot.py` → `_tool_output_to_markdown_table` | Produktiv | Pos. 1.3, 3.3 -- Darstellung |
| File-Upload Backend | `service.py` → `_convert_file_ids_to_document_references` | Implementiert | Pos. 1.4 -- Upload-Pipeline |
| Excel-Export | `service.py` → `_create_chat_document_from_action_document` | Implementiert | Pos. 2.5 -- Kalktool-Export |
### 1.3 Fehlende Komponenten (Neuentwicklung)
| Komponente | Benötigt für | Komplexität |
|---|---|---|
| Matching-Engine (exakt → fuzzy → KI) | Pos. 2.2 | Hoch |
| Neuer Planner-Pfad "WIKI" | Pos. 5.2 | Mittel |
| KnowledgeService → Chatbot Integration | Pos. 5.2 | Mittel |
| Wiki-Connector (API/Crawling) | Pos. 5.1 | Unbekannt (Wiki-abhängig) |
| Delta-Sync-Mechanismus | Pos. 5.3 | Mittel |
| Preprocessor: 8-10 neue Tabellen/Views | Pos. 1.5, 3.1, 4.1 | Mittel (Code-Änderung) |
| Frontend: File-Picker, Drag&Drop | Pos. 1.4 | Mittel |
| Frontend: Thread-Liste, Suchfunktion | Pos. 1.2 | Mittel |
| Kalktool-Excel-Format-Export | Pos. 2.5 | Mittel |
| Schwellenwert-Insights | Pos. 4.5 | Mittel |
---
## 2. Detaillierte Aufwandsschätzung
### Position 1: Basics (Plattform-Verbesserungen)
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 1.1 | Gesperrte Artikel filtern | 4 | 3 | 4 | 4 | System-Prompt + SQL-Sanitize-Regel. Kleine Änderung. |
| 1.2 | Chat-Verlauf speichern | 12 | 12 | 14 | 16 | Backend existiert. Frontend-Aufwand (Thread-Liste, Suche). |
| 1.3 | Längere Antworten | 6 | 4 | 5 | 6 | Streaming-Config + Frontend-Rendering. |
| 1.4 | Datei-Upload | 16 | 16 | 18 | 20 | Full-Stack: Drag&Drop + LangGraph-Integration + Extraktion. |
| 1.5 | Kundenartikelnummern | 8 | 10 | 12 | 14 | Preprocessor-Code + Prompt + Cross-Ref-Queries. ERP-abhängig. |
| 1.6 | Abklärungen & Testing | 8 | 8 | 8 | 8 | Standard. |
| | **Subtotal** | **54** | **53** | **61** | **68** | |
**Delta zur Offerte: +7h (Mitte) / +14h (Max)**
**Haupttreiber:** Preprocessor-Erweiterung für Kundenartikelnummern (Pos. 1.5) erfordert Code-Änderung, nicht nur Config. Frontend-Aufwand bei Upload (Pos. 1.4) eher am oberen Ende.
---
### Position 2: Use Case Kalktool
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 2.1 | Stücklisten-Upload & Extraktion | 12 | 10 | 12 | 14 | Nutzt Pos. 1.4. serviceExtraction vorhanden. |
| 2.2 | Artikelidentifikation & Matching | 20 | 24 | 28 | 32 | **KRITISCH**: Neue Matching-Engine, 3 Stufen, ERP-abhängig. |
| 2.3 | Automatische Feldergänzung | 16 | 14 | 16 | 18 | Preprocessor + Enrichment-Logik. |
| 2.4 | Alternativartikel-Vorschläge | 12 | 12 | 14 | 16 | KI-Vorschläge + Bestätigungs-Workflow im Chat. |
| 2.5 | Excel-Export (Kalktool-Format) | 12 | 10 | 12 | 14 | Basis existiert. Kalktool-Vorlage-Anpassung. |
| 2.6 | Erweiterbarkeit neue Felder | 8 | 6 | 8 | 10 | Config-gesteuertes Feld-Mapping. |
| 2.7 | Abklärungen & Testing | 12 | 12 | 12 | 12 | Kalktool-Vorlage, Testdaten, UAT. |
| | **Subtotal** | **92** | **88** | **102** | **116** | |
**Delta zur Offerte: +10h (Mitte) / +24h (Max)**
**Haupttreiber:** Die Matching-Engine (Pos. 2.2) ist die komplexeste Neuentwicklung im gesamten Projekt. Mehrstufiges Matching (exakt → fuzzy → KI-gestützt) ohne bestehende Basis. Die Qualität hängt stark von der ERP-Datenqualität und der Vielfalt der Kunden-Stücklisten-Formate ab.
---
### Position 3: Use Case Materialmanagement 1
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 3.1 | ERP-Daten erweitern | 16 | 16 | 19 | 22 | Preprocessor: Bestellungen, Wareneingänge, Aufträge. Code nötig. |
| 3.2 | System-Prompt Materialmanagement | 8 | 6 | 8 | 10 | Prompt-Engineering + SQL-Templates. |
| 3.3 | Transparente Statusübersicht | 8 | 6 | 7 | 8 | Markdown-Rendering existiert, Erweiterung nötig. |
| 3.4 | Auswirkungsanalyse & Empfehlungen | 12 | 14 | 16 | 18 | Cross-Table-Queries + KI-Analyse. Komplex. |
| 3.5 | Abklärungen & Testing | 8 | 8 | 8 | 8 | Standard. |
| | **Subtotal** | **52** | **50** | **58** | **66** | |
**Delta zur Offerte: +6h (Mitte) / +14h (Max)**
**Haupttreiber:** Auswirkungsanalyse (Pos. 3.4) erfordert Multi-Table-Joins und KI-gestützte Bewertung, was über einfache SQL-Abfragen hinausgeht.
---
### Position 4: Use Case Materialmanagement 2 (KPIs)
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 4.1 | ERP-Daten erweitern | 16 | 16 | 19 | 22 | Lagerjournal, Preishistorie. Aggregierte Views. |
| 4.2 | System-Prompt KPI-Analyse | 8 | 6 | 8 | 10 | Prompt-Engineering. |
| 4.3 | Liefertermintreue-Analyse | 10 | 10 | 12 | 14 | Zeitreihen, Lieferantenvergleich, komplexe SQL. |
| 4.4 | Preisentwicklungs-Analyse | 10 | 10 | 11 | 12 | Preishistorie, Abweichungsberechnung. |
| 4.5 | Automatisierte Insights | 8 | 10 | 12 | 14 | Schwellenwert-Warnungen, proaktive Erkennung. Neues Konzept. |
| 4.6 | Abklärungen & Testing | 8 | 8 | 8 | 8 | Standard. |
| | **Subtotal** | **60** | **60** | **70** | **80** | |
**Delta zur Offerte: +10h (Mitte) / +20h (Max)**
**Haupttreiber:** Automatisierte Insights (Pos. 4.5) erfordern eine neue Logikschicht, die proaktiv Schwellenwerte überwacht und Empfehlungen generiert. Das ist im aktuellen Chat-Flow nicht vorgesehen.
---
### Position 5: Use Case Wiki-Anbindung
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 5.1 | Wiki-Anbindung & Indexierung | 16 | 16 | 20 | 24 | KnowledgeService existiert. Wiki-Zugang UNBEKANNT. |
| 5.2 | RAG-Integration im Chatbot | 12 | 12 | 14 | 16 | Pattern existiert (AgentService), muss portiert werden. |
| 5.3 | Inkrementelle Aktualisierung | 8 | 8 | 11 | 14 | Delta-Sync stark Wiki-abhängig. |
| 5.4 | Abklärungen & Testing | 8 | 8 | 9 | 10 | Relevanz-Tuning ist iterativ. |
| | **Subtotal** | **44** | **44** | **54** | **64** | |
**Delta zur Offerte: +10h (Mitte) / +20h (Max)**
**Haupttreiber:** Wiki-System ist unbekannt. Bei Wiki mit guter API (Confluence, SharePoint) sind 44h erreichbar. Bei proprietärem System ohne API steigt der Aufwand erheblich.
**Synergie:** KnowledgeService mit pgvector, Chunking, Embedding und semanticSearch ist bereits produktiv. Die RAG-Pipeline (Ingestion → Embedding → Retrieval) muss nicht neu gebaut werden. Das spart geschätzt 20-30h gegenüber einer Neuentwicklung.
---
### Position 6: Azure-Migration
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 6.1 | Migration Preprocessor | 6 | 4 | 6 | 8 | Config-Änderungen, Env-Files, Netzwerk. |
| 6.2 | Validierung & Smoke-Tests | 4 | 4 | 4 | 4 | End-to-End-Tests. |
| | **Subtotal** | **10** | **8** | **10** | **12** | |
**Delta zur Offerte: 0h (Mitte)**
**Bewertung:** Realistisch. Einfachste Position.
---
### Position 7: Projektmanagement
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|---|---|:-:|:-:|:-:|:-:|---|
| 7.1 | Kick-off & Workshop | 4 | 4 | 4 | 4 | Standard. |
| 7.2 | Projektmanagement | 8 | 10 | 12 | 14 | 10-14 Wochen, 3 Ansprechpartner, 7 Positionen. |
| 7.3 | Deployment & Go-Live | 6 | 6 | 7 | 8 | Staging + Prod + erste Betriebswoche. |
| | **Subtotal** | **18** | **20** | **23** | **26** | |
**Delta zur Offerte: +5h (Mitte) / +8h (Max)**
**Haupttreiber:** PM-Aufwand bei 3-Monats-Projekt mit mehreren Stakeholdern ist erfahrungsgemäss höher.
---
## 3. Gesamtübersicht
| Pos. | Beschreibung | Offerte (h) | Min (h) | Mitte (h) | Max (h) | Offerte CHF | Mitte CHF |
|---|---|:-:|:-:|:-:|:-:|:-:|:-:|
| 1 | Basics | 54 | 53 | 61 | 68 | 8'100 | 9'150 |
| 2 | Kalktool | 92 | 88 | 102 | 116 | 13'800 | 15'300 |
| 3 | Materialmanagement 1 | 52 | 50 | 58 | 66 | 7'800 | 8'700 |
| 4 | Materialmanagement 2 | 60 | 60 | 70 | 80 | 9'000 | 10'500 |
| 5 | Wiki-Anbindung | 44 | 44 | 54 | 64 | 6'600 | 8'100 |
| 6 | Azure-Migration | 10 | 8 | 10 | 12 | 1'500 | 1'500 |
| 7 | Projektmanagement | 18 | 20 | 23 | 26 | 2'700 | 3'450 |
| | **Gesamt** | **330** | **323** | **378** | **432** | **49'500** | **56'700** |
### Zusammenfassung
| Szenario | Stunden | CHF (à 150/h) | Differenz zur Offerte |
|---|:-:|:-:|:-:|
| Offerte (Kostendach) | 330 | 49'500 | -- |
| Eigene Schätzung (Minimum) | 323 | 48'450 | -2% |
| **Eigene Schätzung (Mitte)** | **378** | **56'700** | **+15%** |
| Eigene Schätzung (Maximum) | 432 | 64'800 | +31% |
---
## 4. Risikobewertung
### Risikomatrix
| # | Risiko | Wahrscheinlichkeit | Auswirkung | Betroffene Pos. | Möglicher Mehraufwand |
|---|---|:-:|:-:|---|:-:|
| R1 | Matching-Engine komplexer als erwartet | Hoch | Hoch | 2.2 | +10-15h |
| R2 | Wiki-System ohne API | Mittel | Hoch | 5.1, 5.3 | +10-20h |
| R3 | ERP-Datenqualität mangelhaft | Mittel | Mittel | 1.5, 2.2, 3.1, 4.1 | +8-16h |
| R4 | Preprocessor-Erweiterung aufwändiger | Mittel | Mittel | 1.5, 3.1, 4.1 | +8-12h |
| R5 | Frontend-Aufwand unterschätzt | Mittel | Gering | 1.2, 1.4 | +4-8h |
| R6 | KI-Modell-Qualität für SQL-Generierung | Gering | Mittel | 3, 4 | +4-8h |
### Synergien (Aufwandsreduktion durch bestehende Komponenten)
| Synergie | Geschätzte Einsparung | Betroffene Pos. |
|---|:-:|---|
| KnowledgeService/RAG existiert produktiv | 20-30h | Pos. 5 |
| ChatbotDocument-Modell existiert | 4-6h | Pos. 1.4, 2.1 |
| LangGraph modular erweiterbar | 6-10h | Pos. 3, 4, 5 |
| Prompt-Engineering über DB-Config | 2-4h | Pos. 1.1, 3.2, 4.2 |
| Excel-Export-Pattern existiert | 2-4h | Pos. 2.5 |
| **Gesamt Einsparung** | **34-54h** | |
---
## 5. Empfehlungen
### 5.1 Zur Offerte
Die Offerte mit 330h als Kostendach ist **ambitioniert, aber bei idealem Verlauf erreichbar**. Die grössten Risiken liegen in:
- Position 2 (Kalktool): Die Matching-Engine ist die komplexeste Neuentwicklung
- Position 5 (Wiki): Komplett abhängig vom Wiki-System, das noch ungeklärt ist
**Empfehlung:** Offerte bei 330h als Kostendach belassen, aber intern mit 370-380h planen. Die Differenz (~40-50h) als interne Reserve einkalkulieren.
### 5.2 Priorisierung
1. **Must-Have (Prio 1):** Pos. 1 (Basics) + Pos. 6 (Azure-Migration) -- Voraussetzung für alles
2. **High-Value (Prio 2):** Pos. 2 (Kalktool) -- Höchster Kundennutzen, aber auch höchstes Risiko
3. **Quick-Win (Prio 3):** Pos. 3+4 (Materialmanagement) -- Nutzen vorhandene Architektur
4. **Abhängig (Prio 4):** Pos. 5 (Wiki) -- Erst nach Wiki-Klärung starten
### 5.3 Offene Punkte (vor Projektstart zu klären)
| # | Offener Punkt | Verantwortlich | Kritisch für |
|---|---|---|---|
| O1 | Wiki-System und Zugangsart klären | Althaus (Samuel) | Pos. 5 |
| O2 | ERP-System identifizieren und Datenstrukturen dokumentieren | Althaus (Stefan) | Pos. 1.5, 3.1, 4.1 |
| O3 | Preprocessor-Code-Review für Erweiterbarkeit | PowerOn (Entwicklung) | Pos. 1.5, 3.1, 4.1 |
| O4 | Kalktool-Vorlage erhalten und analysieren | Althaus (Reto) | Pos. 2.5 |
| O5 | Muster-Stücklisten für Matching-Test | Althaus (Reto) | Pos. 2.2 |
| O6 | Azure-Subscription-Details | Althaus | Pos. 6 |
---
## 6. Zeitplan (2 Entwickler)
```
Woche 1-2: Kick-off + Azure-Migration (Pos. 6) + Basics 1.1-1.3
Entwickler A: Azure-Migration + 1.1 (Gesperrte Artikel)
Entwickler B: 1.2 (Chat-Verlauf Frontend) + 1.3 (Lange Antworten)
Woche 2-5: Basics 1.4-1.6 (Grundlage für Use Cases)
Entwickler A: 1.4 (File-Upload Full-Stack)
Entwickler B: 1.5 (Kundenartikelnummern + Preprocessor)
Woche 4-9: Kalktool (Pos. 2) -- längster Block, früh starten
Entwickler A: 2.1-2.2 (Upload + Matching-Engine)
Entwickler B: 2.3-2.5 (Feldergänzung + Export)
Woche 6-9: Materialmanagement 1+2 (Pos. 3+4) -- parallel zum Kalktool
Entwickler B: 3.1-3.4 + 4.1-4.5 (Preprocessor + Prompts)
(Entwickler A bleibt auf Kalktool)
Woche 9-12: Wiki-Anbindung (Pos. 5) -- nach Klärung des Wiki-Systems
Entwickler A: 5.1-5.2 (Connector + RAG-Integration)
Entwickler B: 5.3 (Delta-Sync) + Integrationstests
Woche 12-13: Integrationstests, UAT, Go-Live (Pos. 7.3)
Beide Entwickler: E2E-Tests + Deployment + Monitoring
```
**Gesamtdauer:** 12-14 Wochen
**Kritischer Pfad:** Pos. 1 → Pos. 2 (Kalktool braucht Upload + Kundenartikelnummern)
---
*Dokument erstellt auf Basis der Code-Analyse des Gateway-Repository (Stand 13.04.2026)*

View file

@ -1,143 +0,0 @@
# Fragenkatalog Althaus Bot v2 -- Kick-off-Vorbereitung
**Zweck:** Strukturierte Fragen für den Anforderungsworkshop mit W. Althaus AG
**Erstellt:** 13. April 2026
**Zielgruppe:** Projektleitung PowerOn + Ansprechpartner Althaus (Reto, Stefan, Samuel)
---
## A. Wiki-System (Ansprechpartner: Samuel)
> **Kritisch für:** Position 5 (Wiki-Anbindung) -- Aufwandsschätzung schwankt zwischen 44h und 64h je nach Wiki-System.
### A.1 Wiki-Identifikation
| # | Frage | Hintergrund |
|---|---|---|
| A1.1 | Welches Wiki-System wird eingesetzt? (z.B. Confluence, SharePoint Wiki, MediaWiki, DokuWiki, Notion, anderes) | Bestimmt die Anbindungsstrategie (API vs. Export vs. Crawling) |
| A1.2 | Wo wird das Wiki gehostet? (Cloud-SaaS, On-Premise, Azure) | Netzwerk-Zugang und Firewall-Konfiguration |
| A1.3 | Wie viele Seiten/Artikel enthält das Wiki ungefähr? | Dimensionierung der Erstindexierung und Embedding-Kosten |
| A1.4 | In welchen Formaten liegen die Inhalte vor? (reiner Text, HTML, Markdown, eingebettete PDFs/Bilder) | Bestimmt die Extraktions-Komplexität |
### A.2 Technischer Zugang
| # | Frage | Hintergrund |
|---|---|---|
| A2.1 | Gibt es eine REST-API oder ähnliche Schnittstelle zum Lesen der Wiki-Inhalte? | API-Zugang = deutlich weniger Aufwand als Crawling |
| A2.2 | Gibt es eine Export-Funktion? (z.B. XML-Export, PDF-Export, Datenbank-Dump) | Fallback wenn keine API vorhanden |
| A2.3 | Gibt es Authentifizierung (API-Key, OAuth, LDAP)? Welche Credentials werden benötigt? | Konfiguration des Connectors |
| A2.4 | Gibt es eine Change-API oder Webhooks, die bei Änderungen notifizieren? | Bestimmt den Aufwand für inkrementelle Updates (Pos. 5.3) |
| A2.5 | Gibt es Zugriffsbeschränkungen auf bestimmte Wiki-Bereiche? | RBAC-Überlegungen bei der Indexierung |
### A.3 Inhaltliche Abgrenzung
| # | Frage | Hintergrund |
|---|---|---|
| A3.1 | Soll das gesamte Wiki indexiert werden oder nur bestimmte Bereiche? | Scope-Begrenzung für Erstindexierung |
| A3.2 | Gibt es vertrauliche Inhalte, die nicht in den Chatbot einfliessen dürfen? | Datenschutz-/Compliance-Anforderung |
| A3.3 | Wie oft werden Wiki-Inhalte aktualisiert? (täglich, wöchentlich, selten) | Bestimmt die Sync-Frequenz |
| A3.4 | Welche Sprache(n) haben die Wiki-Inhalte? (Deutsch, Englisch, gemischt) | Embedding-Modell-Auswahl |
---
## B. ERP-System & Datenstrukturen (Ansprechpartner: Stefan)
> **Kritisch für:** Positionen 1.5, 2.2-2.3, 3.1, 4.1 -- Preprocessor-Erweiterungen und Matching-Engine.
### B.1 ERP-Identifikation
| # | Frage | Hintergrund |
|---|---|---|
| B1.1 | Welches ERP-System wird eingesetzt? (z.B. Abacus, SAP, Microsoft Dynamics, bexio, Sage) | Bestimmt Datenstruktur und Zugriffsmöglichkeiten |
| B1.2 | Wie werden die Daten aktuell an den Preprocessor geliefert? (direkter DB-Zugriff, API, Export-Datei) | Verständnis der bestehenden Datenpipeline |
| B1.3 | In welchem Rhythmus werden die Daten aktualisiert? (Echtzeit, täglich, wöchentlich) | Aktualität der Chatbot-Antworten |
### B.2 Kundenartikelnummern (Position 1.5)
| # | Frage | Hintergrund |
|---|---|---|
| B2.1 | Gibt es im ERP eine dedizierte Tabelle für Kundenartikelnummern? Wenn ja, wie heisst sie? | Preprocessor-Schema-Erweiterung |
| B2.2 | Wie ist die Zuordnung: 1 Kundenartikel → 1 ERP-Artikel, oder n:m? | Bestimmt die Mapping-Komplexität |
| B2.3 | Wie viele Kundenartikelnummern gibt es ungefähr? | Dimensionierung |
| B2.4 | Welche Felder hat die Kundenartikelnummern-Tabelle? (z.B. KundenNr, KundenArtikelNr, InterneArtikelNr, Bezeichnung) | Schema-Definition für Preprocessor |
### B.3 Bestellwesen & Materialmanagement (Positionen 3 + 4)
| # | Frage | Hintergrund |
|---|---|---|
| B3.1 | Welche ERP-Tabellen/Views gibt es für Bestellungen? (Bestellkopf, Bestellpositionen, Status) | Preprocessor-Erweiterung Pos. 3.1 |
| B3.2 | Gibt es eine Tabelle für Wareneingänge mit Datum und Menge? | Liefertermin-Treue-Berechnung Pos. 4.3 |
| B3.3 | Gibt es eine Preishistorie-Tabelle? Welche Felder enthält sie? (Datum, Preis, Lieferant, Währung) | Preisentwicklungs-Analyse Pos. 4.4 |
| B3.4 | Gibt es ein Lagerjournal mit Buchungsdaten? | KPI-Analyse Pos. 4.1 |
| B3.5 | Gibt es eine Bestandesbedarfsliste oder Dispositions-View? | Material-Analyse Pos. 3.4 |
| B3.6 | Gibt es Felder für "bestätigter Liefertermin" vs. "gewünschter Liefertermin"? | Termintreue-KPI Pos. 4.3 |
| B3.7 | Wie viele offene Bestellungen gibt es typischerweise gleichzeitig? | Performance-Dimensionierung |
### B.4 Datenqualität
| # | Frage | Hintergrund |
|---|---|---|
| B4.1 | Wie konsistent sind Lieferanten-Namen im ERP? (exakt gleich oder Varianten wie "Siemens AG" vs. "Siemens") | Matching-Qualität Pos. 2.2 |
| B4.2 | Gibt es Pflichtfelder die häufig leer sind? | Feldergänzungs-Logik Pos. 2.3 |
| B4.3 | Wie sind Preise gespeichert? (Netto, Brutto, mit/ohne MwSt., Währung) | SQL-Query-Generierung |
| B4.4 | Werden gelöschte/gesperrte Datensätze physisch oder nur logisch gelöscht? | Filter-Logik Pos. 1.1 |
---
## C. Kalktool (Ansprechpartner: Reto)
> **Kritisch für:** Position 2 (Kalktool) -- Höchstes Risiko in der Offerte.
### C.1 Kalktool-Vorlage
| # | Frage | Hintergrund |
|---|---|---|
| C1.1 | Können wir die aktuelle Kalktool-Vorlage (Kalktool_Aktuell_2026_V1.4.xlsx) erhalten? | Zielformat für Excel-Export Pos. 2.5 |
| C1.2 | Welche Spalten/Felder sind Pflicht in der Kalktool-Vorlage? | Feldergänzungs-Priorität Pos. 2.3 |
| C1.3 | Gibt es Formeln in der Vorlage, die erhalten bleiben müssen? | Komplexität des Excel-Exports |
| C1.4 | Welches Format haben die Kunden-Stücklisten typischerweise? (PDF, Excel, CSV) | Extraktions-Strategie Pos. 2.1 |
### C.2 Matching-Anforderungen
| # | Frage | Hintergrund |
|---|---|---|
| C2.1 | Können wir 3-5 Muster-Stücklisten von verschiedenen Kunden erhalten? | Testdaten für Matching-Engine Pos. 2.2 |
| C2.2 | Welche Identifikationsmerkmale haben Kunden-Stücklisten? (Kundenartikelnr., Hersteller-Typ, Beschreibung) | Matching-Stufen definieren |
| C2.3 | Wie hoch ist die erwartete Trefferquote beim exakten Match? (10%? 50%? 90%?) | Gewichtung exakt vs. fuzzy vs. KI |
| C2.4 | Welche Felder sollen bei nicht-eindeutigem Match als "Alternative durch KI" markiert werden? | Bestätigungs-Workflow Pos. 2.4 |
| C2.5 | Gibt es Produktgruppen, die besonders schwierig zu matchen sind? | Risikobewertung |
---
## D. Infrastruktur & Azure (Ansprechpartner: Stefan / IT)
| # | Frage | Hintergrund |
|---|---|---|
| D1 | Details zur neuen Azure-Subscription (Subscription-ID, Region, Resource Group) | Pos. 6 -- Migration |
| D2 | Gibt es Netzwerk-Einschränkungen (VPN, Private Endpoints, Firewall)? | Zugang Preprocessor ↔ ERP |
| D3 | Wer hat Admin-Zugang zur neuen Subscription? | Deployment-Planung |
| D4 | Gibt es Budget-Limits auf der Azure-Subscription? | Betriebskosten-Planung |
---
## E. Priorisierung & Vorgehensweise
| # | Frage | Hintergrund |
|---|---|---|
| E1 | Sollen alle 7 Positionen umgesetzt werden, oder gibt es eine Priorisierung? | Scope-Bestätigung |
| E2 | Gibt es einen gewünschten Go-Live-Termin? | Zeitplanung |
| E3 | Wie soll die UAT organisiert werden? (dedizierte Testphase, laufend, Key-User) | Testplanung |
| E4 | Wer sind die Pilot-User für den reaktivierten Bot? | UAT-Teilnehmer |
| E5 | Sollen Schulungen für Endanwender durchgeführt werden? (nicht in Offerte enthalten) | Ggf. Nachtragsofferte |
---
## Nächste Schritte
1. **Vor dem Kick-off:** Fragenkatalog an Althaus senden, damit Antworten vorbereitet werden können
2. **Im Kick-off:** Fragen durchgehen, fehlende Antworten als Action Items festhalten
3. **Nach dem Kick-off:** Aufwandsschätzung anhand der Antworten finalisieren, insbesondere Pos. 2.2 (Matching) und Pos. 5 (Wiki)
---
*PowerOn AG -- Vorbereitung Anforderungsworkshop Althaus Bot v2*

View file

@ -1,223 +0,0 @@
# Preprocessor Assessment -- Althaus Bot v2
**Zweck:** Technische Analyse des Preprocessing-Servers für die Aufwandsschätzung der Erweiterungen
**Erstellt:** 13. April 2026
**Quellen:** Gateway-Code-Analyse (Repo nicht lokal verfügbar: github.com/valueonag/gateway_preprocessing)
---
## 1. Ist-Zustand (abgeleitet aus Gateway-Code)
### 1.1 Infrastruktur
| Eigenschaft | Wert |
|---|---|
| **Host** | Azure App Service (Switzerland North) |
| **URL (Datenverarbeitung)** | `poweron-althaus-preprocess-prod-*.azurewebsites.net/api/v1/dataprocessor/update-db-with-config` |
| **URL (Abfragen)** | `poweron-althaus-preprocess-prod-*.azurewebsites.net/api/v1/dataquery/query` |
| **Authentifizierung** | `X-PP-API-Key` (Datenverarbeitung) / `X-DB-API-Key` (Abfragen) |
| **Status** | Deployed, ERP-Datenanbindung deaktiviert |
| **Quellcode** | `github.com/valueonag/gateway_preprocessing` (separates Repo) |
### 1.2 Aktuelle Tabellen-Konfiguration
Aus dem Automation-Template (`subAutomationTemplates.py`) extrahiert:
```json
{
"tables": [
{
"name": "Artikel",
"powerbi_table_name": "Artikel",
"steps": [
{
"keep": {
"columns": [
"I_ID", "Artikelbeschrieb", "Artikelbezeichnung",
"Artikelgruppe", "Artikelkategorie", "Artikelkürzel",
"Artikelnummer", "Einheit", "Gesperrt",
"Keywords", "Lieferant", "Warengruppe"
]
}
},
{
"fillna": {
"column": "Lieferant",
"value": "Unbekannt"
}
}
]
},
{
"name": "Einkaufspreis",
"powerbi_table_name": "Einkaufspreis",
"steps": [
{
"to_numeric": {
"column": "EP_CHF",
"errors": "coerce"
}
},
{
"dropna": {
"subset": ["EP_CHF"]
}
}
]
}
]
}
```
### 1.3 Zusätzliche Tabellen (im Chatbot referenziert, aber nicht in der Config)
Aus den SQL-Beispielen in `bridges/tools.py` und `chatbot.py`:
| Tabelle | Spalten (referenziert im Code) | Joins |
|---|---|---|
| `Lagerplatz_Artikel` | `R_ARTIKEL`, `R_LAGERPLATZ`, `S_IST_BESTAND`, `S_RESERVIERTER__BESTAND` | ON `Artikel.I_ID = Lagerplatz_Artikel.R_ARTIKEL` |
| `Lagerplatz` | `I_ID`, `Lagerplatz` (Name) | ON `Lagerplatz_Artikel.R_LAGERPLATZ = Lagerplatz.I_ID` |
Diese Tabellen sind vermutlich in einer älteren Config-Version oder direkt im Preprocessor konfiguriert.
### 1.4 API-Schnittstellen
**Abfrage-API** (genutzt vom `PreprocessorConnector`):
- Methode: `POST`
- Payload: `{"query": "SELECT ..."}`
- Header: `X-DB-API-Key: <api_key>`
- Response: `{"success": true/false, "data": [...], "row_count": N, "message": "..."}`
- Einschränkung: Nur SELECT-Queries (validiert im Gateway)
**Update-API** (genutzt vom Automation-Template):
- Methode: `POST`
- Payload: `configJson` (Tabellendefinitionen + Transformationsschritte)
- Header: `X-PP-API-Key: <secret>`
- Zweck: Datenbank mit neuer Konfiguration aktualisieren
### 1.5 Transformation-Steps (bekannte Operationen)
Aus der Config-JSON abgeleitet:
| Operation | Parameter | Beschreibung |
|---|---|---|
| `keep` | `columns: [...]` | Nur angegebene Spalten behalten |
| `fillna` | `column`, `value` | NULL-Werte ersetzen |
| `to_numeric` | `column`, `errors` | Spalte in numerischen Typ konvertieren |
| `dropna` | `subset: [...]` | Zeilen mit NULL in angegebenen Spalten entfernen |
---
## 2. Benötigte Erweiterungen (nach Position)
### 2.1 Position 1.5: Kundenartikelnummern
**Neue Tabelle: `Kundenartikelnummer`**
| Spalte (geschätzt) | Typ | Beschreibung |
|---|---|---|
| `I_ID` | INT | Primary Key |
| `R_ARTIKEL` | INT | FK auf Artikel.I_ID |
| `Kundennummer` | VARCHAR | Kundennummer |
| `Kundenartikelnummer` | VARCHAR | Kunden-eigene Artikelnummer |
| `Bezeichnung` | VARCHAR | Kundenbezeichnung (optional) |
**Config-Erweiterung:**
```json
{
"name": "Kundenartikelnummer",
"powerbi_table_name": "Kundenartikelnummer",
"steps": [
{"keep": {"columns": ["I_ID", "R_ARTIKEL", "Kundennummer", "Kundenartikelnummer", "Bezeichnung"]}}
]
}
```
**Aufwand-Bewertung:** Falls der Preprocessor neue Tabellen per Config akzeptiert: ~2-3h Config + Test. Falls neuer Code nötig: ~6-8h.
### 2.2 Position 3.1: Bestellwesen (Materialmanagement 1)
**Neue Tabellen (geschätzt 3-4 Tabellen):**
| Tabelle | Wichtige Spalten | Zweck |
|---|---|---|
| `Bestellkopf` | ID, Bestellnummer, Lieferant, Bestelldatum, Status, Wunschtermin | Bestellübersicht |
| `Bestellposition` | ID, R_Bestellung, R_Artikel, Menge, Preis, Status, Bestätigter_Termin | Positionsdetails |
| `Wareneingang` | ID, R_Bestellung, R_Position, Eingangsdatum, Menge, Qualität | Lieferverfolgung |
| `Auftrag` | ID, Auftragsnummer, Kunde, R_Artikel, Menge, Termin | Betroffene Aufträge |
**Aufwand-Bewertung:** 4 Tabellen × ~4h pro Tabelle (Config + Code + Transformationen + Test) = ~16h. Bei komplexen Transformationen (Joins, Aggregationen): +4-6h.
### 2.3 Position 4.1: KPI-Daten (Materialmanagement 2)
**Neue Tabellen/Views (geschätzt 3-4):**
| Tabelle/View | Wichtige Spalten | Zweck |
|---|---|---|
| `Lagerjournal` | ID, R_Artikel, Buchungsdatum, Menge, Typ | Lagerbewegungen |
| `Preishistorie` | ID, R_Artikel, R_Lieferant, Datum, Preis, Währung | Preisentwicklung |
| `Bestandesbedarfsliste` | R_Artikel, Bedarf, Bestand, Fehlmenge, Datum | Dispositionsplanung |
| `View_Termintreue` | R_Lieferant, Wunschtermin, Bestätigt, Geliefert, Abweichung_Tage | Aggregierte KPIs |
**Aufwand-Bewertung:** 4 Tabellen/Views × ~4h = ~16h. Aggregierte Views (Termintreue): +4-6h für Berechnungslogik im Preprocessor.
---
## 3. Gesamtbewertung Preprocessor-Erweiterungen
### 3.1 Zusammenfassung
| Position | Neue Tabellen | Config-Aufwand | Code-Aufwand | Test | Gesamt |
|---|:-:|:-:|:-:|:-:|:-:|
| 1.5 (Kundenartikelnummern) | 1 | 1h | 3-5h | 2h | **6-8h** |
| 3.1 (Bestellwesen) | 3-4 | 2h | 8-12h | 4h | **14-18h** |
| 4.1 (KPIs) | 3-4 | 2h | 8-12h | 4h | **14-18h** |
| **Gesamt** | **7-9** | **5h** | **19-29h** | **10h** | **34-44h** |
### 3.2 Offene Fragen (Code-Review des Preprocessor-Repos erforderlich)
| # | Frage | Auswirkung |
|---|---|---|
| P1 | Unterstützt der Preprocessor neue Tabellen per Config-Erweiterung, oder muss für jede Tabelle Code geschrieben werden? | Bestimmt ob Config-only (~2h/Tabelle) oder Code (~4h/Tabelle) |
| P2 | Können aggregierte Views/Berechnungen im Preprocessor definiert werden? | Termintreue-KPI, Bestandsreichweite |
| P3 | Wie werden Joins zwischen Tabellen gehandhabt? (SQLite-seitig oder Preprocessor-seitig) | Komplexität der Cross-Table-Queries |
| P4 | Gibt es Rate-Limits oder Grössen-Limits bei der Query-API? | Performance bei komplexen KPI-Abfragen |
| P5 | Wie gross ist die aktuelle SQLite-Datenbank? Wie viele Artikel? | Dimensionierung für 8-10 neue Tabellen |
### 3.3 Empfehlung
**Vor Projektstart sollte ein Code-Review des Preprocessor-Repos durchgeführt werden** (geschätzter Aufwand: 2-4h). Dabei klären:
1. Erweiterbarkeit: Kann der Preprocessor neue Tabellen per Config akzeptieren?
2. Transformationen: Welche Operationen sind neben `keep`, `fillna`, `to_numeric`, `dropna` verfügbar?
3. Performance: Wie skaliert die SQLite-DB mit 8-10 zusätzlichen Tabellen?
4. Deployment: Wie wird der Preprocessor deployed? (CI/CD, manuell, Azure DevOps)
Das Ergebnis dieses Reviews kann die Aufwandsschätzung für Pos. 1.5, 3.1 und 4.1 um jeweils 4-6h nach oben oder unten korrigieren.
---
## 4. Aktueller Datenfluss (zur Referenz)
```
ERP (Althaus)
▼ (Power BI Export / API / DB-Zugriff -- Mechanismus unklar)
Preprocessor Server (Azure)
├── /api/v1/dataprocessor/update-db-with-config ← Automation-Template
│ (Tabellen laden, transformieren, in SQLite schreiben)
└── /api/v1/dataquery/query ← PreprocessorConnector (Gateway)
(SQL SELECT auf SQLite ausführen)
Gateway (Chatbot LangGraph)
React Frontend (Chat-UI)
```
---
*Assessment erstellt auf Basis der Gateway-Code-Analyse. Für eine genauere Schätzung ist ein Code-Review des Preprocessor-Repos erforderlich.*

View file

@ -71,7 +71,7 @@ Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlelh2T2hqNGcxV0hMV1FKbmF
Service_MSFT_TENANT_ID = common Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration # Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3
b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pO
MmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0= Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQnFIc3YtU0x4LTlHbTY1NUVGY2V2bUdmck85dDh1ZWVKa2ktR0N6NjdlTGFrUHMybVQ2bVRLN01XNFRZR2lyN0ZNSHhzWVVGNnVtZjRjV2hhR0ViTDYwT25lSmxJY0pSTkl3OUEyT0JxMFVYRndfUFJudExMajdTYUNXS01JU2lhQzZmNWFYdXA4aVZ5Zkh4Zko1Z00tcEE5ZFEwQkFVa1oyR296YXozRFI2WUdXN0ZSREFFclFNaTd6OUVlSmFxS1BTSlNJbnlWNHNfbkk4QzVOUGlkMzdfQUZxUlJOVEZzUlN1aWRWY01JZmlRM0JNZE1EZ3BmbW10c3BDdERpa2FMakstQUlqVEVlRC1hUmZoeFVoQ3pYNXRlRFVSTlI3ekJrU0QwSHBSaWxiSGU0akFGMXUtY2Q0RnUzS0tPOEQtcTdVdWhQeHFDM1hRRVVMcUxCeklvWHNWRUN2bjVHZUUwLTVtaGpUbWdPUnJabWlIcHZ5UjNtN0NMTUNRN29ZRGVXU28xQmhJTVg2eEZnaUdrcW9UVklHMHJycm1nT0JkdGJReVVHeV8tYm12UDlOU0lpNHFidXBQbUFSSVVmWUl1M1BVMFFncm0xSldkVzBrb2poRFMyaVUwcUZvMHl0QlZIZ1h1MjZwR3AtZWhqdzN4UVhtT2hUa1lQU3VudzNXdW1FcVY3VnQ3RmpkQnFQemlrQlF3WGhBNWxOZXJ6Zm9KVFlEZExUXzlqODhYaFNNMzVWTzFNMmVTcWdodDZoRmZTUzlhLVlOSU5fYW1vNXctaFpFMC1pUllRZW11d1JQN25sbldHVjI1anc2UC1ycndjTGtxWk55WmpJeU1wOVR0RnlTdFpad1dkRmlUNDE0d240TDlKc3JFUXdOYzd5UTFYSXUzLTQ2Y1ZGcWE3R2RyQ0I1WDMtMHBScEFzZDV4UEkyanh4ckJZUjdTYnJGZjAxQkU3MEJ6OXdybGRaWHNod1hZZEhVOXRpMWRLbVJsRGd0UDRDN3JsRzF4T0RpcnczRU5TM0RKVjVkWTRqNTl6bmhQdmdvaEg1U2kya0QtQ0l4ZHVUcGxkNi1vNVVVOEcyWXhxZWc5N1lKMk4tT0o3ZFVzYjJtT3NVZFJiSTFNUnpaSmFOeDZaLWVpZlc0VUhZRHdXOUMyQ3cwaXBQUDRJN1g1YkwzaTFiRVRxRFY5UTdZU1dSaGR6NUw3aEtac2RENXF3WEpVN0dXVTlQR0F6MFlpWl83MU44NVR1ZUtPVUNlZ205YUIwOFoxUDBvTlI0SU52emVvQ3VZXy1jTlFXRWZXQ0d5RHJ0eV9JeE5wMHl0b3FVSjNoVzg2d21hYVNYY3Q0dkFaVEZwa09tRnFBbEtoOUlGY2xkeVJoZGYzQUxYNFZfb0ZiaU5VRjJPbGhieXYtWTFKckZwenVCUGFva1IwVVFORVQ4SDMxWHVuRWhBRGd0cVlsc3kyQ0RyY2ZIVDlwcGh5ampySV9uOVpsVmlWbGoxMEg3SXh6NzRJbmZXRlhMMWc0RXhzeWtnQlJ0VnZSdENkbEpOdENwUzItUjZhZWFYRFhzbDM1WDB
xaGFPX19CSG1KZjRTTU5JemcxZzJRSFY5bkx4TTlIZFNHOW1USWxBYWhEZ1FSNVdSSDJETUZwMi1Hd0RESkF2cVA1TVJGTEtPUl9oN3gzVEIwSzZOVzlOWXhNa2I1Vzc1SV9tdENfRy1rQTNzRlZGSTYwQmJIaGswZUNWSnRDVXFfdWFCckZZcnJOT2Rfb3FrcWI4S1lVRTMyRnZJQTRZV1VsU0xobGRjekhtbG9LamR2d1hfVklsM3JBeW9SRzJnWVdiWDRzN1ltcXdSVGoxRVBvczViVXNjMUxBazZUdS1WbkRQX0h1MzdNd3ltVDUzd2FGdi1XeUMybV9ia1YxQVBPdnUxY1dfT2M5eEpZR2JHMkdZbWdDZTRERXRYOWxodndkTXltVW40c0t0bVA5YWxuRzM3LWlCdmJiYmF5dkNBY3ozbUw1Zm5zRmpBdk5ORmFZRWJKM3Q2UDdKNl9zaUV5eVVGbkF0QmZSZzk5dGo3UjNIQWxwcjRlVTdUT2s1VGFjdndvX2c3d1VmaHRMZU10M1ZKVk9Ma3dZb1kwYVV5Z2NlTjUxdUYtZXRnRTRzQlp1aFp0OUF5TVBwN1gzU21kRmJ6OUlOeUFOOEhEOU5WSENNZndvLXdoVUFJYVFDTWEyakJEcTVSVDhJOWJscU8taThqNUZkdThCOUlXcldndFBTZk9QVnlMaUphUU5sUktpb1plZDZOQnFzNFNMUzRWbWFVQWhUWmJfem96X0cxWXVTcUxCeDhOc3E2OEpFa2lzWHFIV0p3eGdBZmN1aXBhYjExZTZqaUY4S0ZudTNhcUx2WlpuTU9lNUk2ZmNyN0JCODdYMGNEU2JsZkZXYlRFaTJQUTI5RU5SMmtkV1NHQTVTTjEyZGZLYnhTNTg2Nl9aaWJqX2Q1U1NwQ3pRTGRBSUw0N3FNQ0ItMks1QVZmbURYVWdHMWFZTWhGNURVOUg0bGVuMUozanlxTnRwbVlGX2RnN2FBVTZlZjhDaXVzZEtVR1Z5azhzWHRrS1dYSG9rYkowTjQ1N0hyRWdNVWMya1ZmWmZvSnVTdHNiMHFDODNLckpjQ081SFlieGxuM0picGhKMnNQRURwY2hpQzF3dHRnNEFWcUlPYjVxZEhod0JDbWZhU01Ob21UWmRwd0NQRlpjOE5CUFBOT004U2JKNkFSUlFzRklYZGJobUoxQzZzT2wzZ3J1Z05aYThRVVNzcFktMGJDcXFfSkxVS2hhajI3dTdrR2poa21ZM3Z4UzFRblFsOFlOZVVUM0YxaFRuNjFWQ2E4ZlhvZjZpMWFtOGRuaGx0MTZxZE9TY1dsTTMyMHhsNXJ2MkduaGRkZXpYUWJ3cEt1U3YwMC1IRzM5eWRCb0lvaUhTQ2R4XzhEZl9zRk5GeHhCSWx2X3BkUkJ4NFZLVzdVRFZkbnpNNkpjUTFHY1pDV0ZOMFBaNTVpLUlmSnFrX1N5X05MTjRUeTVERUs5MG9kMFJ3di03U3BpMUM4YXNwaG1fangwYURIVjBpSVdCUkt4UW5HbWtGOUh3TUdPZjMxYXpVZDcwTmlDcTR6WldZb3VzbHRpRUgyN2lFTjlpUV85T0M4blJxMWx0cC1iU0FDOHhueDBLYjdLZGhNbjFPbE1RdmhhNlEzX3ZpT2ZsYllwNkU5TE9fZWFabDE4RWRoRWxiMk5aVFZrWmxjaW5MX1VrUGhUN29vbU1tWldESnczYTNBQ1RPd1VTNGNJdjdJU3p3QXZQLVlDNkQ1cTh4Rk1WNnRMUi1DT3VGREFPa28xejc2NUl1dzJSa2hCTlJublBRNGkydlJVRjlFbFotOWtraWFqQkNNTXBpT1hZM0NXNEpObGMxQUNuS29rOExMSnMxT3NLbjNfLTdpQW1BcDMxR1RZdVRvbElGbENWbHJqRlVrTXhYbFdiMmItUzlxR2ZxT2FCWXpMVVJYZXBfSFVwNTczU3JHUVhET3hSWm80Ry1KcE9mV3FYejVHSEVSS0p
xOUtCc3V2VHNFVkRqYk5Od20tM0ttdFQ1eGdsc091WGFYNFgybzNVd3ZvbzEwUDJ0T0hvTVd3YnlHNnpNWC0wbkJOQTIwQ3VYdlUzaXY5NFhDNlNOOW9UdGZNUk4zZ0VJakpwS21SZlJtQjVWLUxfejFYZFc1cjRwR3ZUOGdZb2VJaTdJUS1MYlRJb0ZFYW9uYzM3MDd4b09BR1pnTEh3RFpnaGhxZURQamllNUhqTHg0cHJfN08wMkdGSVQwQUlqWDhLVGViY3J5NlVFTzY3RGhGQ0R6aXNsb2w4dnBVYndTd1Jhd3IwS1BxY0h1X05RcGsySzVNbXR5YlBVQi1IOGFUNkh5QjhRZk5BQmZvcGF6ZTNXenZkdy1GRjFGdE1saGdMSnotUkIyX1VqTlZFWnJER1YyNGQtMFZHU3hmRVNPUWFCdXV3QUxzOGVSbF9EdEZGUFNxbTdiYm5oWHdYak5qa3Zoem5WY1ZUdDREVUxGX0VQeS1jckhqS2lRLXQ1Y2tyOFRjYnVhajNUZmZOUE9kbU9PYXdqdk5DYUtEOVFiMW9yZTYxMFNUaDdvUTExUFZ1bklYSkRKTnJ1RURvOTR3ODREcWdWeHpRS2RETjZqeXpvbUpxMW5lWl84RzVocmJFQ3JfZlpMd3RCZEo5RWZ0MzIxNWV6bHlwdWJJWXhoaWxlM2FHSjBhWG14Sk94ZV96cXFvU1JwWDdKZldmZWdvdWVKdXVfaS1jZjdENXQzSzNyb1d3eWhUMU53QzgxemRiTTlkdFRxZU1OdEN5c1kxOEd2MTJMcnBJWEE0eXdJdFpOYVNMQTNLR292UFlGb0Ztdz0=
# Teamsbot Browser Bot Service # Teamsbot Browser Bot Service
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot # For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot

View file

@ -73,7 +73,7 @@ Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlU2tMLTFnQWhET2Nia2pTcVp
Service_MSFT_TENANT_ID = common Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration # Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBH
WUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhh
MVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0= Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQnFIc3YtSjhlcklrU2JCOW5mdHFHd0dLTUZZZk9PT3o5RWt5RjAxX2s3ekJRLUUzU0dNSnNseTE4bUpNTnZSTWg0QV9mWm5iX19aWjV4YnRXU1JBSm1INVB5dXNRT2JiYk1tLWRSS29pdTRMdS1lMDZxMkx4VTh3bU5aVWh3cEwyOE1QcXVockgtZWh5bzdNVXQyemFuSmZqRzZZYmNGN21JdjNwNWpPRXB6WU1qSU5rZUVSb3JBS0lhcThvakkwbTRUUHhBdjRZdWNsZ1Z1RmFaNGZLcEpaNVNLdFAxYzFXdTJydU9COWJ0bkNyYUF2X2FNc1BfT05teEs1SE9PeGhPd3VJSFY2VFJ5VEl6V3R3bzd6OTVKTEVRcmt5ZzdBMXBFY1A5dUFJRFJONFBlaDlJcjNBQnBraC0wMTBhNW8wYWZaeHNWclVTOVotLTdWSmVuYzJKcUZSUkdrdXB3VEVESzd4UTI0bGd6SzdCajdoazZXVTVCaGRiaWJaOHg5Z2thSWItcS05U25DbUdrT2M1QV81WEg2dlJfMlBtZU9Bc3V5bmtBWHRoRUVLR2lWNHY3M3hHcU1raFRFOWQwSEtUU1RDWDFRNFlkNHVnTkZDbk5zS3RZeGR2Z015RnRGc3NndFVEQjc4bVpNeE81bXc1MnQ2QjNZeHZCbUJJZVJ2TE5xWEd4M3hHT2hJWW5DOWMxQlNmZE9uMVRGVnRwTUlXZjZCRUZBLU9GWVZGWFpZbUE3WVlpZU1DX1Z0bWQ0bjlaRThHOE9WR3VOVzlYWS1JampTNmxkNmFxWG54WDJjallIT3UyT0tGSzJpeG1tX0JoQjZxbEpESHBhMWZFa205bjdvTVFwSVVidnVzdURZVDAzVVpkekJ2SVZTZmhxQVJ2OWpuRGR2WFE3elMtb3B2ZzhpQVNvRmkzbzRrY1BuamVzM0E2eVM0bXBHTHgtYmhsVG5jNlB1Q1JHZU9HUlNfaTJSQkcwS2FSZnZSOW9oZzdXa1RUVTVTZTgwY01GYXQyQ0xWX1Fnb0xaOTRQY3hTclgweVJ5clc5OVpRWWlDb0JQVXoxVDA0bW8zUE55aGowb1ZZNEpBN2UtSTZTY2llRGhISFFkYWFYVlVBQ0IzbGxzVTQ2V2dsUGV1Y2I5bEZLRnlwdXRHMWZVcnBaTXNzNzNkUVFqR2xnSEQ1VlpTdXpwMFVVYjQ0enFlUnk0d3dDQUtSS1dUVnNyYnBKQW9TRjJxN2JNY2NhRWNONWRpWU5RbzNNZVJBS3EzN2ZMZ1E5VXQtMDFTZklLY1JiSDNYRlFuOF9VYUktS0xoY2IyR0xkT19qTEpIV1p6RFExUWNCQTdqN1kyS0Jaa2lyMDluenc1MS1vdmhPVlE5OUphWEY2dXFYNE04Z3lBUG5DNGZjTUVnYzEzYWhzTHpMdVBzT0dzRGJaT2x5b0pVbWJtUzJxdEd2VGtrc01kTlNPNURoVHhwZzU1d3pTZGJiTUZIME5tQ0xqNWJ2QS1QSEJHV2FEOExHWDByV19rVnc2R2pibnNENEo1cTh4bGNMX2ZpSTBMcjRvQWRhbW5xYVB
iZkZzWTRERlVESEU2aHpvdzNMTjlCazRYeEJhMmZwdXY5T25IYkFTaUM3SmdIV1FCX2xxRXctWHZQOHgxLXI1c1JkWmcydkFTUmxFSU03cGtnallnTXplOElQbEJRSEE2aW5KREU0YUxwX25wOFhuS2RIbms1dXNIRHBtNjFtb3B3UGVGb0hwOENKM1hMclBwa3NBa2pFYnZYbEtFbUF0Y3pmeFRmMDNMaTZrR1BZWnBrNUQ1WlU1NVZQSWUxN3dwcXhhcjdXNTl4LVVpYVF3Y0wtRmFyNXZRNTE3UUc2cHVaVVNpaVdHbXRqQVJNZWZmNjdQQ2lwTGd6RFFZN2tSY2NEdmxvaXk4MTZMcmg0VGo3MTN2R2V6cmV3YjdQVlNEZTQySUpaY2pkTHZzUzdJLVJ2WnlOQ3Vmem5FZXRaWjBMWjF4ZEF3ZHJ4VF8tMVNsRnljejVsaEpGOU5JbnhydjNVdzNMOENrWUVsbXp0ZEhuVE1Vd0RJcnp2N0RXUGFuNDM2OXBPbV9LRDUwTWk1NHYwaDhlVEhKUmtEa09INURwNjV5ZE1VWmpRSGdjeXJNc3FqcjZDdmx5WXluNWZ2VlpsWmR2TXVXVnBubEFmQlRfaGRwRndCVXVkMjkyLWVhaDQtZDN1cmFZLUoybGRwbGQ5MTExU2NnZ2lueVNfSjFDQ2NkWGtNX2M1T2I4YnVJOUFueGIxbG1EYlZOcFYtQlE3cm90SE40X0ZjalhLdXM5S2l5aW84ZUJPMlR4MU9EVkhZcHdrX1Zqc0NhWEJacDZHMzQwSzdkdi1Rd2s4Y1dfLS1ES0NfYTNxYl84UTN1S0lIM0pVTTNEYlJ0YW55Tk4yVjBONXNTQWtVZTJ2V3B5eHBJcG9IWGRMMklob0hMbVVZZzJKbTFMUExOQm5HSEZzWHU0VGVIWlJMVzFLeFB0NkkyWFkwWk0wdjdHRmxSWFFoSkJ2Vm5NUWNQQlp6YWlIc2NKLUdhOVVycHd5N3NFMDNVWlAxZGQ1NzRGbm9LcWxEb2tKR1RnVEtvRUc1d3l4aU1IOUQ5RldUT3Z0a3lpRHpVSWJ4MjU4RWY5MEpCQ0VFdHNMbnkxOGswcE44QzJwNXFCVGpIa0VGc2VNXy1qdzVNRU9DaXg2MW9VX3FjUk41QVFVLURwVGFLRTkyNWlENy1IcGZjNW9wY0Y5Q3d5eFg5emVUUF9hV3ZTQWNaNEN0VzdJRlFBR0picXJoUERacWNLbDZhTE8wdWlfZ3kxd2QzOXBOZV9uaUNGMkNJbGhNd3k0S2t3dTRGWVVxTTFRRlg3Ui1zLW1FLU1Mai1yaURjb2Fob2c4MDUyRHN5aldUVWMxLTVNbm5VQTdrYy0zLVFyOHRkNzZ3dGdhbXZXN3JHNkdfZ2RuRXFDM3R2TVB1cDNOdWZGTmpFNnNFTmMxTmFuZDdJUld5bERyQkJ0TGZXRk54NEdqN09hSmVMYV91NXUwNXFvMl9KV0hBNlB4bklNQ2U5WGZLUTdlX2dJenVGcDYwWHBsdTNpbE5mWGhWeXFuUkFPV0puR2h0RkhrR2MwTzJGUmp4bUR6UFlUWTlNbTJLa19hTUZZR0dscVpBbFBReTBRMDNseXo4SXNnZWt4VFdpOERqLV9ZczRkR0QwRFJQM0pqdHluWktDUlp6WU9XSjVNZi1tYnNzcVlGTDRFMzNlSmRTazFfTkNxSjAwM0wxNk9Sd2h1SWpfOW5MVWMtVXYyYlVZR0VuaHRpN1pnNnpHME5raVBMd2h2dDRyMV8yZGFJNnlkcmhtSWdmNlpLN19NcjNkc002dXFxQzhTaDZzRlgzNUJ1SzVpVnp6NVU1Y2luUlM4UEJoajNTOUJadnE1MlhzV0kxSzBObXkteVhNM3RKYW9heDVWWFJ1NGlDM0l0elRPbThwUU9oYkVkbC1PZFNLSHY3WHJiZWpEamNIVC00MlNNWV9qcHdjNDRjRlVhZXlrLTlicVBNaDlDeXdRb0F
wc3RmUGFvbURQZ29yckliaS1VUDNxcXVlYTJJRUhXNUVobk1KUDhHZE16UzBLeDViYVRwZWY3d2w0d253eEZYcExKRGpsaGlBUElaTzB3eUVadnROX1dabENGb3R4ZF9aS05KY0dHTVZaYzRFc1Z4TlZGbFd2NjdYRzJMTzVwU2NaN1Y3MzQ2Z2pzV2RSMzJBbjg0MEhaZmhoREloY0oxOFdjNDZNdVZfYlRKU1Q1M2hYdHgwUjVsTV9USjZCZXlQTTdNRWc3bUxOcXRDVkpTdnJxR0hkWWpaRUdrOEFyNHk4MENwVzdob0hUSkJvam4zZW1kcGxZUjg0RXFRNnBxSUg1MDVHdHRwVlFkWWhHM0ZyZVFvMF96R2V5YjBuMnVZTU5CQ3pVci16SGJlQTQtbnFLa1E2eHFncUg3UmYyYlZvOF82a3d2ZE4tbmxIUlNYYjlrck9QYk5CcV9faXludS1yem1JNjFBdVYyb21RQWFMMFkxX0s1TjQ4czZ2WXI3X0FzRWdNTlZndHl4bnVOTHl2YlZfaURQV053dHl4N1czRFdzaVFnRHB0MWRDV2ZuU2lzX1NZZkRQYzhsT3ItZWw0dVJlVmtFWUM5cEppOGxuYVdpQkN5dV9hQ2dodTJvV3REVkw2dVVDaGtvc0Zqd0V2dldLZEVNRVRRNVRUVmw5aHZmZEpHdk1wS0xwRFc5Vmx4dTdfdGZDRUtCU29qdEVIOW5VdjBmeGpFMFZHSUthamtVN1E2bDZqaEFackVSQnZMN0tyaUhIcUs1ZHMzMzl2TnhadGIwZW5QNS1BM3pSODY3WVFsLU1jeUpCMG1PWmhPVT0=
# Teamsbot Browser Bot Service (service-main-teams-browser-bot on Infomaniak) # Teamsbot Browser Bot Service (service-main-teams-browser-bot on Infomaniak)
TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100 TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100

View file

@ -72,7 +72,7 @@ Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnFGdnVLRHplbzNheDhIdndsU0xUeG
Service_MSFT_TENANT_ID = common Service_MSFT_TENANT_ID = common
# Google Cloud Speech Services configuration # Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzN
ISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd
2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0= Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnFIc3YtNDZzenJuZEZiQnVMOWRmZjl3R29QOWZRaGlPdk56WG1DR0FSZU5DM3dENWdoMmRpaks1U1VDNDJkZ3d3UXhSbXlkZ2h3SGZfdk54WXVidF82VkdJQXZiRTk0UlhZaUY1b2kwNzNPSm52VFdsdkwtaHJBb2dpRDBVLXRwd19Bb0dUZDkyV1VWZDJ1TG5mZ0ktYXpuS3U1U0JkZUk5TXpMdnhOaUtMN3BIb0pEZ1N0SlpFN3NNby15VTRfWWtxaF9DYjlJcnVKb0ZualVMTUx2aVNGY0JJdE1oZy1xSVBUZDF1aDM0TGVlTzVrNkFHcjlhcEk0SmRIMTFGdDFTMVUxX1dERk9NTXZMb0tVTFRoc20xME1uRkdVV0Z5N200ZTQzSjVsVExoa2VRZmFBU21ZczF0Vm9Ib3BZM2ZneDkwak12UmFyWWd0eng3ZVVFTUFLVzNOazcxeUhLVWUxcEFIZWtNRi1mT29kM1pqNGJJUUh3UVBlNGY3SlotOWZFUk5aQXFXcUFVdnUzc0Z5bERXYUNPbG14VnBNenFvb2tiQ3lZeHNHUVBlQTdTdVdXOEkxaGxCX016WWktWmN2WFcwM0VmVHdvMHVnY212VFE2cjJwUjdENkFCZF9GcUktWWpmWlNXNWVTMHBPdzVxRi15d3FSRDFra2k0NEFmTmpUeVh3SHRuZWE3WGJ4eUNIcE5tdnRqX2NCZnJoMEI2emU4U0ZYN1Nmdlhva1NacFo3UFh3WnpSdGw5ZmNpSGhicFo0ZThReXl3LW9vUzZaMkFHX2lJalFEMWtjZVdqbVpIZGk0cEdEU01TMl9xQkdSNDllTS1GV3lXS0xROTJvSlhaTjlXenJhQ3lOd2p0VjR5ZjEyektUZGJ3UThJOVJuMzhsTTVBVW9BcDFtcjk5Y0pVeW0zX3R0Nk81R3VDRWEzZnRqSXhFUW5ONHFTSWlwQU4yazlDb01KYlFQRjBFVTljdEJIY29WdF9hUkRJOThVTVFfWlJQUXI0Z3RzWFlzR1ZxUWFBd2I1SW1EMWlKdVprT3dKYTlaREp6TkZEZmVsZGEyalZGc3dHaUkyamdmQWtUT2czNzBCZEg0Vk1HSHFpRnhRYzBRNnN3TFkyaE9uMTVXN1VJTmJwbTNUMTdZbVRyc2d6Yl9aaVBXNmFvanROQVhfbWpXTDRlR1RfbklnYnJUQTZPX2JfNnlrWDVDUWJ4Z3YwNXVsTkJFQlRhTG5DVHpwejdsMGl1bzRfRXRTU2dmb3BVMUo4VkQwa0hsTmFBZnVjVzRrQmNzS2R0ZHNGV24yQnktWENtMUp6eG1MQW1ENE1vWFpFUF9PMEpWZVlxX05hSW1QUGlVT1l3MFp4bDBDZVVldHlEUlVCY1VvVlBNTlBhWFlmcVRobDNqRHo0QjZvNDBqVUVKN3JOb2dtYXQxSWw5NERSeEVRdHNUWndzUkY5RjdBOG1FZFRiVTNVSzl5bDNwdTl2SVd5aW5Ub2Q1YlBDRnpBUDkteU44YnV5X05ONmNndm9teUpqaFZVcVlHdGVRcXRpZkJLVnRuMTJSUFhGWndib
ExqRW03YUJTWXZXUXJ5WXlvd01ISDFuUFpaMFJzNFVQbWRUb2h1Zi1rcXJXMkRQSUFPeWFJN3lzOFc1d3BjWG1kbWlQWGUwelNiSnJXbUpnajdlQTlQR19XNTF0Q3JYcUMzaGp3eU0yZGhKa3FtX0tleHBfekZaWlRJRlZlSzNDVU56cml0TnFJeUc3b09uYVlwbGxFVFR6WFJVMzRmak5yWjBhcjl5ZmJpQ3hpajRXV1dwbDF5N25tNnI2bWtFem1TS08yV3JybUF0enYxRXpkUVdTNVp4WVB0aldJUUN3TnhHcHdMczh5MTFETzNWLXZFSktsdU1vM1JSNXhraDlJRDl0MEhvR1NOQWRaQW1NdzhpZnFVa1hvdXNwY2FvaThHQjVMOXdySnNIcWJlWERfLXVOcHhpN2ZZOW4yVzB3VTI2a3hvVmFkc29aX2ZUZkY5bi04WEV4MTlxNXQ4cTcwaHE4X3hDWkQxelRwSUl2amZOQ0JXRlJjRFhJNVhjNjRmaXp5eG15LTN1MFRvN3BHTFRZQ1ZFVFYyNUxleFpKTHlIVzRnVHk1Y3ZUbV9RUDdqN1Z2M2ZqVG8wa2RoVHJPeENFRDNHV0wwdi1DbEdOVDFJZnRiZGEydlZyM2tQVExOVlo3LXhIUnhZUnB6a2UzZXNtTjR0S2NzUmFNOWNiSHhHTnJDWHowWk1tbVFKUC14M25aQ1hyYjhJM2pxOEtZY0J1WTZrU3l6cDJOdk5iSXpBUk41MFFVellVZFU4UWVDZXFkQnJFbGxQX2J0S3pReU8zZUdsZUgtTnJuSlpfTjdxR3UxWTBEV0JaRV93eE9qa2dNa2tVTHRxMWNyeUh2VWNrYkdKM3BZOURkUlBxUDA3R2M4NnlMTVR2dmNMZi1lZlhzalRJWlFocGRleVRJYXBBY2hCXzFGZEU4ZVFxbHNic3RDV2FYN1dNaWpkaGdwYTEzRkZYRlEtRXR1cERHdnJKX1Zzb1Q0MnVYZkVhb0VYU1JPdFhoV29TMlhTaEppR1lTTURLYmZnNS1pSzl4T1k5MXJ0YV9qX0ZyQ1R6RFFzRndrTW9IUVlxcG5jcTEyYVU3dkpIR0tZZTZiOXNIRFpIalRtUDFBLVNyd1NfNUMtLW52NVpFZGpQenJCOGw0UlJZNlZVT1ZXTm92R3k4c3hTQXFoNFE3TUFHcjRWc01zT082anJZT0laakl5VUk1WDdDaWlubjIwS3RNcjBjTTdpbUNxSmxNR05JaWtEQURlS1h6N2h0NE9CcW5rQ3NXWkwyNXVBUU5mLTU5MG8xX29xZ0t6Z2pKWmhMNG1BNXBhYWkzY0loSmluUXNKdURwQWRIV2laM2dHQTFxV19lbkZXWmdfWEdiWEZsMGVIWDdoMnJ5dzM0ZGtBM3BSRVp2QzFNbFJSWXBManN5WmFVMlp6aUpWMF9jMTRPbWptM1lsTE41NG1kUW4tT0ZqTzNaZnZ5ZzBLZzNNc1N1X2FMMVJ0N3o4a25LMkxKVUE0dTNhU3hZX3RFMUtKcEgtX1B0cTdEMmYyMzdPaEhoeWhaUGRITC11NzRWYTJnZldiUkFvdG95a1RwWnNKaERkT0kxN1RJMzZQZzFiSjl1SlJieTJjaHBMYmZDUlhTT2hvQnRPaTNhS3NzaVc1Tms0X0FyUHRsSXdCLW1OUWk1RkRKc3pqSjVQTFFROEN5M3pxUGVjZHI4SVM3Qmx1S1A2bEEzNWlVWkFndGpUSm4wcV9jRjQ5T0l1c3ZqN0w3Z1dMV2ZtbU9MbTVSOXphX3VLMko2ZEs3U0NIaFFIMVFIcnN0OGIxSjdxNGlHUHRnOEJDaGwzcXJYNFBnOGdFSVFuSGUyOWJ3WmtlVGhGQWk0THdZd1hUbGRydk83SWVzWUJrb21tSlNvVkJjdWYtcWo0aEc1Ri1XNTZoSENaRWJISmp3UlJNMU9vSnNzZ0VudXpxMDA3aGdfSDBNZlA0Y1gybkF4dGl6SzFOc1VMN
0dzVkQxVllkSDhyby12SWNxTFRYdThJUm13S3p3cGFYc05TbVc2YVNtZEdCOFBCUXhadkIzNmdkbXpnc1pLYUhzOEtsY2kxVmNYZm9wOS1LOERLRHJhY2VhanNjaThUZW1rS01wUW05SFJxOGd1VF9STlJZWDRiTV92dXlQTkdxN3BYYTN1SUhRSjRNTy1PZWpGd0xhUlVES0hiWE5LUkM5dHNvenR3TVMySC1ueUZXUkxFY2VyRmhISGc2U2ZxeXY2VkJULV9pOTU1QkI5VUNndnVQcVItTW96VTBqRTdzem1IQ1UxVWtWdjhvTERFeGJ6M3dJNERUV1BTeUlRcG1fbUVjQ0lNREF5QkpLeHJHRkFxQS1kZEE4bXJ2aVVSckVoTkZwNGtoRElIcUktQjA1bkNRclM4dWlqUVRXXzdlQ0VjQWZGSTZlR01NQmU5bHQ3bGNtZWU1eHVvRVdQRVU4Rmx0OFRTaWF3cGgyeFJoM25sRk1GNXJtdEpfcEJmYVFrZXd4eXl0c0ZKVjQ3MkFNRjh5bDBTbFZNd256dmxpQlo5Z1FRM1ZmVTJSb3VrZTk3cXVQYmZ6SnNUWGhlSUhrUjVWUHFwemNmbW1scWVxTkcxT1p5dVlvUjhCSVJaSnBjU0dpc3YzVkt1WUtrd2xoQlVNQXh1eDhmTXNISWMyUnBUMmIwamxlS0tjMVRiWDlBcE03b1BHR1FmdmlsX2ZlMTNCaFNvNG1TeTNiQXRNZ2Y1eE1IaFAxTUZGZ1YyZjEzTG9PaGRCdHJzVlB5Mm12T1NiX2RyT2d2RERCRWFHT0dadW5DZjNtdXE4cHhEQlpub2l3bz0=
# Teamsbot Browser Bot Service # Teamsbot Browser Bot Service
TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100 TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100

View file

@ -341,8 +341,8 @@ class ModelRegistry:
modelRegistry = ModelRegistry() modelRegistry = ModelRegistry()
# Eager pre-warm on first import: ensures connectors are ready in this process. # Eager pre-warm on first import: ensures connectors are ready in this process.
# Critical for chatbot performance — avoids 48 s latency on first request. # Critical for AI/agent performance — avoids 48 s latency on first request.
# Runs when this module is first imported (lifespan or first chatbot request). # Runs when this module is first imported (lifespan or first AI request).
def _eager_prewarm() -> None: def _eager_prewarm() -> None:
try: try:
modelRegistry.ensureConnectorsRegistered() modelRegistry.ensureConnectorsRegistered()

View file

@ -16,11 +16,7 @@ Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama):
- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents - poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
- poweron-embed: Embedding (nomic-embed-text); local RAG embedding - poweron-embed: Embedding (nomic-embed-text); local RAG embedding
Pricing (CHF per call): Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below.
- Text models: CHF 0.010
- Vision models: CHF 0.100
- Reasoning models: CHF 0.050
- Embedding: CHF 0.000 (flat rate)
""" """
import logging import logging
@ -43,11 +39,20 @@ from modules.datamodels.datamodelAi import (
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Pricing constants (CHF) # Pricing constants (CHF per 1k tokens; billed byte-based via bytes/4 ~ 1 token)
PRICE_TEXT_PER_CALL = 0.01 # CHF 0.010 per text model call PRICE_INPUT_PER_1K = 0.0075
PRICE_VISION_PER_CALL = 0.10 # CHF 0.100 per vision model call PRICE_OUTPUT_PER_1K = 0.0375
PRICE_REASONING_PER_CALL = 0.05 # CHF 0.050 per reasoning call (longer runtime) PRICE_EMBED_PER_1K = 0.0005
PRICE_EMBED_PER_CALL = 0.00 # CHF 0.000 flat rate (local embedding)
def _calcPrivatePriceCHF(processingTime, bytesSent, bytesReceived):
    """Return the CHF price of one private text/vision/reasoning model call.

    Billing is byte-based: 4 bytes are counted as roughly one token and the
    result is priced per 1k tokens, using PRICE_INPUT_PER_1K for the bytes
    sent and PRICE_OUTPUT_PER_1K for the bytes received.

    ``processingTime`` is part of the pricing-callback signature but is not
    used in this calculation.
    """
    # bytes -> approx. tokens (/4) -> thousands of tokens (/1000)
    inputCost = (bytesSent / 4 / 1000) * PRICE_INPUT_PER_1K
    outputCost = (bytesReceived / 4 / 1000) * PRICE_OUTPUT_PER_1K
    return inputCost + outputCost
def _calcPrivateEmbedPriceCHF(processingTime, bytesSent, bytesReceived):
    """Return the CHF price of one private embedding call.

    Only the bytes sent are billed: 4 bytes are counted as roughly one token
    and the result is priced per 1k tokens using PRICE_EMBED_PER_1K.

    ``processingTime`` and ``bytesReceived`` are part of the pricing-callback
    signature but are not used in this calculation.
    """
    # bytes -> approx. tokens (/4) -> thousands of tokens (/1000)
    inputTokensInThousands = bytesSent / 4 / 1000
    return inputTokensInThousands * PRICE_EMBED_PER_1K
# Private-LLM Service URL (fix, nicht via env konfigurierbar) # Private-LLM Service URL (fix, nicht via env konfigurierbar)
@ -242,8 +247,8 @@ class AiPrivateLlm(BaseConnectorAi):
temperature=0.1, temperature=0.1,
maxTokens=4096, maxTokens=4096,
contextLength=8192, # Reduced for RAM constraints contextLength=8192, # Reduced for RAM constraints
costPer1kTokensInput=0.0, # Flat rate pricing costPer1kTokensInput=PRICE_INPUT_PER_1K,
costPer1kTokensOutput=0.0, # Flat rate pricing costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
speedRating=8, # Fast and efficient speedRating=8, # Fast and efficient
qualityRating=9, # High quality text model qualityRating=9, # High quality text model
functionCall=self.callAiText, functionCall=self.callAiText,
@ -259,7 +264,7 @@ class AiPrivateLlm(BaseConnectorAi):
(OperationTypeEnum.AGENT, 8), (OperationTypeEnum.AGENT, 8),
), ),
version="qwen2.5:7b", version="qwen2.5:7b",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_TEXT_PER_CALL calculatepriceCHF=_calcPrivatePriceCHF
), ),
"ollamaModel": "qwen2.5:7b" "ollamaModel": "qwen2.5:7b"
}, },
@ -273,8 +278,8 @@ class AiPrivateLlm(BaseConnectorAi):
temperature=0.2, temperature=0.2,
maxTokens=2048, maxTokens=2048,
contextLength=4096, # Reduced for RAM constraints (vision needs more) contextLength=4096, # Reduced for RAM constraints (vision needs more)
costPer1kTokensInput=0.0, # Flat rate pricing costPer1kTokensInput=PRICE_INPUT_PER_1K,
costPer1kTokensOutput=0.0, # Flat rate pricing costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
speedRating=7, speedRating=7,
qualityRating=9, qualityRating=9,
functionCall=self.callAiVision, functionCall=self.callAiVision,
@ -285,7 +290,7 @@ class AiPrivateLlm(BaseConnectorAi):
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9), (OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
), ),
version="qwen2.5vl:7b", version="qwen2.5vl:7b",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL calculatepriceCHF=_calcPrivatePriceCHF
), ),
"ollamaModel": "qwen2.5vl:7b" "ollamaModel": "qwen2.5vl:7b"
}, },
@ -299,8 +304,8 @@ class AiPrivateLlm(BaseConnectorAi):
temperature=0.1, temperature=0.1,
maxTokens=2048, maxTokens=2048,
contextLength=4096, # Reduced for RAM constraints contextLength=4096, # Reduced for RAM constraints
costPer1kTokensInput=0.0, # Flat rate pricing costPer1kTokensInput=PRICE_INPUT_PER_1K,
costPer1kTokensOutput=0.0, # Flat rate pricing costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
speedRating=9, # Fast due to small 2B model speedRating=9, # Fast due to small 2B model
qualityRating=8, # Good for document understanding qualityRating=8, # Good for document understanding
functionCall=self.callAiVision, functionCall=self.callAiVision,
@ -311,7 +316,7 @@ class AiPrivateLlm(BaseConnectorAi):
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9), (OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
), ),
version="granite3.2-vision", version="granite3.2-vision",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL calculatepriceCHF=_calcPrivatePriceCHF
), ),
"ollamaModel": "granite3.2-vision" "ollamaModel": "granite3.2-vision"
}, },
@ -326,8 +331,8 @@ class AiPrivateLlm(BaseConnectorAi):
temperature=0.1, temperature=0.1,
maxTokens=8192, maxTokens=8192,
contextLength=65536, contextLength=65536,
costPer1kTokensInput=0.0, costPer1kTokensInput=PRICE_INPUT_PER_1K,
costPer1kTokensOutput=0.0, costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
speedRating=5, speedRating=5,
qualityRating=10, qualityRating=10,
functionCall=self.callAiText, functionCall=self.callAiText,
@ -342,7 +347,7 @@ class AiPrivateLlm(BaseConnectorAi):
(OperationTypeEnum.AGENT, 9), (OperationTypeEnum.AGENT, 9),
), ),
version="deepseek-r1:70b", version="deepseek-r1:70b",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_REASONING_PER_CALL calculatepriceCHF=_calcPrivatePriceCHF
), ),
"ollamaModel": "deepseek-r1:70b" "ollamaModel": "deepseek-r1:70b"
}, },
@ -356,8 +361,8 @@ class AiPrivateLlm(BaseConnectorAi):
temperature=0.2, temperature=0.2,
maxTokens=4096, maxTokens=4096,
contextLength=131072, contextLength=131072,
costPer1kTokensInput=0.0, costPer1kTokensInput=PRICE_INPUT_PER_1K,
costPer1kTokensOutput=0.0, costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
speedRating=7, speedRating=7,
qualityRating=10, qualityRating=10,
functionCall=self.callAiVision, functionCall=self.callAiVision,
@ -368,7 +373,7 @@ class AiPrivateLlm(BaseConnectorAi):
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 10), (OperationTypeEnum.NEUTRALIZATION_IMAGE, 10),
), ),
version="llama4:scout", version="llama4:scout",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL calculatepriceCHF=_calcPrivatePriceCHF
), ),
"ollamaModel": "llama4:scout" "ollamaModel": "llama4:scout"
}, },
@ -382,7 +387,7 @@ class AiPrivateLlm(BaseConnectorAi):
temperature=0.0, temperature=0.0,
maxTokens=0, maxTokens=0,
contextLength=8192, contextLength=8192,
costPer1kTokensInput=0.0, costPer1kTokensInput=PRICE_EMBED_PER_1K,
costPer1kTokensOutput=0.0, costPer1kTokensOutput=0.0,
speedRating=10, speedRating=10,
qualityRating=8, qualityRating=8,
@ -393,7 +398,7 @@ class AiPrivateLlm(BaseConnectorAi):
(OperationTypeEnum.EMBEDDING, 9), (OperationTypeEnum.EMBEDDING, 9),
), ),
version="nomic-embed-text", version="nomic-embed-text",
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_EMBED_PER_CALL calculatepriceCHF=_calcPrivateEmbedPriceCHF
), ),
"ollamaModel": "nomic-embed-text" "ollamaModel": "nomic-embed-text"
}, },

View file

@ -230,6 +230,7 @@ _CONNECT_TIMEOUT_S = 10
# `_BORROW_WAIT_TIMEOUT_S` seconds before giving up. # `_BORROW_WAIT_TIMEOUT_S` seconds before giving up.
_BORROW_WAIT_TIMEOUT_S = 30.0 _BORROW_WAIT_TIMEOUT_S = 30.0
_BORROW_WAIT_BACKOFF_S = 0.05 _BORROW_WAIT_BACKOFF_S = 0.05
_shuttingDown = False
def _resolvePoolMax() -> int: def _resolvePoolMax() -> int:
@ -315,7 +316,13 @@ class _PoolRegistry:
def closeAllPools() -> None: def closeAllPools() -> None:
"""Public entry point for FastAPI lifespan shutdown hook.""" """Public entry point for FastAPI lifespan shutdown hook.
Sets the shutdown flag first so that any in-flight ``_acquireConn`` loops
abort immediately instead of polling for up to 30 s.
"""
global _shuttingDown
_shuttingDown = True
_PoolRegistry.closeAll() _PoolRegistry.closeAll()
@ -590,7 +597,10 @@ class DatabaseConnector:
psycopg2's pool throws on exhaustion instead of queueing — this helper psycopg2's pool throws on exhaustion instead of queueing — this helper
polls with a short backoff so callers see queue semantics. polls with a short backoff so callers see queue semantics.
Aborts immediately when the application is shutting down.
""" """
if _shuttingDown:
raise psycopg2.pool.PoolError("Application is shutting down")
deadline = time.monotonic() + _BORROW_WAIT_TIMEOUT_S deadline = time.monotonic() + _BORROW_WAIT_TIMEOUT_S
attempt = 0 attempt = 0
while True: while True:
@ -598,6 +608,8 @@ class DatabaseConnector:
return pool.getconn() return pool.getconn()
except psycopg2.pool.PoolError as e: except psycopg2.pool.PoolError as e:
attempt += 1 attempt += 1
if _shuttingDown:
raise psycopg2.pool.PoolError("Application is shutting down")
if time.monotonic() >= deadline: if time.monotonic() >= deadline:
logger.error( logger.error(
"Connection pool exhausted after %.1fs wait (%d retries)", "Connection pool exhausted after %.1fs wait (%d retries)",

View file

@ -2,7 +2,9 @@
# All rights reserved. # All rights reserved.
"""Google ProviderConnector -- Drive and Gmail via Google OAuth.""" """Google ProviderConnector -- Drive and Gmail via Google OAuth."""
import asyncio
import logging import logging
import urllib.parse
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import aiohttp import aiohttp
@ -18,6 +20,41 @@ _CALENDAR_BASE = "https://www.googleapis.com/calendar/v3"
_PEOPLE_BASE = "https://people.googleapis.com/v1" _PEOPLE_BASE = "https://people.googleapis.com/v1"
def _parseGoogleDateRange(text: Optional[str]) -> tuple:
"""Parse a date range from a filter/query string for Calendar timeMin/timeMax.
Supports two ISO dates, a single ISO date (~31 day window) or a YYYY-MM
month pattern. Returns RFC3339 UTC strings (timeMin, timeMax) or (None, None).
"""
import re
from datetime import datetime, timedelta
if not text:
return (None, None)
def _toRfc3339(value: str) -> str:
value = value.strip().rstrip("Z")
if "T" not in value:
value = f"{value}T00:00:00"
return f"{value}Z"
isoMatch = re.findall(r'\d{4}-\d{2}-\d{2}(?:T[\d:]+)?', text)
if len(isoMatch) >= 2:
return (_toRfc3339(isoMatch[0]), _toRfc3339(isoMatch[1]))
if len(isoMatch) == 1:
try:
dt = datetime.fromisoformat(isoMatch[0])
return (_toRfc3339(isoMatch[0]), _toRfc3339((dt + timedelta(days=31)).strftime('%Y-%m-%dT00:00:00')))
except ValueError:
pass
monthMatch = re.match(r'^(\d{4})-(\d{2})$', text.strip())
if monthMatch:
year, month = int(monthMatch.group(1)), int(monthMatch.group(2))
start = f"{year}-{month:02d}-01T00:00:00"
end = f"{year + 1}-01-01T00:00:00" if month == 12 else f"{year}-{month + 1:02d}-01T00:00:00"
return (_toRfc3339(start), _toRfc3339(end))
return (None, None)
async def _googleGet(token: str, url: str) -> Dict[str, Any]: async def _googleGet(token: str, url: str) -> Dict[str, Any]:
headers = {"Authorization": f"Bearer {token}"} headers = {"Authorization": f"Bearer {token}"}
timeout = aiohttp.ClientTimeout(total=20) timeout = aiohttp.ClientTimeout(total=20)
@ -33,6 +70,17 @@ async def _googleGet(token: str, url: str) -> Dict[str, Any]:
return {"error": str(e)} return {"error": str(e)}
def _raiseGoogleError(result: Dict[str, Any], ctx: str) -> None:
    """Log a failed Google API response and raise it as a RuntimeError.

    Browse/search paths call this instead of returning an empty list, so a
    genuine API failure is not mistaken for "no results".

    Args:
        result: The response payload; its ``"error"`` entry is reported when
            present and truthy, otherwise the whole payload is reported.
        ctx: Short label of the failing operation, included in the message.

    Raises:
        RuntimeError: Always.
    """
    detail = None
    if isinstance(result, dict):
        detail = result.get("error")
    if not detail:
        # No usable error entry -- fall back to the raw payload.
        detail = result
    logger.warning("Google error (%s): %s", ctx, detail)
    raise RuntimeError(f"Google error ({ctx}): {detail}")
class DriveAdapter(ServiceAdapter): class DriveAdapter(ServiceAdapter):
"""Google Drive ServiceAdapter -- browse files and folders.""" """Google Drive ServiceAdapter -- browse files and folders."""
@ -53,8 +101,7 @@ class DriveAdapter(ServiceAdapter):
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
logger.warning(f"Google Drive browse failed: {result['error']}") _raiseGoogleError(result, "Google Drive browse")
return []
entries = [] entries = []
for f in result.get("files", []): for f in result.get("files", []):
@ -125,27 +172,51 @@ class DriveAdapter(ServiceAdapter):
path: Optional[str] = None, path: Optional[str] = None,
limit: Optional[int] = None, limit: Optional[int] = None,
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
safeQuery = query.replace("'", "\\'") safeQuery = query.replace("\\", "\\\\").replace("'", "\\'")
folderId = (path or "").strip("/") folderId = (path or "").strip("/")
qParts = [f"name contains '{safeQuery}'", "trashed=false"] # `fullText contains` matches file name AND content (and some metadata),
# which is what users expect from a search -- not just the file name.
qParts = [f"fullText contains '{safeQuery}'", "trashed=false"]
if folderId: if folderId:
qParts.append(f"'{folderId}' in parents") qParts.append(f"'{folderId}' in parents")
qStr = " and ".join(qParts) qStr = " and ".join(qParts)
pageSize = max(1, min(int(limit or 100), 1000)) effectiveLimit = max(1, int(limit)) if limit is not None else None
url = f"{_DRIVE_BASE}/files?q={qStr}&fields=files(id,name,mimeType,size)&pageSize={pageSize}" pageSize = min(effectiveLimit or 100, 1000)
logger.debug(f"Google Drive search: q={qStr}") logger.debug(f"Google Drive search: q={qStr}")
result = await _googleGet(self._token, url) entries: List[ExternalEntry] = []
if "error" in result: pageToken: Optional[str] = None
return [] hardCap = effectiveLimit or 1000
return [ while len(entries) < hardCap:
ExternalEntry( params = {
name=f.get("name", ""), "q": qStr,
path=f"/{f.get('id', '')}", "fields": "nextPageToken,files(id,name,mimeType,size,modifiedTime)",
isFolder=f.get("mimeType") == "application/vnd.google-apps.folder", "pageSize": str(pageSize),
size=int(f.get("size", 0)) if f.get("size") else None, }
) if pageToken:
for f in result.get("files", []) params["pageToken"] = pageToken
] url = f"{_DRIVE_BASE}/files?{urllib.parse.urlencode(params)}"
result = await _googleGet(self._token, url)
if "error" in result:
if not entries:
_raiseGoogleError(result, "Google Drive search")
break
for f in result.get("files", []):
entries.append(ExternalEntry(
name=f.get("name", ""),
path=f"/{f.get('id', '')}",
isFolder=f.get("mimeType") == "application/vnd.google-apps.folder",
size=int(f.get("size", 0)) if f.get("size") else None,
mimeType=f.get("mimeType"),
metadata={"id": f.get("id"), "modifiedTime": f.get("modifiedTime")},
))
if len(entries) >= hardCap:
break
pageToken = result.get("nextPageToken")
if not pageToken:
break
if effectiveLimit is not None:
entries = entries[:effectiveLimit]
return entries
class GmailAdapter(ServiceAdapter): class GmailAdapter(ServiceAdapter):
@ -155,7 +226,8 @@ class GmailAdapter(ServiceAdapter):
self._token = accessToken self._token = accessToken
_DEFAULT_MESSAGE_LIMIT = 100 _DEFAULT_MESSAGE_LIMIT = 100
_MAX_MESSAGE_LIMIT = 500 _MAX_MESSAGE_LIMIT = 1000
_METADATA_FETCH_CAP = 200
async def browse( async def browse(
self, self,
@ -169,8 +241,7 @@ class GmailAdapter(ServiceAdapter):
url = f"{_GMAIL_BASE}/users/me/labels" url = f"{_GMAIL_BASE}/users/me/labels"
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
logger.warning(f"Gmail labels failed: {result['error']}") _raiseGoogleError(result, "Gmail labels")
return []
_SYSTEM_LABELS = {"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"} _SYSTEM_LABELS = {"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"}
labels = [] labels = []
for lbl in result.get("labels", []): for lbl in result.get("labels", []):
@ -188,23 +259,116 @@ class GmailAdapter(ServiceAdapter):
return labels return labels
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT)) effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
url = f"{_GMAIL_BASE}/users/me/messages?labelIds={cleanPath}&maxResults={effectiveLimit}" labelId = await self._resolveLabelId(cleanPath)
result = await _googleGet(self._token, url) if not labelId:
if "error" in result: raise ValueError(
return [] f"Gmail label not found: '{cleanPath}'. Browse the mailbox root ('/') "
f"to list available labels."
)
msgIds, totalEstimate = await self._listMessageIds(
params={"labelIds": labelId}, limit=effectiveLimit,
)
entries = await self._fetchMessageEntries(
msgIds[:self._METADATA_FETCH_CAP], labelPath=labelId,
)
if totalEstimate and totalEstimate > len(msgIds):
entries.append(ExternalEntry(
name=f"(~{totalEstimate} total messages estimated, {len(msgIds)} listed)",
path=f"/{labelId}/_count", isFolder=False,
metadata={"totalEstimate": totalEstimate, "listed": len(msgIds)},
))
elif len(msgIds) > self._METADATA_FETCH_CAP:
entries.append(ExternalEntry(
name=f"({len(msgIds)} messages listed, metadata shown for first {self._METADATA_FETCH_CAP})",
path=f"/{labelId}/_count", isFolder=False,
metadata={"listed": len(msgIds), "metadataShown": self._METADATA_FETCH_CAP},
))
return entries
entries = [] async def _resolveLabelId(self, ref: str) -> Optional[str]:
for msg in result.get("messages", [])[:effectiveLimit]: """Resolve a Gmail label reference (display name / system name / id) to a
msgId = msg.get("id", "") label id. Returns None if nothing matches so the caller can raise a clear
detailUrl = f"{_GMAIL_BASE}/users/me/messages/{msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date" error instead of querying with an invalid label."""
if not ref:
return None
r = ref.strip()
result = await _googleGet(self._token, f"{_GMAIL_BASE}/users/me/labels")
if "error" in result:
_raiseGoogleError(result, "Gmail labels")
labels = result.get("labels", [])
# 1) exact id match (already-resolved id passes through)
for lbl in labels:
if lbl.get("id") == r:
return r
# 2) case-insensitive display-name match
for lbl in labels:
if (lbl.get("name") or "").strip().lower() == r.lower():
return lbl.get("id")
# 3) system label by uppercased name (INBOX, SENT, ...)
up = r.upper()
for lbl in labels:
if lbl.get("id") == up:
return up
return None
async def _listMessageIds(
    self, params: Dict[str, str], limit: int,
) -> tuple:
    """Collect message ids from ``messages.list``, following pagination.

    Pages are requested with ``maxResults = min(limit, 500)`` and
    ``nextPageToken`` is followed until ``limit`` ids are gathered or no
    further page is offered.

    Args:
        params: Base query parameters (for example ``q`` or ``labelIds``);
            ``maxResults`` and ``pageToken`` are added per request.
        limit: Maximum number of ids to return.

    Returns:
        ``(msgIds, totalEstimate)`` where ``totalEstimate`` is the
        ``resultSizeEstimate`` of the first successful page (``None`` when no
        page was fetched or the field is absent).

    Raises:
        RuntimeError: Via ``_raiseGoogleError`` when the very first page
            fails. A failure on a later page ends paging and the ids
            gathered so far are returned.
    """
    collected: List[str] = []
    estimate: Optional[int] = None
    nextToken: Optional[str] = None
    perPage = str(min(limit, 500))
    while len(collected) < limit:
        queryArgs = dict(params)
        queryArgs["maxResults"] = perPage
        if nextToken:
            queryArgs["pageToken"] = nextToken
        page = await _googleGet(
            self._token,
            f"{_GMAIL_BASE}/users/me/messages?{urllib.parse.urlencode(queryArgs)}",
        )
        if "error" in page:
            if not collected:
                _raiseGoogleError(page, "Gmail list messages")
            # Keep the partial result instead of discarding earlier pages.
            break
        if estimate is None:
            estimate = page.get("resultSizeEstimate")
        for item in page.get("messages", []):
            messageId = item.get("id", "")
            if not messageId:
                continue
            collected.append(messageId)
            if len(collected) >= limit:
                break
        nextToken = page.get("nextPageToken")
        if not nextToken:
            break
    return collected, estimate
async def _fetchMessageEntries(self, msgIds: List[str], labelPath: str = "") -> List[ExternalEntry]:
"""Resolve a list of Gmail message ids into ExternalEntries with
Subject/From/Date metadata. Detail fetches run concurrently to avoid a
slow sequential N+1 round-trip per message."""
if not msgIds:
return []
pathPrefix = f"/{labelPath}" if labelPath else ""
async def _one(msgId: str) -> ExternalEntry:
detailUrl = (
f"{_GMAIL_BASE}/users/me/messages/{msgId}"
f"?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date"
)
detail = await _googleGet(self._token, detailUrl) detail = await _googleGet(self._token, detailUrl)
if "error" in detail: if "error" in detail:
entries.append(ExternalEntry(name=f"Message {msgId}", path=f"/{cleanPath}/{msgId}", isFolder=False)) return ExternalEntry(name=f"Message {msgId}", path=f"{pathPrefix}/{msgId}", isFolder=False,
continue metadata={"id": msgId})
headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])} headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])}
entries.append(ExternalEntry( return ExternalEntry(
name=headers.get("Subject", "(no subject)"), name=headers.get("Subject", "(no subject)"),
path=f"/{cleanPath}/{msgId}", path=f"{pathPrefix}/{msgId}",
isFolder=False, isFolder=False,
metadata={ metadata={
"id": msgId, "id": msgId,
@ -212,8 +376,9 @@ class GmailAdapter(ServiceAdapter):
"date": headers.get("Date", ""), "date": headers.get("Date", ""),
"snippet": detail.get("snippet", ""), "snippet": detail.get("snippet", ""),
}, },
)) )
return entries
return list(await asyncio.gather(*[_one(mid) for mid in msgIds]))
async def download(self, path: str) -> DownloadResult: async def download(self, path: str) -> DownloadResult:
"""Download a Gmail message as RFC 822 EML via format=raw.""" """Download a Gmail message as RFC 822 EML via format=raw."""
@ -261,19 +426,34 @@ class GmailAdapter(ServiceAdapter):
limit: Optional[int] = None, limit: Optional[int] = None,
) -> list: ) -> list:
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT)) effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
url = f"{_GMAIL_BASE}/users/me/messages?q={query}&maxResults={effectiveLimit}" params: Dict[str, str] = {"q": query}
result = await _googleGet(self._token, url) labelPath = (path or "").strip("/")
if "error" in result: if labelPath:
return [] labelId = await self._resolveLabelId(labelPath)
return [ if not labelId:
ExternalEntry( raise ValueError(
name=f"Message {m.get('id', '')}", f"Gmail label not found: '{labelPath}'. Browse the mailbox root ('/') "
path=f"/{m.get('id', '')}", f"to list available labels, or search without a label scope."
isFolder=False, )
metadata={"id": m.get("id")}, labelPath = labelId
) params["labelIds"] = labelId
for m in result.get("messages", []) msgIds, totalEstimate = await self._listMessageIds(params, limit=effectiveLimit)
] entries = await self._fetchMessageEntries(
msgIds[:self._METADATA_FETCH_CAP], labelPath=labelPath,
)
if totalEstimate and totalEstimate > len(msgIds):
entries.append(ExternalEntry(
name=f"(~{totalEstimate} total results estimated, {len(msgIds)} listed)",
path=f"/{labelPath or 'search'}/_count", isFolder=False,
metadata={"totalEstimate": totalEstimate, "listed": len(msgIds)},
))
elif len(msgIds) > self._METADATA_FETCH_CAP:
entries.append(ExternalEntry(
name=f"({len(msgIds)} results listed, metadata shown for first {self._METADATA_FETCH_CAP})",
path=f"/{labelPath or 'search'}/_count", isFolder=False,
metadata={"listed": len(msgIds), "metadataShown": self._METADATA_FETCH_CAP},
))
return entries
class CalendarAdapter(ServiceAdapter): class CalendarAdapter(ServiceAdapter):
@ -302,8 +482,7 @@ class CalendarAdapter(ServiceAdapter):
url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250" url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250"
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
logger.warning(f"Google Calendar list failed: {result['error']}") _raiseGoogleError(result, "Google Calendar list")
return []
calendars = result.get("items", []) calendars = result.get("items", [])
if filter: if filter:
f = filter.lower() f = filter.lower()
@ -331,10 +510,14 @@ class CalendarAdapter(ServiceAdapter):
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events" f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true" f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
) )
# Restrict to a date window when the filter is a date range, so large
# multi-year calendars only return the relevant period.
timeMin, timeMax = _parseGoogleDateRange(filter)
if timeMin and timeMax:
url += f"&timeMin={quote(timeMin, safe='')}&timeMax={quote(timeMax, safe='')}"
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
logger.warning(f"Google Calendar events failed: {result['error']}") _raiseGoogleError(result, "Google Calendar events")
return []
events = result.get("items", []) events = result.get("items", [])
return [ return [
ExternalEntry( ExternalEntry(
@ -387,13 +570,23 @@ class CalendarAdapter(ServiceAdapter):
from urllib.parse import quote from urllib.parse import quote
calendarId = (path or "").strip("/").split("/", 1)[0] or "primary" calendarId = (path or "").strip("/").split("/", 1)[0] or "primary"
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT)) effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
url = ( # A date-range query maps to timeMin/timeMax (efficient window fetch);
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events" # otherwise fall back to the free-text q parameter.
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true" timeMin, timeMax = _parseGoogleDateRange(query)
) if timeMin and timeMax:
url = (
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?timeMin={quote(timeMin, safe='')}&timeMax={quote(timeMax, safe='')}"
f"&maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
)
else:
url = (
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true"
)
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
return [] _raiseGoogleError(result, "Google Calendar search")
return [ return [
ExternalEntry( ExternalEntry(
name=ev.get("summary", "(no title)"), name=ev.get("summary", "(no title)"),
@ -479,8 +672,7 @@ class ContactsAdapter(ServiceAdapter):
) )
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
logger.warning(f"Google People connections failed: {result['error']}") _raiseGoogleError(result, "Google People connections")
return []
people = result.get("connections", []) people = result.get("connections", [])
else: else:
groupResource = groupRef groupResource = groupRef
@ -490,8 +682,7 @@ class ContactsAdapter(ServiceAdapter):
) )
grpResult = await _googleGet(self._token, grpUrl) grpResult = await _googleGet(self._token, grpUrl)
if "error" in grpResult: if "error" in grpResult:
logger.warning(f"Google contactGroup detail failed: {grpResult['error']}") _raiseGoogleError(grpResult, "Google contactGroup detail")
return []
memberResourceNames = grpResult.get("memberResourceNames") or [] memberResourceNames = grpResult.get("memberResourceNames") or []
if not memberResourceNames: if not memberResourceNames:
return [] return []
@ -568,7 +759,7 @@ class ContactsAdapter(ServiceAdapter):
) )
result = await _googleGet(self._token, url) result = await _googleGet(self._token, url)
if "error" in result: if "error" in result:
return [] _raiseGoogleError(result, "Google Contacts search")
entries: List[ExternalEntry] = [] entries: List[ExternalEntry] = []
for r in result.get("results", []): for r in result.get("results", []):
p = r.get("person") or {} p = r.get("person") or {}
@ -581,6 +772,8 @@ class ContactsAdapter(ServiceAdapter):
metadata={ metadata={
"id": p.get("resourceName"), "id": p.get("resourceName"),
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")], "emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
"phones": [pn.get("value") for pn in (p.get("phoneNumbers") or []) if pn.get("value")],
"organization": (p.get("organizations") or [{}])[0].get("name") if p.get("organizations") else None,
}, },
) )
) )

View file

@ -96,6 +96,17 @@ async def _infomaniakGet(
return {"error": str(e)} return {"error": str(e)}
def _raiseInfomaniakError(result: Dict[str, Any], ctx: str) -> None:
    """Log a failed Infomaniak API response and raise it as a RuntimeError.

    Browse/search paths call this instead of returning an empty list, so a
    genuine API failure is not mistaken for "no results".

    Args:
        result: The response payload; its ``"error"`` entry is reported when
            present and truthy, otherwise the whole payload is reported.
        ctx: Short label of the failing operation, included in the message.

    Raises:
        RuntimeError: Always.
    """
    detail = None
    if isinstance(result, dict):
        detail = result.get("error")
    if not detail:
        # No usable error entry -- fall back to the raw payload.
        detail = result
    logger.warning("Infomaniak error (%s): %s", ctx, detail)
    raise RuntimeError(f"Infomaniak error ({ctx}): {detail}")
async def _infomaniakDownload( async def _infomaniakDownload(
token: str, token: str,
endpoint: str, endpoint: str,
@ -358,10 +369,7 @@ class KdriveAdapter(ServiceAdapter):
result = await _infomaniakGet(self._token, endpoint) result = await _infomaniakGet(self._token, endpoint)
if isinstance(result, dict) and result.get("error"): if isinstance(result, dict) and result.get("error"):
logger.warning( _raiseInfomaniakError(result, f"kDrive list-children {driveId}/{fileId or 'root'}")
f"kDrive list-children {driveId}/{fileId or 'root'} failed: {result['error']}"
)
return []
data = _unwrapData(result) data = _unwrapData(result)
items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else [] items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else []
@ -426,7 +434,7 @@ class KdriveAdapter(ServiceAdapter):
endpoint = f"/2/drive/{driveId}/files/search?query={query}&per_page={pageSize}" endpoint = f"/2/drive/{driveId}/files/search?query={query}&per_page={pageSize}"
result = await _infomaniakGet(self._token, endpoint) result = await _infomaniakGet(self._token, endpoint)
if isinstance(result, dict) and result.get("error"): if isinstance(result, dict) and result.get("error"):
return [] _raiseInfomaniakError(result, "kDrive search")
data = _unwrapData(result) data = _unwrapData(result)
items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else [] items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else []
@ -495,7 +503,7 @@ class CalendarAdapter(ServiceAdapter):
if not segments: if not segments:
return await self._listCalendars() return await self._listCalendars()
if len(segments) == 1: if len(segments) == 1:
return await self._listEvents(segments[0], limit=limit) return await self._listEvents(segments[0], limit=limit, filter=filter)
return [] return []
async def _listCalendars(self) -> List[ExternalEntry]: async def _listCalendars(self) -> List[ExternalEntry]:
@ -503,8 +511,7 @@ class CalendarAdapter(ServiceAdapter):
self._token, f"{_PIM_PREFIX}/calendar", baseUrl=_CALENDAR_BASE self._token, f"{_PIM_PREFIX}/calendar", baseUrl=_CALENDAR_BASE
) )
if isinstance(result, dict) and result.get("error"): if isinstance(result, dict) and result.get("error"):
logger.warning(f"Calendar list-calendars failed: {result['error']}") _raiseInfomaniakError(result, "Calendar list-calendars")
return []
data = _unwrapData(result) data = _unwrapData(result)
calendars = data.get("calendars", []) if isinstance(data, dict) else [] calendars = data.get("calendars", []) if isinstance(data, dict) else []
entries: List[ExternalEntry] = [] entries: List[ExternalEntry] = []
@ -527,18 +534,64 @@ class CalendarAdapter(ServiceAdapter):
)) ))
return entries return entries
def _eventWindow(self) -> tuple: def _eventWindow(self, filter: Optional[str] = None) -> tuple:
# Honour an explicit date range from the agent (e.g. "2026-06" or
# "2026-06-01 2026-06-30"), clamped to the vendor's <3 month limit.
# Otherwise fall back to the default 90-day browsing window.
rng = self._parseFilterWindow(filter)
if rng:
return rng
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
fromStr = (now - timedelta(days=self._PAST_DAYS)).strftime("%Y-%m-%d %H:%M:%S") fromStr = (now - timedelta(days=self._PAST_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
toStr = (now + timedelta(days=self._FUTURE_DAYS)).strftime("%Y-%m-%d %H:%M:%S") toStr = (now + timedelta(days=self._FUTURE_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
return fromStr, toStr return fromStr, toStr
@staticmethod
def _parseFilterWindow(filter: Optional[str]) -> Optional[tuple]:
"""Parse a date range from a filter string into Infomaniak's
'Y-m-d H:i:s' from/to window, clamped to <3 months. Returns None when
the filter is not a parseable date range."""
if not filter:
return None
iso = re.findall(r'\d{4}-\d{2}-\d{2}', filter)
start = end = None
if len(iso) >= 2:
start, end = iso[0], iso[1]
elif len(iso) == 1:
start = iso[0]
else:
month = re.match(r'^(\d{4})-(\d{2})$', filter.strip())
if not month:
return None
year, mon = int(month.group(1)), int(month.group(2))
start = f"{year}-{mon:02d}-01"
end = f"{year + 1}-01-01" if mon == 12 else f"{year}-{mon + 1:02d}-01"
try:
startDt = datetime.fromisoformat(start)
except ValueError:
return None
if end:
try:
endDt = datetime.fromisoformat(end)
except ValueError:
endDt = startDt + timedelta(days=31)
else:
endDt = startDt + timedelta(days=31)
# Clamp to vendor limit (<3 months).
if endDt - startDt > timedelta(days=85):
endDt = startDt + timedelta(days=85)
return (
startDt.strftime("%Y-%m-%d %H:%M:%S"),
endDt.strftime("%Y-%m-%d %H:%M:%S"),
)
async def _listEvents( async def _listEvents(
self, self,
calendarId: str, calendarId: str,
limit: Optional[int], limit: Optional[int],
filter: Optional[str] = None,
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
fromStr, toStr = self._eventWindow() fromStr, toStr = self._eventWindow(filter)
endpoint = ( endpoint = (
f"{_PIM_PREFIX}/event" f"{_PIM_PREFIX}/event"
f"?calendar_id={calendarId}" f"?calendar_id={calendarId}"
@ -547,8 +600,7 @@ class CalendarAdapter(ServiceAdapter):
) )
result = await _infomaniakGet(self._token, endpoint, baseUrl=_CALENDAR_BASE) result = await _infomaniakGet(self._token, endpoint, baseUrl=_CALENDAR_BASE)
if isinstance(result, dict) and result.get("error"): if isinstance(result, dict) and result.get("error"):
logger.warning(f"Calendar list-events {calendarId} failed: {result['error']}") _raiseInfomaniakError(result, f"Calendar list-events {calendarId}")
return []
data = _unwrapData(result) data = _unwrapData(result)
events = data if isinstance(data, list) else data.get("events", []) if isinstance(data, dict) else [] events = data if isinstance(data, list) else data.get("events", []) if isinstance(data, dict) else []
entries: List[ExternalEntry] = [] entries: List[ExternalEntry] = []
@ -626,11 +678,14 @@ class CalendarAdapter(ServiceAdapter):
) )
if not calendars: if not calendars:
return [] return []
needle = (query or "").strip().lower() # A date-range query maps directly to the event window; a free-text
# query keeps the default window and filters on title/location.
dateWindow = self._parseFilterWindow(query)
needle = "" if dateWindow else (query or "").strip().lower()
results: List[ExternalEntry] = [] results: List[ExternalEntry] = []
for cal in calendars: for cal in calendars:
calId = (cal.metadata or {}).get("id") or cal.path.strip("/") calId = (cal.metadata or {}).get("id") or cal.path.strip("/")
for ev in await self._listEvents(calId, limit=limit): for ev in await self._listEvents(calId, limit=limit, filter=query if dateWindow else None):
hay = " ".join( hay = " ".join(
str(v) for v in ( str(v) for v in (
ev.name, ev.name,
@ -768,8 +823,7 @@ class ContactAdapter(ServiceAdapter):
self._token, f"{_PIM_PREFIX}/addressbook", baseUrl=_CONTACTS_BASE self._token, f"{_PIM_PREFIX}/addressbook", baseUrl=_CONTACTS_BASE
) )
if isinstance(result, dict) and result.get("error"): if isinstance(result, dict) and result.get("error"):
logger.warning(f"Contacts list-addressbooks failed: {result['error']}") _raiseInfomaniakError(result, "Contacts list-addressbooks")
return []
data = _unwrapData(result) data = _unwrapData(result)
books = data.get("addressbooks", []) if isinstance(data, dict) else [] books = data.get("addressbooks", []) if isinstance(data, dict) else []
entries: List[ExternalEntry] = [] entries: List[ExternalEntry] = []
@ -809,10 +863,7 @@ class ContactAdapter(ServiceAdapter):
) )
result = await _infomaniakGet(self._token, endpoint, baseUrl=_CONTACTS_BASE) result = await _infomaniakGet(self._token, endpoint, baseUrl=_CONTACTS_BASE)
if isinstance(result, dict) and result.get("error"): if isinstance(result, dict) and result.get("error"):
logger.warning( _raiseInfomaniakError(result, f"Contacts list-contacts {addressBookId}")
f"Contacts list-contacts {addressBookId} failed: {result['error']}"
)
return []
data = _unwrapData(result) data = _unwrapData(result)
if isinstance(data, list): if isinstance(data, list):
return [c for c in data if isinstance(c, dict)] return [c for c in data if isinstance(c, dict)]

View file

@ -9,6 +9,7 @@ UserConnection (authority=msft).
import logging import logging
import aiohttp import aiohttp
import asyncio import asyncio
import urllib.parse
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
@ -69,6 +70,8 @@ async def _makeGraphCall(
"Authorization": f"Bearer {token}", "Authorization": f"Bearer {token}",
"Content-Type": contentType, "Content-Type": contentType,
} }
if "$count=true" in endpoint:
headers["ConsistencyLevel"] = "eventual"
timeout = aiohttp.ClientTimeout(total=30) timeout = aiohttp.ClientTimeout(total=30)
try: try:
async with aiohttp.ClientSession(timeout=timeout) as session: async with aiohttp.ClientSession(timeout=timeout) as session:
@ -124,6 +127,18 @@ def _stripGraphBase(url: str) -> str:
return url return url
def _raiseGraphError(result: Dict[str, Any], ctx: str) -> None:
    """Log a failed Graph response and raise it as a RuntimeError.

    Browse/search code paths call this instead of returning an empty list, so
    that an API failure is not mistaken for an empty directory.

    Args:
        result: The response payload. When it is a dict, its ``"error"`` entry
            is used as the error detail; otherwise (or when that entry is
            missing/falsy) the whole payload is reported.
        ctx: Short label of the failing operation, embedded in the message.

    Raises:
        RuntimeError: Always.
    """
    detail = None
    if isinstance(result, dict):
        detail = result.get("error")
    # Fall back to the raw payload so the message is never empty.
    shown = detail or result
    logger.warning("Graph error (%s): %s", ctx, shown)
    raise RuntimeError(f"Graph error ({ctx}): {shown}")
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry: def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
isFolder = "folder" in item isFolder = "folder" in item
# Graph exposes the driveItem content hash as ``eTag`` (quoted) or # Graph exposes the driveItem content hash as ``eTag`` (quoted) or
@ -189,7 +204,8 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
while endpoint and len(items) < hardCap: while endpoint and len(items) < hardCap:
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
logger.warning(f"SharePoint browse failed: {result['error']}") if not items:
_raiseGraphError(result, "SharePoint browse")
break break
for raw in result.get("value", []) or []: for raw in result.get("value", []) or []:
items.append(raw) items.append(raw)
@ -211,8 +227,7 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
"""Discover accessible SharePoint sites.""" """Discover accessible SharePoint sites."""
result = await self._graphGet("sites?search=*&$top=50") result = await self._graphGet("sites?search=*&$top=50")
if "error" in result: if "error" in result:
logger.warning(f"SharePoint site discovery failed: {result['error']}") _raiseGraphError(result, "SharePoint site discovery")
return []
return [ return [
ExternalEntry( ExternalEntry(
name=s.get("displayName") or s.get("name", ""), name=s.get("displayName") or s.get("name", ""),
@ -253,17 +268,37 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
path: Optional[str] = None, path: Optional[str] = None,
limit: Optional[int] = None, limit: Optional[int] = None,
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
siteId, _ = _parseSharepointPath(path or "") siteId, folderPath = _parseSharepointPath(path or "")
if not siteId: if not siteId:
return [] return []
safeQuery = query.replace("'", "''") safeQuery = query.replace("'", "''")
endpoint = f"sites/{siteId}/drive/root/search(q='{safeQuery}')" cleanFolder = (folderPath or "").strip("/")
result = await self._graphGet(endpoint) # Scope the search to the attached folder when one is given, so the agent
if "error" in result: # does not get hits from unrelated parts of the site drive.
return [] if cleanFolder:
entries = [_graphItemToExternalEntry(item) for item in result.get("value", [])] endpoint: Optional[str] = f"sites/{siteId}/drive/root:/{cleanFolder}:/search(q='{safeQuery}')?$top=200"
if limit is not None: else:
entries = entries[: max(1, int(limit))] endpoint = f"sites/{siteId}/drive/root/search(q='{safeQuery}')?$top=200"
effectiveLimit = int(limit) if limit is not None else None
items: List[Dict[str, Any]] = []
hardCap = 1000
while endpoint and len(items) < hardCap:
result = await self._graphGet(endpoint)
if "error" in result:
if not items:
_raiseGraphError(result, "SharePoint search")
break
for raw in result.get("value", []) or []:
items.append(raw)
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
entries = [_graphItemToExternalEntry(item) for item in items]
if effectiveLimit is not None:
entries = entries[: max(1, effectiveLimit)]
return entries return entries
@ -273,6 +308,50 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
_CHARSET_META = '<meta charset="utf-8">' _CHARSET_META = '<meta charset="utf-8">'
def _parseDateRange(filterStr: Optional[str]) -> tuple:
"""Parse a date range from a filter/query string.
Supports two ISO dates ("2026-06-01 2026-06-30"), a single ISO date
(treated as a ~31 day window), or a YYYY-MM month pattern. Returns
(startDateTime, endDateTime) ISO strings, or (None, None) if not parseable.
"""
import re
from datetime import datetime, timedelta
if not filterStr:
return (None, None)
isoMatch = re.findall(r'\d{4}-\d{2}-\d{2}(?:T[\d:]+)?', filterStr)
if len(isoMatch) >= 2:
return (isoMatch[0], isoMatch[1])
if len(isoMatch) == 1:
try:
dt = datetime.fromisoformat(isoMatch[0])
return (isoMatch[0], (dt + timedelta(days=31)).strftime('%Y-%m-%dT00:00:00'))
except ValueError:
pass
monthMatch = re.match(r'^(\d{4})-(\d{2})$', filterStr.strip())
if monthMatch:
year, month = int(monthMatch.group(1)), int(monthMatch.group(2))
start = f"{year}-{month:02d}-01T00:00:00"
if month == 12:
end = f"{year + 1}-01-01T00:00:00"
else:
end = f"{year}-{month + 1:02d}-01T00:00:00"
return (start, end)
return (None, None)
def _toGraphUtc(isoStr: str) -> str:
"""Normalise an ISO date/datetime to a Graph-compatible UTC string
(always 'YYYY-MM-DDTHH:MM:SSZ')."""
if not isoStr:
return isoStr
value = isoStr.strip().rstrip("Z")
if "T" not in value:
value = f"{value}T00:00:00"
return f"{value}Z"
def _ensureHtmlCharset(html: str) -> str: def _ensureHtmlCharset(html: str) -> str:
"""Ensure HTML body has a charset meta tag so Outlook renders UTF-8 correctly.""" """Ensure HTML body has a charset meta tag so Outlook renders UTF-8 correctly."""
if "charset" in html.lower(): if "charset" in html.lower():
@ -350,25 +429,62 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
for f in folders for f in folders
] ]
folderId = path.strip("/") # The incoming path segment may be a display name ("MGB-Ablage"), a
# well-known shortcut ("inbox") or an already-resolved Graph folder id.
# Resolve it to a real id first; otherwise Graph rejects the URL with
# 400 ErrorInvalidIdMalformed.
folderRef = path.strip("/")
folderId = await self._resolveFolderId(folderRef)
if not folderId:
raise ValueError(
f"Outlook folder not found: '{folderRef}'. Browse the mailbox root "
f"(path '/') or call listMailFolders to obtain a valid folder id."
)
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT)) effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit) pageSize = min(self._PAGE_SIZE, effectiveLimit)
endpoint: Optional[str] = ( # Optional date-range filter (e.g. "2026-06" or "2026-06-01 2026-06-30")
f"me/mailFolders/{folderId}/messages" # so only that period is fetched server-side instead of paging the whole
f"?$top={pageSize}&$orderby=receivedDateTime desc" # folder. Falls back to a plain newest-first listing otherwise.
) startDateTime, endDateTime = _parseDateRange(filter)
countParam = "&$count=true"
if startDateTime and endDateTime:
dateFilter = (
f"receivedDateTime ge {_toGraphUtc(startDateTime)} and "
f"receivedDateTime lt {_toGraphUtc(endDateTime)}"
)
endpoint: Optional[str] = (
f"me/mailFolders/{folderId}/messages"
f"?$top={pageSize}&$orderby=receivedDateTime desc"
f"&$filter={urllib.parse.quote(dateFilter)}{countParam}"
)
else:
endpoint = (
f"me/mailFolders/{folderId}/messages"
f"?$top={pageSize}&$orderby=receivedDateTime desc{countParam}"
)
messages: List[Dict[str, Any]] = [] messages: List[Dict[str, Any]] = []
totalCount: Optional[int] = None
firstPage = True
while endpoint and len(messages) < effectiveLimit: while endpoint and len(messages) < effectiveLimit:
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
if firstPage:
err = result.get("error") or {}
raise RuntimeError(
f"Graph error listing messages in folder '{folderRef}': "
f"{err.get('message') or err}"
)
break break
if firstPage and "@odata.count" in result:
totalCount = result["@odata.count"]
firstPage = False
for m in result.get("value", []): for m in result.get("value", []):
messages.append(m) messages.append(m)
if len(messages) >= effectiveLimit: if len(messages) >= effectiveLimit:
break break
nextLink = result.get("@odata.nextLink") nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None endpoint = _stripGraphBase(nextLink) if nextLink else None
return [ entries = [
ExternalEntry( ExternalEntry(
name=m.get("subject", "(no subject)"), name=m.get("subject", "(no subject)"),
path=f"{path}/{m.get('id', '')}", path=f"{path}/{m.get('id', '')}",
@ -382,6 +498,13 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
) )
for m in messages for m in messages
] ]
if totalCount is not None and totalCount > len(entries):
entries.append(ExternalEntry(
name=f"({totalCount} total messages in folder, {len(entries)} listed)",
path=f"{path}/_count", isFolder=False,
metadata={"totalCount": totalCount, "listed": len(entries)},
))
return entries
async def download(self, path: str) -> DownloadResult: async def download(self, path: str) -> DownloadResult:
"""Download a mail message as RFC 822 EML via Graph API $value endpoint.""" """Download a mail message as RFC 822 EML via Graph API $value endpoint."""
@ -412,14 +535,28 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
path: Optional[str] = None, path: Optional[str] = None,
limit: Optional[int] = None, limit: Optional[int] = None,
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''") safeQuery = query.replace('"', '\\"')
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT)) effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
# Scope the search to the attached folder when one is given, so the agent
# gets hits only from e.g. the Inbox instead of the whole mailbox. Resolve
# the folder reference (display name / well-known / id) to a real id first.
folderRef = (path or "").strip("/")
base = "me/messages"
if folderRef:
folderId = await self._resolveFolderId(folderRef)
if not folderId:
raise ValueError(
f"Outlook folder not found: '{folderRef}'. Call listMailFolders "
f"to obtain a valid folder id, or search without a folder scope."
)
base = f"me/mailFolders/{folderId}/messages"
# NOTE: Graph $search does not support $orderby and may return a single # NOTE: Graph $search does not support $orderby and may return a single
# page (no @odata.nextLink). We still pass $top to lift the implicit 25. # page (no @odata.nextLink). We still pass $top to lift the implicit 25.
endpoint = f"me/messages?$search=\"{safeQuery}\"&$top={effectiveLimit}" endpoint = f"{base}?$search=\"{safeQuery}\"&$top={effectiveLimit}"
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
return [] err = result.get("error") or {}
raise RuntimeError(f"Graph error searching mail: {err.get('message') or err}")
return [ return [
ExternalEntry( ExternalEntry(
name=m.get("subject", "(no subject)"), name=m.get("subject", "(no subject)"),
@ -774,8 +911,7 @@ class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
if not cleanPath: if not cleanPath:
result = await self._graphGet("me/joinedTeams") result = await self._graphGet("me/joinedTeams")
if "error" in result: if "error" in result:
logger.warning(f"Teams browse failed: {result['error']}") _raiseGraphError(result, "Teams browse")
return []
return [ return [
ExternalEntry( ExternalEntry(
name=t.get("displayName", ""), name=t.get("displayName", ""),
@ -791,7 +927,7 @@ class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
if len(parts) == 1: if len(parts) == 1:
result = await self._graphGet(f"teams/{teamId}/channels") result = await self._graphGet(f"teams/{teamId}/channels")
if "error" in result: if "error" in result:
return [] _raiseGraphError(result, "Teams channels")
return [ return [
ExternalEntry( ExternalEntry(
name=ch.get("displayName", ""), name=ch.get("displayName", ""),
@ -834,18 +970,33 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
cleanPath = (path or "").strip("/") cleanPath = (path or "").strip("/")
if not cleanPath: if not cleanPath:
endpoint = "me/drive/root/children" endpoint: Optional[str] = "me/drive/root/children?$top=200"
else: else:
endpoint = f"me/drive/root:/{cleanPath}:/children" endpoint = f"me/drive/root:/{cleanPath}:/children?$top=200"
result = await self._graphGet(endpoint) effectiveLimit = int(limit) if limit is not None else None
if "error" in result: items: List[Dict[str, Any]] = []
return [] hardCap = 5000
entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])] while endpoint and len(items) < hardCap:
result = await self._graphGet(endpoint)
if "error" in result:
if not items:
_raiseGraphError(result, "OneDrive browse")
break
for raw in result.get("value", []) or []:
items.append(raw)
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
entries = [_graphItemToExternalEntry(item, path) for item in items]
if filter: if filter:
entries = [e for e in entries if _matchFilter(e, filter)] entries = [e for e in entries if _matchFilter(e, filter)]
if limit is not None: if effectiveLimit is not None:
entries = entries[: max(1, int(limit))] entries = entries[: max(1, effectiveLimit)]
return entries return entries
async def download(self, path: str) -> bytes: async def download(self, path: str) -> bytes:
@ -868,13 +1019,32 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
limit: Optional[int] = None, limit: Optional[int] = None,
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''") safeQuery = query.replace("'", "''")
endpoint = f"me/drive/root/search(q='{safeQuery}')" cleanPath = (path or "").strip("/")
result = await self._graphGet(endpoint) # Scope to the attached folder if given, otherwise search the whole drive.
if "error" in result: if cleanPath:
return [] endpoint: Optional[str] = f"me/drive/root:/{cleanPath}:/search(q='{safeQuery}')?$top=200"
entries = [_graphItemToExternalEntry(item) for item in result.get("value", [])] else:
if limit is not None: endpoint = f"me/drive/root/search(q='{safeQuery}')?$top=200"
entries = entries[: max(1, int(limit))] effectiveLimit = int(limit) if limit is not None else None
items: List[Dict[str, Any]] = []
hardCap = 1000
while endpoint and len(items) < hardCap:
result = await self._graphGet(endpoint)
if "error" in result:
if not items:
_raiseGraphError(result, "OneDrive search")
break
for raw in result.get("value", []) or []:
items.append(raw)
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
if effectiveLimit is not None and len(items) >= effectiveLimit:
break
nextLink = result.get("@odata.nextLink")
endpoint = _stripGraphBase(nextLink) if nextLink else None
entries = [_graphItemToExternalEntry(item) for item in items]
if effectiveLimit is not None:
entries = entries[: max(1, effectiveLimit)]
return entries return entries
@ -908,8 +1078,7 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
if not cleanPath: if not cleanPath:
result = await self._graphGet("me/calendars?$top=100") result = await self._graphGet("me/calendars?$top=100")
if "error" in result: if "error" in result:
logger.warning(f"MSFT Calendar list failed: {result['error']}") _raiseGraphError(result, "MSFT Calendar list")
return []
calendars = result.get("value", []) calendars = result.get("value", [])
if filter: if filter:
calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()] calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()]
@ -929,7 +1098,15 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
for c in calendars for c in calendars
] ]
calendarId = cleanPath.split("/", 1)[0] # The path segment may be a calendar display name or an already-resolved
# calendar id; resolve first so a name does not produce a malformed URL.
calendarRef = cleanPath.split("/", 1)[0]
calendarId = await self._resolveCalendarId(calendarRef)
if not calendarId:
raise ValueError(
f"Calendar not found: '{calendarRef}'. Browse the root ('/') to list "
f"calendars and use the returned id."
)
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT)) effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit) pageSize = min(self._PAGE_SIZE, effectiveLimit)
@ -952,7 +1129,8 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
while endpoint and len(events) < effectiveLimit: while endpoint and len(events) < effectiveLimit:
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
logger.warning(f"MSFT Calendar events failed: {result['error']}") if not events:
_raiseGraphError(result, "MSFT Calendar events")
break break
for ev in result.get("value", []): for ev in result.get("value", []):
events.append(ev) events.append(ev)
@ -980,32 +1158,34 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
for ev in events for ev in events
] ]
async def _resolveCalendarId(self, ref: str) -> Optional[str]:
    """Turn a calendar reference into a Graph calendar id.

    Resolution order:
      1. A reference longer than 60 characters without spaces is assumed to
         be an id already and returned without a lookup.
      2. An exact id match among the listed calendars.
      3. For the aliases 'default', 'primary', 'calendar' and 'kalender'
         (case-insensitive): the first calendar flagged ``isDefaultCalendar``.
      4. A case-insensitive match on the calendar name.

    Args:
        ref: Display name, alias or id of the calendar.

    Returns:
        The calendar id, or None when ``ref`` is empty or nothing matches.

    Raises:
        RuntimeError: Via ``_raiseGraphError`` when listing calendars fails.
    """
    if not ref:
        return None
    wanted = ref.strip()
    # Heuristic: Graph ids are long URL-safe strings without spaces.
    if " " not in wanted and len(wanted) > 60:
        return wanted

    response = await self._graphGet("me/calendars?$top=100")
    if "error" in response:
        _raiseGraphError(response, "MSFT Calendar list")
    calendars = response.get("value", [])

    if any(cal.get("id") == wanted for cal in calendars):
        return wanted

    lowered = wanted.lower()
    if lowered in ("default", "primary", "calendar", "kalender"):
        defaultCal = next(
            (cal for cal in calendars if cal.get("isDefaultCalendar")), None
        )
        if defaultCal is not None:
            return defaultCal.get("id")

    # Alias without a flagged default falls through to the name match too.
    named = next(
        (
            cal for cal in calendars
            if (cal.get("name") or "").strip().lower() == lowered
        ),
        None,
    )
    return None if named is None else named.get("id")
@staticmethod @staticmethod
def _parseDateRange(filterStr: Optional[str]) -> tuple: def _parseDateRange(filterStr: Optional[str]) -> tuple:
"""Parse date range from filter string. Supports ISO dates or YYYY-MM patterns.""" return _parseDateRange(filterStr)
import re
from datetime import datetime, timedelta
if not filterStr:
return (None, None)
isoMatch = re.findall(r'\d{4}-\d{2}-\d{2}(?:T[\d:]+)?', filterStr)
if len(isoMatch) >= 2:
return (isoMatch[0], isoMatch[1])
if len(isoMatch) == 1:
try:
dt = datetime.fromisoformat(isoMatch[0])
return (isoMatch[0], (dt + timedelta(days=31)).strftime('%Y-%m-%dT00:00:00'))
except ValueError:
pass
monthMatch = re.match(r'^(\d{4})-(\d{2})$', filterStr.strip())
if monthMatch:
year, month = int(monthMatch.group(1)), int(monthMatch.group(2))
start = f"{year}-{month:02d}-01T00:00:00"
if month == 12:
end = f"{year + 1}-01-01T00:00:00"
else:
end = f"{year}-{month + 1:02d}-01T00:00:00"
return (start, end)
return (None, None)
async def download(self, path: str) -> DownloadResult: async def download(self, path: str) -> DownloadResult:
cleanPath = (path or "").strip("/") cleanPath = (path or "").strip("/")
@ -1050,7 +1230,7 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
return [] _raiseGraphError(result, "MSFT Calendar search")
calendarId = (path or "").strip("/").split("/")[0] if path else "search" calendarId = (path or "").strip("/").split("/")[0] if path else "search"
return [ return [
ExternalEntry( ExternalEntry(
@ -1126,7 +1306,15 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
logger.warning(f"MSFT contactFolders list failed: {result['error']}") logger.warning(f"MSFT contactFolders list failed: {result['error']}")
return folders return folders
folderId = cleanPath.split("/", 1)[0] # The path segment may be a contact-folder display name or an already-
# resolved folder id (or the virtual 'default'); resolve first.
folderRef = cleanPath.split("/", 1)[0]
folderId = await self._resolveContactFolderId(folderRef)
if not folderId:
raise ValueError(
f"Contact folder not found: '{folderRef}'. Browse the root ('/') to "
f"list folders and use the returned id."
)
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT)) effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
pageSize = min(self._PAGE_SIZE, effectiveLimit) pageSize = min(self._PAGE_SIZE, effectiveLimit)
if folderId == self._DEFAULT_FOLDER_ID: if folderId == self._DEFAULT_FOLDER_ID:
@ -1138,7 +1326,8 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
while endpoint and len(contacts) < effectiveLimit: while endpoint and len(contacts) < effectiveLimit:
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
logger.warning(f"MSFT contacts list failed: {result['error']}") if not contacts:
_raiseGraphError(result, "MSFT contacts list")
break break
for c in result.get("value", []): for c in result.get("value", []):
contacts.append(c) contacts.append(c)
@ -1166,6 +1355,28 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
for c in contacts for c in contacts
] ]
async def _resolveContactFolderId(self, ref: str) -> Optional[str]:
    """Turn a contact-folder reference into a folder id.

    Resolution order:
      1. ``self._DEFAULT_FOLDER_ID`` itself, or one of the aliases 'kontakte',
         'contacts', 'default' (case-insensitive), maps to
         ``self._DEFAULT_FOLDER_ID``.
      2. A reference longer than 60 characters without spaces is assumed to
         be an id already and returned without a lookup.
      3. An exact id match among the listed contact folders.
      4. A case-insensitive match on the folder display name.

    Args:
        ref: Display name, alias or id of the contact folder.

    Returns:
        The folder id, or None when ``ref`` is empty or nothing matches.

    Raises:
        RuntimeError: Via ``_raiseGraphError`` when listing folders fails.
    """
    if not ref:
        return None
    wanted = ref.strip()
    lowered = wanted.lower()
    if wanted == self._DEFAULT_FOLDER_ID or lowered in ("kontakte", "contacts", "default"):
        return self._DEFAULT_FOLDER_ID
    # Heuristic: Graph ids are long URL-safe strings without spaces.
    if " " not in wanted and len(wanted) > 60:
        return wanted

    response = await self._graphGet("me/contactFolders?$top=100")
    if "error" in response:
        _raiseGraphError(response, "MSFT contactFolders list")
    folders = response.get("value", [])

    # An id match takes precedence over any display-name match.
    if any(folder.get("id") == wanted for folder in folders):
        return wanted
    named = next(
        (
            folder for folder in folders
            if (folder.get("displayName") or "").strip().lower() == lowered
        ),
        None,
    )
    return None if named is None else named.get("id")
async def download(self, path: str) -> DownloadResult: async def download(self, path: str) -> DownloadResult:
cleanPath = (path or "").strip("/") cleanPath = (path or "").strip("/")
if "/" not in cleanPath: if "/" not in cleanPath:
@ -1193,19 +1404,27 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
path: Optional[str] = None, path: Optional[str] = None,
limit: Optional[int] = None, limit: Optional[int] = None,
) -> List[ExternalEntry]: ) -> List[ExternalEntry]:
safeQuery = query.replace("'", "''") safeQuery = query.replace('"', '\\"')
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT)) effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}" endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}"
result = await self._graphGet(endpoint) result = await self._graphGet(endpoint)
if "error" in result: if "error" in result:
return [] _raiseGraphError(result, "MSFT contacts search")
return [ return [
ExternalEntry( ExternalEntry(
name=c.get("displayName") or _personLabel(c) or "(no name)", name=c.get("displayName") or _personLabel(c) or "(no name)",
path=f"/search/{c.get('id', '')}", path=f"/search/{c.get('id', '')}",
isFolder=False, isFolder=False,
mimeType="text/vcard", mimeType="text/vcard",
metadata={"id": c.get("id")}, metadata={
"id": c.get("id"),
"givenName": c.get("givenName"),
"surname": c.get("surname"),
"companyName": c.get("companyName"),
"emailAddresses": [e.get("address") for e in (c.get("emailAddresses") or []) if e.get("address")],
"businessPhones": c.get("businessPhones") or [],
"mobilePhone": c.get("mobilePhone"),
},
) )
for c in result.get("value", []) for c in result.get("value", [])
] ]

View file

@ -111,7 +111,6 @@ class ChatMessage(PowerOnModel):
class WorkflowModeEnum(str, Enum): class WorkflowModeEnum(str, Enum):
WORKFLOW_DYNAMIC = "Dynamic" WORKFLOW_DYNAMIC = "Dynamic"
WORKFLOW_AUTOMATION = "Automation" WORKFLOW_AUTOMATION = "Automation"
WORKFLOW_CHATBOT = "Chatbot"
@i18nModel("Chat-Workflow") @i18nModel("Chat-Workflow")
class ChatWorkflow(PowerOnModel): class ChatWorkflow(PowerOnModel):
@ -169,10 +168,6 @@ class ChatWorkflow(PowerOnModel):
"value": WorkflowModeEnum.WORKFLOW_AUTOMATION.value, "value": WorkflowModeEnum.WORKFLOW_AUTOMATION.value,
"label": "Automation", "label": "Automation",
}, },
{
"value": WorkflowModeEnum.WORKFLOW_CHATBOT.value,
"label": "Chatbot",
},
]}) ]})
maxSteps: int = Field(default=10, description="Maximum number of iterations in dynamic mode", json_schema_extra={"label": "Max. Schritte", "frontend_type": "integer", "frontend_readonly": False, "frontend_required": False}) maxSteps: int = Field(default=10, description="Maximum number of iterations in dynamic mode", json_schema_extra={"label": "Max. Schritte", "frontend_type": "integer", "frontend_readonly": False, "frontend_required": False})
expectedFormats: Optional[List[str]] = Field(None, description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). Extracted during intent analysis.", json_schema_extra={"label": "Erwartete Formate", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) expectedFormats: Optional[List[str]] = Field(None, description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). Extracted during intent analysis.", json_schema_extra={"label": "Erwartete Formate", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False})

View file

@ -77,19 +77,14 @@ class DataSource(PowerOnModel):
description="Timestamp of last successful RAG indexing run", description="Timestamp of last successful RAG indexing run",
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"}, json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
) )
# scope was removed (privacy, 2026-06). Personal sources must not be
# shared across scopes. Only Files (folder-files) retain scope.
# The DB column is kept as deprecated-nullable to avoid a migration;
# it is never read or written by UDB/ingest/knowledge anymore.
scope: Optional[str] = Field( scope: Optional[str] = Field(
default=None, default=None,
description=( description="DEPRECATED (2026-06, privacy). Always None. Use Files scope instead.",
"Data visibility scope with inherit semantics. " json_schema_extra={"frontend_readonly": True, "frontend_hidden": True},
"None = inherit; values: personal, featureInstance, mandate, global. "
"Cascade-reset on parent toggle."
),
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
{"value": "personal", "label": "Persönlich"},
{"value": "featureInstance", "label": "Feature-Instanz"},
{"value": "mandate", "label": "Mandant"},
{"value": "global", "label": "Global"},
]},
) )
neutralize: Optional[bool] = Field( neutralize: Optional[bool] = Field(
default=None, default=None,

View file

@ -12,9 +12,9 @@ from modules.datamodels.datamodelUtils import TextMultilingual
@i18nModel("Feature") @i18nModel("Feature")
class Feature(PowerOnModel): class Feature(PowerOnModel):
"""Feature-Definition (global, z.B. 'trustee', 'chatbot'). Verfuegbare Funktionalitaeten der Plattform.""" """Feature-Definition (global, z.B. 'trustee', 'commcoach'). Verfuegbare Funktionalitaeten der Plattform."""
code: str = Field( code: str = Field(
description="Unique feature code (Primary Key), z.B. 'trustee', 'chatbot'", description="Unique feature code (Primary Key), z.B. 'trustee', 'commcoach'",
json_schema_extra={"label": "Code", "frontend_type": "text", "frontend_readonly": False, "frontend_required": True} json_schema_extra={"label": "Code", "frontend_type": "text", "frontend_readonly": False, "frontend_required": True}
) )
label: TextMultilingual = Field( label: TextMultilingual = Field(

View file

@ -16,6 +16,9 @@ supportedSectionTypes: List[str] = [
"paragraph", "paragraph",
"code_block", "code_block",
"image", "image",
# Layout primitives (A3): type-specific document layout.
"cover_page", # centered title page (subtitle/author/date/logo), ends with page break
"image_grid", # N-column arrangement of images (marketing-style layouts)
] ]
class InlineRun(TypedDict, total=False): class InlineRun(TypedDict, total=False):

View file

@ -1,15 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot feature - LangGraph-based chatbot implementation.
Lazy-loaded to avoid importing langgraph/langchain at boot time.
"""
async def chatProcess(*args, **kwargs):
"""Lazy wrapper - imports the real chatProcess on first call to defer langgraph loading."""
from .service import chatProcess as _chatProcess
return await _chatProcess(*args, **kwargs)
__all__ = ['chatProcess']

View file

@ -1,3 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Bridges to external systems (AI models, database, tools)."""

View file

@ -1,727 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
AI Center to LangChain bridge.
Implements LangChain BaseChatModel interface using AI center models.
"""
import logging
import asyncio
import time
from typing import Any, AsyncIterator, Callable, Dict, List, Optional
from datetime import datetime
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
BaseMessage,
HumanMessage,
SystemMessage,
AIMessage,
ToolMessage,
convert_to_openai_messages,
)
from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_core.runnables import RunnableConfig
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import modelSelector
from modules.datamodels.datamodelAi import (
AiModel,
AiModelCall,
AiModelResponse,
AiCallResponse,
AiCallOptions,
OperationTypeEnum,
ProcessingModeEnum,
)
from modules.datamodels.datamodelUam import User
logger = logging.getLogger(__name__)
# Workflow-level store for allowed_providers and RBAC context (survives LangGraph/bind_tools
# execution context where instance attributes may be lost when model is wrapped or serialized)
_workflow_allowed_providers: Dict[str, List[str]] = {}
_workflow_rbac_context: Dict[str, tuple] = {} # workflow_id -> (mandateId, featureInstanceId)
def clear_workflow_allowed_providers(workflow_id: str) -> None:
    """Drop all workflow-level state cached for ``workflow_id``.

    Intended to be called when a stream completes so the module-level
    registries do not grow without bound. Both registries are keyed by the
    workflow id, so both are cleared: the allowed-provider list and the RBAC
    context (mandate id / feature instance id).

    Args:
        workflow_id: Workflow/conversation id whose entries are removed.
            Unknown ids are ignored.
    """
    _workflow_allowed_providers.pop(workflow_id, None)
    # The RBAC context is stored under the same key and was previously never
    # released, which leaked one entry per workflow.
    _workflow_rbac_context.pop(workflow_id, None)
class AICenterChatModel(BaseChatModel):
"""
LangChain-compatible chat model that uses AI center models.
Bridges AI center model selection and calling to LangChain's BaseChatModel interface.
"""
def __init__(
    self,
    user: User,
    operation_type: OperationTypeEnum = OperationTypeEnum.DATA_ANALYSE,
    processing_mode: ProcessingModeEnum = ProcessingModeEnum.DETAILED,
    billing_callback: Optional[Callable[[AiCallResponse], None]] = None,
    workflow_id: Optional[str] = None,
    allowed_providers: Optional[List[str]] = None,
    prefer_fast_model: bool = False,
    mandate_id: Optional[str] = None,
    feature_instance_id: Optional[str] = None,
    **kwargs
):
    """
    Set up the bridge between LangChain and the AI center models.

    Args:
        user: Current user, used for RBAC filtering and model selection
        operation_type: Operation type handed to the model selector
        processing_mode: Processing mode handed to the model selector
        billing_callback: Optional callable that receives an AiCallResponse after a model call
        workflow_id: Optional workflow/conversation ID; also the key of the module-level registries
        allowed_providers: Optional list of allowed connector types (empty/None = no restriction)
        prefer_fast_model: When True, the selector is asked to prefer faster models
        mandate_id: Optional mandate ID forwarded to the model registry lookup
        feature_instance_id: Optional feature instance ID forwarded to the model registry lookup
        **kwargs: Forwarded unchanged to BaseChatModel
    """
    super().__init__(**kwargs)

    # These attributes are not declared model fields, so they are written with
    # object.__setattr__ to bypass Pydantic validation.
    custom_attributes = {
        "user": user,
        "operation_type": operation_type,
        "processing_mode": processing_mode,
        "_selected_model": None,
        "_billing_callback": billing_callback,
        "_workflow_id": workflow_id,
        "_allowed_providers": allowed_providers or [],
        "_prefer_fast_model": prefer_fast_model,
        "_mandate_id": mandate_id,
        "_feature_instance_id": feature_instance_id,
    }
    for attribute_name, attribute_value in custom_attributes.items():
        object.__setattr__(self, attribute_name, attribute_value)

    # Without a workflow id there is no key for the module-level registries.
    if not workflow_id:
        return

    # Mirror the settings into the workflow-level registries so they can be
    # recovered when the instance attributes are lost (e.g. after bind_tools).
    if allowed_providers:
        _workflow_allowed_providers[workflow_id] = list(allowed_providers)
    if mandate_id is not None or feature_instance_id is not None:
        _workflow_rbac_context[workflow_id] = (mandate_id, feature_instance_id)
@property
def _llm_type(self) -> str:
    """Return the identifier string of this chat model type (always ``"aicenter"``)."""
    return "aicenter"
def _select_model(self, messages: List[BaseMessage]) -> AiModel:
    """
    Select the AI center model to use for the given messages and cache it.

    The first successful selection is stored on the instance and returned for
    every later call, regardless of the messages passed in.

    Args:
        messages: List of LangChain messages; system/human messages form the
            prompt, AI/tool messages form the context given to the selector

    Returns:
        Selected AI model

    Raises:
        ValueError: If the selector returns no model for the operation type
    """
    # Return cached model if already selected
    if self._selected_model is not None:
        return self._selected_model

    def _as_text(content: Any) -> str:
        # Message content may be a list of parts rather than a plain string;
        # str.join() below would raise TypeError on such values.
        return content if isinstance(content, str) else str(content)

    # Convert messages to prompt/context format for the model selector
    prompt_parts = []
    context_parts = []
    for msg in messages:
        if isinstance(msg, (SystemMessage, HumanMessage)):
            prompt_parts.append(_as_text(msg.content))
        elif isinstance(msg, AIMessage):
            context_parts.append(_as_text(msg.content))
        elif isinstance(msg, ToolMessage):
            context_parts.append(f"Tool {msg.name}: {msg.content}")

    prompt = "\n".join(prompt_parts)
    context = "\n".join(context_parts)

    # Get available models with RBAC filtering
    from modules.interfaces.interfaceDbApp import getRootInterface

    workflow_id = getattr(self, "_workflow_id", None)
    rootInterface = getRootInterface()
    rbac_instance = rootInterface.rbac

    mandate_id = getattr(self, "_mandate_id", None)
    feature_instance_id = getattr(self, "_feature_instance_id", None)
    # Fall back to the workflow-level registry when the instance carries no RBAC context
    if workflow_id and (mandate_id is None and feature_instance_id is None):
        ctx = _workflow_rbac_context.get(workflow_id)
        if ctx:
            mandate_id, feature_instance_id = ctx

    available_models = modelRegistry.getAvailableModels(
        currentUser=self.user,
        rbacInstance=rbac_instance,
        mandateId=mandate_id,
        featureInstanceId=feature_instance_id,
    )

    # Allowed providers: the workflow store takes precedence over the instance attribute
    allowed = (
        (_workflow_allowed_providers.get(workflow_id) if workflow_id else None)
        or getattr(self, '_allowed_providers', None)
        or []
    )
    if allowed:
        logger.info(f"AICenterChatModel _select_model: applying allowedProviders={allowed}")
        filtered = [m for m in available_models if m.connectorType in allowed]
        if filtered:
            available_models = filtered
        else:
            # NOTE(review): when nothing matches, the provider restriction is
            # dropped for the candidate list (only a warning is logged) --
            # confirm this fallback is intended.
            logger.warning(f"No models match allowedProviders {allowed}, using all RBAC-permitted models")

    options = AiCallOptions(
        operationType=self.operation_type,
        processingMode=self.processing_mode,
        allowedProviders=allowed if allowed else None,
        preferFastModel=getattr(self, "_prefer_fast_model", False),
    )

    # Select model
    selected_model = modelSelector.selectModel(
        prompt=prompt,
        context=context,
        options=options,
        availableModels=available_models
    )

    if not selected_model:
        raise ValueError(f"No suitable model found for operation type {self.operation_type.value}")

    logger.info(f"Selected AI center model: {selected_model.displayName} ({selected_model.name})")
    # Cache on the instance (bypassing Pydantic validation) for later calls
    object.__setattr__(self, "_selected_model", selected_model)
    return selected_model
def _convert_messages_to_ai_format(self, messages: List[BaseMessage]) -> List[Dict[str, Any]]:
    """
    Translate LangChain messages into the OpenAI-style message dictionaries
    used by the AI center.

    Args:
        messages: List of LangChain messages

    Returns:
        The result of LangChain's ``convert_to_openai_messages`` for ``messages``
    """
    # Delegates entirely to LangChain's built-in converter.
    return convert_to_openai_messages(messages)
def _convert_ai_response_to_langchain(
    self,
    response: AiModelResponse,
    tool_calls: Optional[List[Dict[str, Any]]] = None
) -> AIMessage:
    """
    Wrap an AI center response in a LangChain AIMessage.

    Args:
        response: AI center response; a missing content becomes an empty string
        tool_calls: Optional tool calls, expected already in LangChain format
            ([{"id": "...", "name": "...", "args": {...}}])

    Returns:
        AIMessage carrying the content, plus ``tool_calls`` when any were given
    """
    text = response.content or ""
    # Only pass tool_calls through when there is at least one entry.
    if tool_calls:
        return AIMessage(content=text, tool_calls=tool_calls)
    return AIMessage(content=text)
def _generate(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[Any] = None,
    **kwargs: Any,
) -> ChatResult:
    """
    Synchronous generate method required by BaseChatModel.

    Runs the async ``_agenerate`` to completion on a fresh event loop.

    Args:
        messages: List of LangChain messages
        stop: Optional stop sequences
        run_manager: Optional callback manager
        **kwargs: Additional arguments

    Returns:
        ChatResult with generations

    Raises:
        RuntimeError: If called while an event loop is already running in
            this thread; use ``_agenerate`` there instead.
    """
    # get_running_loop() raises RuntimeError exactly when no loop is running.
    # The guard error is raised in the ``else`` branch so that it cannot be
    # swallowed by the ``except`` that handles the "no loop" case.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        pass  # no running loop: safe to drive the coroutine ourselves
    else:
        raise RuntimeError(
            "AICenterChatModel._generate() called from async context. "
            "Use _agenerate() instead."
        )

    # The coroutine is only created after the check, so nothing is left un-awaited.
    return asyncio.run(self._agenerate(messages, stop=stop, run_manager=run_manager, **kwargs))
async def _call_openai_streaming(
    self,
    ai_messages: List[dict],
    run_manager: Optional[Any],
    model_call: "AiModelCall",
    input_bytes: int,
    start_time: float,
) -> "AiModelResponse":
    """
    Call an OpenAI-compatible chat endpoint with ``stream=True``.

    Each received content token is forwarded to ``run_manager.on_llm_new_token``
    (when a run manager with that method is given) and the concatenated text is
    returned as the response content.

    Billing is intentionally NOT done here: ``_agenerate`` computes the price
    and invokes the billing callback for every response it obtains, so billing
    in this helper as well would charge each streamed call twice.

    Args:
        ai_messages: Messages in OpenAI format, sent as the request body
        run_manager: Optional callback manager used to emit tokens
        model_call: Unused; kept for interface compatibility
        input_bytes: Unused; kept for interface compatibility
        start_time: Unused; kept for interface compatibility

    Returns:
        AiModelResponse with the full streamed content and ``success=True``

    Raises:
        ValueError: If the OpenAI API key is missing or the endpoint does not
            answer with HTTP 200
    """
    import httpx
    import inspect
    import json as _json
    from modules.shared.configuration import APP_CONFIG

    model = self._selected_model
    if model.connectorType == "openai":
        api_url = getattr(model, "apiUrl", None) or "https://api.openai.com/v1/chat/completions"
        api_key = APP_CONFIG.get("Connector_AiOpenai_API_SECRET")
        if not api_key:
            raise ValueError("OpenAI API key not configured")
        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        request_model = model.name
    else:
        # Derive the OpenAI-compatible endpoint from the configured analyze URL;
        # ``or ""`` also covers an apiUrl attribute that is present but None.
        base_url = (getattr(model, "apiUrl", None) or "").replace("/api/analyze", "")
        api_url = f"{base_url.rstrip('/')}/v1/chat/completions"
        api_key = APP_CONFIG.get("Connector_AiPrivateLlm_API_SECRET")
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["X-API-Key"] = api_key
        request_model = getattr(model, "version", None) or model.name

    payload = {
        "model": request_model,
        "messages": ai_messages,
        "temperature": model.temperature,
        "max_tokens": model.maxTokens,
        "stream": True,
    }

    emit_token = getattr(run_manager, "on_llm_new_token", None) if run_manager else None
    content_parts: List[str] = []

    async def _consume_line(raw_line: str) -> bool:
        """Handle one event-stream line; return True once ``[DONE]`` is seen."""
        line = raw_line.strip()
        if not line.startswith("data:"):
            return False
        data_str = line[5:].strip()
        if data_str == "[DONE]":
            return True
        try:
            data = _json.loads(data_str)
        except _json.JSONDecodeError:
            return False  # not a JSON payload: ignore the line
        if not isinstance(data, dict):
            return False
        choices = data.get("choices") or []
        if not choices:
            return False
        delta = choices[0].get("delta") or {}
        token = delta.get("content") or ""
        if not token:
            return False
        content_parts.append(token)
        if emit_token is not None:
            # Async callback managers return a coroutine here; without awaiting
            # it the token would never reach the listeners.
            emitted = emit_token(token)
            if inspect.isawaitable(emitted):
                await emitted
        return False

    async with httpx.AsyncClient(timeout=600.0) as client:
        async with client.stream("POST", api_url, headers=headers, json=payload) as resp:
            if resp.status_code != 200:
                raise ValueError(f"OpenAI stream error: {resp.status_code} - {await resp.aread()}")
            # aiter_lines() also yields a final line that has no trailing
            # newline; stop reading as soon as the terminator arrives.
            async for line in resp.aiter_lines():
                if await _consume_line(line):
                    break

    content = "".join(content_parts)
    return AiModelResponse(content=content, success=True, modelId=model.name, metadata={})
async def _agenerate(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[Any] = None,
    **kwargs: Any,
) -> ChatResult:
    """
    Async generate method required by BaseChatModel.

    Selects a model if none is cached, converts the messages to OpenAI format,
    and then takes one of three call paths:

    * tools bound and connector is "openai"/"privatellm": direct POST to the
      OpenAI-compatible chat-completions endpoint with tool schemas (with a
      fallback to the model's ``functionCall`` when a privatellm endpoint
      answers 404);
    * no tools and connector is "openai"/"privatellm": token streaming via
      ``_call_openai_streaming``;
    * otherwise: the model's ``functionCall`` connector.

    Afterwards the price is computed, the billing callback (if any) is invoked,
    and the response is wrapped in a ChatResult.

    Args:
        messages: List of LangChain messages
        stop: Optional stop sequences (not used in this method)
        run_manager: Optional callback manager, forwarded to the streaming path
        **kwargs: Additional arguments (not used in this method)

    Returns:
        ChatResult with a single generation

    Raises:
        ValueError: On a missing OpenAI API key, a non-200 API answer, a model
            without ``functionCall``, or an unsuccessful model response
    """
    # Select model if not already selected
    if not self._selected_model:
        self._select_model(messages)

    # Tools are attached by bind_tools() as the "_bound_tools" attribute
    tools = getattr(self, "_bound_tools", None)

    # Convert messages to AI center format
    ai_messages = self._convert_messages_to_ai_format(messages)

    # Compute input bytes for billing (sum of UTF-8 lengths of string contents;
    # non-string contents are not counted)
    input_bytes = sum(
        len((m.get("content") or "").encode("utf-8"))
        for m in ai_messages
        if isinstance(m.get("content"), str)
    )
    start_time = time.time()

    # If tools are bound, add tool definitions to the system message
    # so the model is told about the available tools in plain text as well.
    if tools:
        # Find the first system message, if any
        system_message_idx = None
        for i, msg in enumerate(ai_messages):
            if msg.get("role") == "system":
                system_message_idx = i
                break

        # Build tool descriptions for the system message
        tool_descriptions = []
        for tool in tools:
            if hasattr(tool, "name") and hasattr(tool, "description"):
                # List the parameter names from the args schema, when available
                args_schema = getattr(tool, "args_schema", None)
                params_info = ""
                if args_schema:
                    try:
                        if hasattr(args_schema, "model_json_schema"):
                            schema = args_schema.model_json_schema()
                            if "properties" in schema:
                                params = list(schema["properties"].keys())
                                params_info = f" (Parameter: {', '.join(params)})"
                    except:
                        # NOTE(review): bare except silently hides any schema error
                        pass
                tool_descriptions.append(f"- {tool.name}: {tool.description}{params_info}")

        if tool_descriptions:
            tools_text = "\n".join(tool_descriptions)
            # German instruction block that tells the model it must call the tools
            tools_note = f"\n\n⚠️⚠️⚠️ KRITISCH - TOOL-NUTZUNG ⚠️⚠️⚠️\n\nVERFÜGBARE TOOLS:\n{tools_text}\n\nABSOLUT VERBINDLICH:\n- Du MUSST diese Tools verwenden, um Anfragen zu bearbeiten!\n- Für Status-Updates MUSST du IMMER das Tool 'send_streaming_message' verwenden!\n- VERBOTEN: Normale Text-Nachrichten für Status-Updates!\n- Du MUSST Tools aufrufen, nicht nur darüber sprechen!\n\nBeispiel FALSCH: \"Ich werde die Datenbank durchsuchen...\"\nBeispiel RICHTIG: Rufe das Tool 'send_streaming_message' mit \"Durchsuche Datenbank...\" auf!"

            if system_message_idx is not None:
                # Append to existing system message
                # NOTE(review): assumes the system content is a string -- confirm
                ai_messages[system_message_idx]["content"] += tools_note
            else:
                # Add new system message at the beginning
                ai_messages.insert(0, {
                    "role": "system",
                    "content": tools_note.strip()
                })

    # Convert LangChain tools to OpenAI function-calling format
    openai_tools = None
    if tools and self._selected_model.connectorType in ("openai", "privatellm"):
        # Build one {"type": "function", ...} entry per tool
        openai_tools = []
        for tool in tools:
            if hasattr(tool, "name") and hasattr(tool, "description"):
                # Get tool parameters schema
                args_schema = getattr(tool, "args_schema", None)
                parameters = {}

                if args_schema:
                    # Check if it's a Pydantic model class or instance
                    from pydantic import BaseModel

                    # Check if it's a class (not an instance)
                    if isinstance(args_schema, type) and issubclass(args_schema, BaseModel):
                        # It's a Pydantic model class - get JSON schema
                        if hasattr(args_schema, "model_json_schema"):
                            # Pydantic v2
                            parameters = args_schema.model_json_schema()
                        elif hasattr(args_schema, "schema"):
                            # Pydantic v1
                            parameters = args_schema.schema()
                    elif isinstance(args_schema, BaseModel):
                        # It's a Pydantic model instance
                        if hasattr(args_schema, "model_dump"):
                            parameters = args_schema.model_dump()
                    elif hasattr(args_schema, "schema"):
                        # Has schema method (might be a class)
                        try:
                            parameters = args_schema.schema()
                        except TypeError:
                            # If schema() requires instance, try model_json_schema
                            if hasattr(args_schema, "model_json_schema"):
                                parameters = args_schema.model_json_schema()
                            else:
                                parameters = {}
                    elif isinstance(args_schema, dict):
                        # Already a dict
                        parameters = args_schema

                tool_schema = {
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "description": tool.description or "",
                        "parameters": parameters
                    }
                }
                openai_tools.append(tool_schema)

    # AiModelCall is built without any tools argument; the structured tool
    # schemas are only sent on the direct HTTP path below.

    # Create model call
    model_call = AiModelCall(
        messages=ai_messages,
        model=self._selected_model,
        options=AiCallOptions(
            operationType=self.operation_type,
            processingMode=self.processing_mode,
            temperature=self._selected_model.temperature
        )
    )

    # If tools are bound, use the OpenAI-compatible API directly
    if openai_tools and self._selected_model.connectorType in ("openai", "privatellm"):
        import httpx
        import json as _json
        from modules.shared.configuration import APP_CONFIG

        if self._selected_model.connectorType == "openai":
            api_url = self._selected_model.apiUrl
            api_key = APP_CONFIG.get("Connector_AiOpenai_API_SECRET")
            if not api_key:
                raise ValueError("OpenAI API key not configured")
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            ollama_model = self._selected_model.name
        else:
            # privatellm: derive the /v1/chat/completions URL from the configured /api/analyze URL
            base_url = self._selected_model.apiUrl.replace("/api/analyze", "")
            api_url = f"{base_url}/v1/chat/completions"
            api_key = APP_CONFIG.get("Connector_AiPrivateLlm_API_SECRET")
            headers = {"Content-Type": "application/json"}
            if api_key:
                headers["X-API-Key"] = api_key
            # Send the model's "version" as model name when set, otherwise its name
            ollama_model = getattr(self._selected_model, "version", None) or self._selected_model.name

        payload = {
            "model": ollama_model,
            "messages": ai_messages,
            "tools": openai_tools,
            "tool_choice": "auto",
            "temperature": self._selected_model.temperature,
            "max_tokens": self._selected_model.maxTokens,
        }

        use_connector_fallback = False
        async with httpx.AsyncClient(timeout=600.0) as client:
            response_obj = await client.post(api_url, headers=headers, json=payload)
            # A 404 from privatellm means the OpenAI-compatible route is missing;
            # any other non-200 answer is treated as a hard error.
            if response_obj.status_code == 404 and self._selected_model.connectorType == "privatellm":
                logger.warning(
                    "Private-LLM /v1/chat/completions not found (404). Falling back to /api/analyze. "
                    "Tool calling will not work until the service exposes an OpenAI-compatible endpoint."
                )
                use_connector_fallback = True
            elif response_obj.status_code != 200:
                error_msg = f"AI API error ({self._selected_model.connectorType}): {response_obj.status_code} - {response_obj.text}"
                logger.error(error_msg)
                raise ValueError(error_msg)

        if use_connector_fallback:
            if not self._selected_model.functionCall:
                raise ValueError(f"Model {self._selected_model.displayName} has no functionCall defined")
            response = await self._selected_model.functionCall(model_call)
        else:
            response_json = response_obj.json()
            choice = response_json["choices"][0]
            message = choice["message"]
            content = message.get("content", "")
            tool_calls_raw = message.get("tool_calls")
            tool_calls = None
            if tool_calls_raw:
                # Re-shape OpenAI tool calls into {"id", "name", "args"} entries
                tool_calls = []
                for tc in tool_calls_raw:
                    func_data = tc.get("function", {})
                    func_name = func_data.get("name")
                    func_args_str = func_data.get("arguments", "{}")
                    try:
                        func_args = _json.loads(func_args_str) if isinstance(func_args_str, str) else func_args_str
                    except Exception:
                        # Unparseable arguments are replaced by an empty dict
                        func_args = {}
                    tool_calls.append({
                        "id": tc.get("id"),
                        "name": func_name,
                        "args": func_args,
                    })
            response = AiModelResponse(
                content=content or "",
                success=True,
                modelId=self._selected_model.name,
                metadata={
                    "response_id": response_json.get("id", ""),
                    "tool_calls": tool_calls,
                },
            )
    elif not tools and self._selected_model.connectorType in ("openai", "privatellm"):
        # Streaming path for "openai"/"privatellm" models without tools
        response = await self._call_openai_streaming(
            ai_messages, run_manager, model_call, input_bytes, start_time
        )
    else:
        # Any other combination: call the model's own connector function
        if not self._selected_model.functionCall:
            raise ValueError(f"Model {self._selected_model.displayName} has no functionCall defined")
        response: AiModelResponse = await self._selected_model.functionCall(model_call)

    if not response.success:
        raise ValueError(f"AI model call failed: {response.error or 'Unknown error'}")

    # Billing: compute price and invoke callback (runs for every branch above)
    output_bytes = len((response.content or "").encode("utf-8"))
    processing_time = time.time() - start_time
    price_chf = 0.0
    if getattr(self._selected_model, "calculatepriceCHF", None):
        try:
            price_chf = self._selected_model.calculatepriceCHF(
                processing_time, input_bytes, output_bytes
            )
        except Exception as e:
            logger.warning(f"Billing: price calculation failed: {e}")

    billing_callback = getattr(self, "_billing_callback", None)
    if billing_callback:
        try:
            ai_response = AiCallResponse(
                content=response.content or "",
                modelName=self._selected_model.name,
                provider=getattr(self._selected_model, "connectorType", "unknown") or "unknown",
                priceCHF=price_chf,
                processingTime=processing_time,
                bytesSent=input_bytes,
                bytesReceived=output_bytes,
                errorCount=0,
            )
            billing_callback(ai_response)
        except Exception as e:
            # A failing billing callback is logged and does not fail the call
            logger.error(f"Billing callback error: {e}")

    # Extract tool calls from response metadata if present
    tool_calls = None
    if response.metadata:
        # Either key may carry the tool calls
        tool_calls = response.metadata.get("tool_calls") or response.metadata.get("function_calls")

    # Convert response to LangChain format with tool calls
    ai_message = self._convert_ai_response_to_langchain(response, tool_calls=tool_calls)

    # Create generation and result
    generation = ChatGeneration(message=ai_message)
    return ChatResult(generations=[generation])
def bind_tools(self, tools: List[Any], **kwargs: Any) -> "AICenterChatModel":
    """
    Return a copy of this model with ``tools`` attached.

    The copy carries over the complete configuration of this instance
    (provider restriction, fast-model preference and RBAC context included),
    so binding tools does not change which models may be selected. The
    already selected model, if any, is reused.

    Args:
        tools: List of LangChain tools; stored on the copy as ``_bound_tools``
        **kwargs: Additional arguments (currently ignored)

    Returns:
        New AICenterChatModel instance with tools bound
    """
    bound_model = AICenterChatModel(
        user=self.user,
        operation_type=self.operation_type,
        processing_mode=self.processing_mode,
        billing_callback=getattr(self, "_billing_callback", None),
        workflow_id=getattr(self, "_workflow_id", None),
        # Copy the list so the two instances never share mutable state
        allowed_providers=list(getattr(self, "_allowed_providers", None) or []) or None,
        prefer_fast_model=getattr(self, "_prefer_fast_model", False),
        mandate_id=getattr(self, "_mandate_id", None),
        feature_instance_id=getattr(self, "_feature_instance_id", None),
    )
    # Reuse the cached selection so the copy does not select again
    object.__setattr__(bound_model, "_selected_model", getattr(self, "_selected_model", None))
    # _agenerate reads the tools back from this attribute
    object.__setattr__(bound_model, "_bound_tools", tools)
    return bound_model
def invoke(
    self,
    input: List[BaseMessage],
    config: Optional[RunnableConfig] = None,
    **kwargs: Any,
) -> BaseMessage:
    """
    Synchronous invoke: run ``_agenerate`` to completion and return its message.

    Args:
        input: List of LangChain messages
        config: Optional runnable config (not used by this implementation)
        **kwargs: Additional arguments forwarded to ``_agenerate``

    Returns:
        Message of the first generation

    Raises:
        RuntimeError: If called while an event loop is already running in
            this thread; use ``ainvoke`` there instead.
    """
    import asyncio

    # get_running_loop() raises RuntimeError exactly when no loop is running.
    # The guard error is raised in the ``else`` branch so that the ``except``
    # for the "no loop" case cannot swallow it.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        pass  # no running loop: safe to drive the coroutine ourselves
    else:
        raise RuntimeError("Cannot use synchronous invoke in async context. Use ainvoke instead.")

    # asyncio.run() creates a fresh loop and closes it again afterwards.
    result = asyncio.run(self._agenerate(input, **kwargs))
    return result.generations[0].message
async def ainvoke(
    self,
    input: List[BaseMessage],
    config: Optional[RunnableConfig] = None,
    **kwargs: Any,
) -> BaseMessage:
    """
    Async invoke: await ``_agenerate`` and hand back the resulting message.

    Args:
        input: List of LangChain messages
        config: Optional runnable config (not used by this implementation)
        **kwargs: Additional arguments forwarded to ``_agenerate``

    Returns:
        Message of the first generation
    """
    chat_result = await self._agenerate(input, **kwargs)
    first_generation = chat_result.generations[0]
    return first_generation.message

View file

@ -1,576 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Custom LangGraph checkpointer using existing database interface.
Maps LangGraph state to existing message storage format.
"""
import contextvars
import logging
import uuid
from typing import Any, Dict, List, Optional, Tuple, NamedTuple
from datetime import datetime
from langgraph.checkpoint.base import BaseCheckpointSaver, Checkpoint, CheckpointMetadata
# CheckpointTuple might not be directly importable, so we define it as a NamedTuple
# Based on LangGraph's usage, it needs config, checkpoint, metadata, parent_config, and pending_writes
class CheckpointTuple(NamedTuple):
    """Locally defined record bundling a checkpoint with its config and metadata.

    Fields, in positional order: config, checkpoint, metadata, parent_config,
    pending_writes. The last two are optional and default to None.
    """
    # Config dictionary the checkpoint belongs to
    config: Dict[str, Any]
    # The checkpoint payload itself
    checkpoint: Checkpoint
    # Metadata stored alongside the checkpoint
    metadata: CheckpointMetadata
    # Config of the parent checkpoint, if any
    parent_config: Optional[Dict[str, Any]] = None
    # Pending writes as (str, value) pairs, if any
    pending_writes: Optional[List[Tuple[str, Any]]] = None
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
from modules.features.chatbot.interfaceFeatureChatbot import ChatbotMessage
from modules.datamodels.datamodelUam import User
from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
def _sanitize_llm_response(text: str) -> str:
    """Cut the text at the first leaked chat-template token and trim whitespace.

    Everything from the earliest occurrence of any known template marker
    onwards is discarded. Falsy input yields an empty string; a truthy
    non-string value is returned unchanged.
    """
    if not isinstance(text, str) or not text:
        return text or ""

    markers = ("<|im_start|>", "<|im_end|>", "<|endoftext|>", "<|user|>", "<|assistant|>")
    # Position of each marker that actually occurs in the text
    hits = [position for position in (text.find(marker) for marker in markers) if position != -1]
    kept = text[: min(hits)] if hits else text
    return kept.strip()
class DatabaseCheckpointer(BaseCheckpointSaver):
"""
Custom LangGraph checkpointer that uses the chatbot's own database interface.
Maps LangGraph thread_id to conversation.id; stores messages via interface (workflowId maps to conversationId).
"""
def __init__(
    self,
    user: User,
    workflow_id: str,
    mandateId: Optional[str] = None,
    featureInstanceId: Optional[str] = None,
    *,
    interface=None,
):
    """
    Initialize the database checkpointer.

    Args:
        user: Current user for database access
        workflow_id: Workflow ID (used as default thread_id)
        mandateId: Mandate ID passed to the chatbot interface factory
        featureInstanceId: Feature instance ID passed to the chatbot interface factory
        interface: Optional pre-created chatbot interface; when given, no new
            interface is created and mandateId/featureInstanceId are not used
    """
    # Let the base saver run its own initialization before adding our state.
    super().__init__()
    self.user = user
    self.workflow_id = workflow_id
    if interface is None:
        interface = getChatbotInterface(
            user, mandateId=mandateId, featureInstanceId=featureInstanceId
        )
    self.interface = interface
def _convert_langchain_to_db_message(
    self,
    msg: BaseMessage,
    sequence_nr: int,
    round_number: int
) -> Dict[str, Any]:
    """
    Build the database message dictionary for one LangChain message.

    Human, AI and system messages map to the roles "user", "assistant" and
    "system"; tool messages are stored as "assistant" with a "Tool <name>:"
    prefix. Any other message type yields role "user" with empty text.

    Args:
        msg: LangChain message
        sequence_nr: Sequence number for ordering; values above 1 get status "step"
        round_number: Round number in workflow

    Returns:
        Dictionary in database message format
    """
    plain_roles = (
        (HumanMessage, "user"),
        (AIMessage, "assistant"),
        (SystemMessage, "system"),
    )

    role, content = "user", ""
    for message_type, mapped_role in plain_roles:
        if isinstance(msg, message_type):
            role = mapped_role
            raw = msg.content
            # Non-string content (e.g. a list of parts) is stored as its str() form
            content = raw if isinstance(raw, str) else str(raw)
            break
    else:
        if isinstance(msg, ToolMessage):
            # Tool output is kept as an assistant message with the tool name in front
            role = "assistant"
            content = f"Tool {msg.name}: {msg.content}"

    if sequence_nr > 1:
        status = "step"
    else:
        status = "first"

    return {
        "id": f"msg_{uuid.uuid4()}",
        "workflowId": self.workflow_id,
        "message": content,
        "role": role,
        "status": status,
        "sequenceNr": sequence_nr,
        "publishedAt": getUtcTimestamp(),
        "roundNumber": round_number,
        "taskNumber": 0,
        "actionNumber": 0
    }
def _convert_db_to_langchain_messages(
    self,
    messages: List[ChatbotMessage]
) -> List[BaseMessage]:
    """
    Turn stored database messages back into LangChain messages.

    Args:
        messages: Database message objects exposing ``role`` and ``message``

    Returns:
        LangChain messages in the same order; entries whose role is not
        "user", "assistant" or "system" are left out
    """
    message_type_by_role = {
        "user": HumanMessage,
        "assistant": AIMessage,
        "system": SystemMessage,
    }
    restored = []
    for record in messages:
        message_type = message_type_by_role.get(record.role)
        if message_type is None:
            continue  # unknown role: skipped
        restored.append(message_type(content=record.message))
    return restored
def put(
    self,
    config: Dict[str, Any],
    checkpoint: Checkpoint,
    metadata: CheckpointMetadata,
    new_versions: Dict[str, int],
) -> None:
    """
    Persist the not-yet-stored user/assistant messages of a checkpoint.

    Only human messages and final AI answers are stored: AI messages that
    carry tool calls, start with a code fence, or look like a raw SQL SELECT
    are skipped, as are messages whose (role, text) pair already exists in the
    database. Newly stored messages receive consecutive sequence numbers that
    continue after the existing ones.

    Args:
        config: LangGraph config (thread_id is read from "configurable")
        checkpoint: Checkpoint whose "channel_values"/"messages" are stored
        metadata: Checkpoint metadata (not used)
        new_versions: New version numbers (not used)

    Raises:
        Exception: Any error outside the per-message store is logged and re-raised
    """
    try:
        # thread_id maps to the workflow id; fall back to the one given at construction
        thread_id = config.get("configurable", {}).get("thread_id", self.workflow_id)

        # The workflow supplies the round number for the stored messages
        workflow = self.interface.getWorkflow(thread_id)
        if not workflow:
            logger.warning(f"Workflow {thread_id} not found, cannot store checkpoint")
            return
        round_number = workflow.currentRound

        state = checkpoint.get("channel_values", {})
        messages = state.get("messages", [])
        if not messages:
            logger.debug(f"No messages in checkpoint for workflow {thread_id}")
            return

        existing_messages = self.interface.getMessages(thread_id)
        existing_count = len(existing_messages) if existing_messages else 0

        # (role, text) pairs already stored, for duplicate detection.
        # NOTE(review): a repeated identical user message (e.g. "yes" twice)
        # is treated as already stored and never written again -- confirm
        # this is acceptable.
        existing_content_set = {
            (existing_msg.role, existing_msg.message)
            for existing_msg in (existing_messages or [])
        }

        # Keep human messages and final AI answers only
        checkpoint_user_assistant_messages = []
        for msg in messages:
            if isinstance(msg, HumanMessage):
                checkpoint_user_assistant_messages.append(msg)
            elif isinstance(msg, AIMessage):
                # Tool-call requests are intermediate steps, not answers
                if getattr(msg, "tool_calls", None):
                    logger.debug(f"Skipping intermediate AIMessage with tool_calls for workflow {thread_id}")
                    continue
                # Raw SQL / fenced blocks are intermediate planner output
                content = msg.content if isinstance(msg.content, str) else str(msg.content)
                cu = (content or "").strip().upper()
                if content and (
                    content.strip().startswith("```")
                    or (cu.startswith("SELECT") and ("FROM" in cu or "JOIN" in cu))
                ):
                    logger.debug(f"Skipping intermediate SQL AIMessage for workflow {thread_id}")
                    continue
                checkpoint_user_assistant_messages.append(msg)

        # Only store new messages that aren't already in the database
        new_messages_to_store = []
        for msg in checkpoint_user_assistant_messages:
            role = "user" if isinstance(msg, HumanMessage) else "assistant"
            content = msg.content if isinstance(msg.content, str) else str(msg.content)
            if isinstance(msg, AIMessage):
                content = _sanitize_llm_response(content)
                if not content or not content.strip():
                    continue
            content_key = (role, content)
            if content_key not in existing_content_set:
                # Store the sanitized text, not the raw model output
                if isinstance(msg, AIMessage) and msg.content != content:
                    msg = AIMessage(content=content)
                new_messages_to_store.append(msg)
                existing_content_set.add(content_key)

        if new_messages_to_store:
            for msg in new_messages_to_store:
                # existing_count tracks what is stored so far, so the next
                # number is always exactly one above it. (Adding a loop index
                # on top of the running count skipped every other number and
                # could produce a lower number on a later call.)
                sequence_nr = existing_count + 1
                db_message_data = self._convert_langchain_to_db_message(
                    msg,
                    sequence_nr,
                    round_number
                )
                try:
                    self.interface.createMessage(db_message_data)
                except Exception as e:
                    # A failed message is logged; the remaining ones are still tried
                    logger.error(f"Error storing message: {e}", exc_info=True)
                else:
                    logger.debug(f"Stored message {db_message_data['id']} for workflow {thread_id}")
                    existing_count += 1
        else:
            logger.debug(f"No new messages to store for workflow {thread_id} (existing: {existing_count}, checkpoint: {len(checkpoint_user_assistant_messages)})")

        # Update workflow last activity
        self.interface.updateWorkflow(thread_id, {
            "lastActivity": getUtcTimestamp()
        })

    except Exception as e:
        logger.error(f"Error storing checkpoint: {e}", exc_info=True)
        raise
def get(
    self,
    config: Dict[str, Any],
) -> Optional[Checkpoint]:
    """
    Build a checkpoint for a workflow from its stored messages.

    The thread_id found in the LangGraph config is used as the workflow id;
    when the config carries none, self.workflow_id is used instead.

    Args:
        config: LangGraph config; thread_id is read from config["configurable"]

    Returns:
        A checkpoint dict whose "messages" channel holds the converted
        messages (an empty list when none are stored), or None when the
        workflow does not exist or loading it raised an error
    """
    try:
        configurable = config.get("configurable", {})
        workflow_key = configurable.get("thread_id", self.workflow_id)

        if not self.interface.getWorkflow(workflow_key):
            logger.debug(f"Workflow {workflow_key} not found")
            return None

        stored_messages = self.interface.getMessages(workflow_key)
        new_checkpoint_id = str(uuid.uuid4())

        # A workflow without stored messages gets an empty message channel;
        # the converter is only invoked when there is something to convert.
        if stored_messages:
            channel_messages = self._convert_db_to_langchain_messages(stored_messages)
        else:
            channel_messages = []

        return {
            "id": new_checkpoint_id,
            "v": 1,
            "ts": getUtcTimestamp(),
            "channel_values": {
                "messages": channel_messages
            },
            "channel_versions": {},
            "versions_seen": {}
        }
    except Exception as e:
        logger.error(f"Error retrieving checkpoint: {e}", exc_info=True)
        return None
def list(
    self,
    config: Dict[str, Any],
    filter: Optional[Dict[str, Any]] = None,
    before: Optional[str] = None,
    limit: Optional[int] = None,
) -> List[Checkpoint]:
    """
    Return the current checkpoint as a list of at most one element.

    Listing is not fully implemented: filter, before and limit are accepted
    but not applied.

    Args:
        config: LangGraph config
        filter: Optional filter (ignored)
        before: Optional upper bound (ignored)
        limit: Optional maximum number of results (ignored)

    Returns:
        [checkpoint] when get(config) yields one, otherwise an empty list
    """
    current = self.get(config)
    return [current] if current else []
def put_writes(
    self,
    config: Dict[str, Any],
    writes: List[Tuple[str, Any]],
    task_id: str,
) -> None:
    """
    Accept checkpoint writes without storing them.

    This is intentionally a no-op; persistence happens in put().

    Args:
        config: LangGraph config
        writes: List of write operations
        task_id: Task ID
    """
    # Nothing to persist here - put() does the storing.
    return None
async def aget_tuple(
    self,
    config: Dict[str, Any],
) -> Optional[CheckpointTuple]:
    """
    Async lookup that wraps the result of get() in a CheckpointTuple.

    Args:
        config: LangGraph config (contains thread_id)

    Returns:
        CheckpointTuple carrying the given config, the checkpoint and a
        metadata dict with step 0, or None when get() yields nothing
    """
    loaded = self.get(config)
    if not loaded:
        return None

    # LangGraph expects a 'step' entry in the metadata; every tuple built
    # here starts at step 0.
    step_metadata: CheckpointMetadata = {"step": 0}
    return CheckpointTuple(
        config=config,
        checkpoint=loaded,
        metadata=step_metadata,
        parent_config=None,  # this implementation keeps no parent checkpoint
        pending_writes=None  # this implementation keeps no pending writes
    )
async def aput(
    self,
    config: Dict[str, Any],
    checkpoint: Checkpoint,
    metadata: CheckpointMetadata,
    new_versions: Dict[str, int],
) -> None:
    """
    Async entry point for storing a checkpoint; forwards to put().

    Args:
        config: LangGraph config (contains thread_id)
        checkpoint: Checkpoint to store
        metadata: Checkpoint metadata
        new_versions: New version numbers
    """
    # The synchronous put() is called directly on the calling coroutine.
    self.put(config, checkpoint, metadata, new_versions)
    return None
async def alist(
    self,
    config: Dict[str, Any],
    filter: Optional[Dict[str, Any]] = None,
    before: Optional[str] = None,
    limit: Optional[int] = None,
) -> List[Checkpoint]:
    """
    Async entry point for listing checkpoints; forwards to list().

    Args:
        config: LangGraph config
        filter: Optional filter
        before: Optional upper bound
        limit: Optional maximum number of results

    Returns:
        Whatever list() returns for the same arguments
    """
    found = self.list(config, filter, before, limit)
    return found
async def aput_writes(
    self,
    config: Dict[str, Any],
    writes: List[Tuple[str, Any]],
    task_id: str,
) -> None:
    """
    Async counterpart of put_writes; intentionally a no-op.

    LangGraph calls this method, but writes are persisted through aput().

    Args:
        config: LangGraph config
        writes: List of write operations
        task_id: Task ID
    """
    # Nothing to persist here - aput() does the storing.
    return None
# ContextVar holding the per-request checkpointer (read by CheckpointerResolver for graph caching).
# The default is None; CheckpointerResolver raises RuntimeError when it finds None here.
_current_checkpointer: contextvars.ContextVar[Optional[BaseCheckpointSaver]] = contextvars.ContextVar(
"chatbot_current_checkpointer", default=None
)
def set_checkpointer(checkpointer: BaseCheckpointSaver) -> contextvars.Token:
    """
    Install the checkpointer for the current request context.

    Returns:
        The token produced by ContextVar.set(); pass it to
        reset_checkpointer() to restore the previous value.
    """
    restore_token = _current_checkpointer.set(checkpointer)
    return restore_token
def reset_checkpointer(token: contextvars.Token) -> None:
    """
    Restore the checkpointer that was active before set_checkpointer().

    A ValueError from ContextVar.reset() is ignored on purpose: it is raised
    when the token was created in a different context (e.g. after a yield,
    during generator cleanup), in which case there is nothing to restore here.
    """
    try:
        _current_checkpointer.reset(token)
    except ValueError:
        return None
    return None
class CheckpointerResolver(BaseCheckpointSaver):
    """
    Delegating checkpointer that reads the real checkpointer from context.

    Used for graph caching: the compiled graph uses this resolver; at invoke time
    the per-request checkpointer is set via set_checkpointer(). Every method
    looks the checkpointer up again, so one cached graph can serve requests
    that use different checkpointers.

    The async methods prefer the inner checkpointer's async variant and fall
    back to its synchronous counterpart when the async one is missing.
    """

    def _get_checkpointer(self) -> BaseCheckpointSaver:
        """
        Return the checkpointer stored in the current context.

        Raises:
            RuntimeError: If set_checkpointer() was not called in this context
        """
        cp = _current_checkpointer.get()
        if cp is None:
            raise RuntimeError(
                "CheckpointerResolver: no checkpointer in context. "
                "Call set_checkpointer() before invoking the cached graph."
            )
        return cp

    def put(
        self,
        config: Dict[str, Any],
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: Dict[str, int],
    ) -> None:
        """Forward put() to the context's checkpointer."""
        self._get_checkpointer().put(config, checkpoint, metadata, new_versions)

    def get(self, config: Dict[str, Any]) -> Optional[Checkpoint]:
        """Forward get() to the context's checkpointer."""
        return self._get_checkpointer().get(config)

    def list(
        self,
        config: Dict[str, Any],
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Checkpoint]:
        """Forward list() to the context's checkpointer."""
        return self._get_checkpointer().list(config, filter, before, limit)

    def put_writes(
        self,
        config: Dict[str, Any],
        writes: List[Tuple[str, Any]],
        task_id: str,
    ) -> None:
        """Forward put_writes() to the context's checkpointer."""
        self._get_checkpointer().put_writes(config, writes, task_id)

    async def aget_tuple(self, config: Dict[str, Any]) -> Optional[CheckpointTuple]:
        """
        Return the inner checkpointer's checkpoint tuple.

        When the inner checkpointer has no aget_tuple(), its get() result is
        wrapped in a CheckpointTuple with step-0 metadata instead.
        """
        inner = self._get_checkpointer()
        if hasattr(inner, "aget_tuple"):
            return await inner.aget_tuple(config)
        checkpoint = inner.get(config)
        if checkpoint:
            metadata: CheckpointMetadata = {"step": 0}
            return CheckpointTuple(
                config=config,
                checkpoint=checkpoint,
                metadata=metadata,
                parent_config=None,
                pending_writes=None,
            )
        return None

    async def aput(
        self,
        config: Dict[str, Any],
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: Dict[str, int],
    ) -> None:
        """Store a checkpoint via the inner aput(), or put() when aput() is missing."""
        inner = self._get_checkpointer()
        if hasattr(inner, "aput"):
            await inner.aput(config, checkpoint, metadata, new_versions)
        else:
            inner.put(config, checkpoint, metadata, new_versions)

    async def alist(
        self,
        config: Dict[str, Any],
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Checkpoint]:
        """List checkpoints via the inner alist(), or list() when alist() is missing."""
        inner = self._get_checkpointer()
        if hasattr(inner, "alist"):
            return await inner.alist(config, filter, before, limit)
        return inner.list(config, filter, before, limit)

    async def aput_writes(
        self,
        config: Dict[str, Any],
        writes: List[Tuple[str, Any]],
        task_id: str,
    ) -> None:
        """Store writes via the inner aput_writes(), or put_writes() when it is missing."""
        inner = self._get_checkpointer()
        if hasattr(inner, "aput_writes"):
            await inner.aput_writes(config, writes, task_id)
        else:
            # Same sync fallback as aput()/alist(); previously the writes
            # were dropped silently on this path.
            inner.put_writes(config, writes, task_id)

View file

@ -1,390 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot tools for LangGraph integration.
Includes SQL query tool, Tavily search tool, and streaming status tool.
Tools can be created with factory functions for dynamic configuration:
- create_sql_query_tool(connector_type) - SQL query tool with configurable connector
- create_tavily_search_tool() - Tavily web search tool
- create_send_streaming_message_tool(event_manager) - Streaming status updates
"""
import logging
import asyncio
from typing import Optional, Callable, Dict, Any
from langchain_core.tools import tool
from modules.connectors.connectorPreprocessor import PreprocessorConnector
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
# =============================================================================
# Connection pool for preprocessor connector (performance optimization)
# =============================================================================
class ConnectorPool:
    """
    Process-wide holder for a single, lazily created PreprocessorConnector.

    Obtain the shared holder through get_instance().
    """
    _instance: Optional['ConnectorPool'] = None
    _connector: Optional[PreprocessorConnector] = None
    _lock: Optional[asyncio.Lock] = None

    @classmethod
    def get_instance(cls) -> 'ConnectorPool':
        """Return the shared pool, creating it (and its lock) on first use."""
        if cls._instance is not None:
            return cls._instance
        cls._instance = ConnectorPool()
        cls._lock = asyncio.Lock()
        return cls._instance

    async def get_connector(self) -> PreprocessorConnector:
        """Return the pooled connector, instantiating it on the first call."""
        existing = self._connector
        if existing is not None:
            return existing
        self._connector = PreprocessorConnector()
        return self._connector

    async def close(self):
        """Close the pooled connector, if any, and forget it.

        Errors raised while closing are logged at debug level and suppressed.
        """
        if not self._connector:
            return
        try:
            await self._connector.close()
        except Exception as e:
            logger.debug(f"Error closing connector: {e}")
        self._connector = None
# Global pool instance, created when this module is imported.
# The SQL query tool obtains its connector from this pool.
_connector_pool = ConnectorPool.get_instance()
# =============================================================================
# Factory functions for configurable tools
# =============================================================================
def create_sql_query_tool(connector_type: str = "preprocessor", max_rows: int = 30):
    """
    Create a SQL query tool.

    Queries are always executed through the pooled preprocessor connector
    (_connector_pool). A different connector_type is accepted for interface
    compatibility but only produces a warning.

    Args:
        connector_type: Type of database connector to use (e.g., "preprocessor")
        max_rows: Maximum number of result rows included in the tool output

    Returns:
        LangChain tool for executing SQL queries
    """
    if connector_type != "preprocessor":
        # Make the mismatch visible instead of silently ignoring the argument.
        logger.warning(
            f"SQL query tool requested connector '{connector_type}', "
            "but only the pooled preprocessor connector is used"
        )

    @tool
    async def sqlite_query(query: str) -> str:
        """
        Execute a SQL SELECT query on the database.
        This tool allows you to query the database to find articles, prices,
        inventory levels, and other product information.
        Args:
            query: A valid SQL SELECT query. Must use double quotes for column names
                with spaces or special characters (e.g., "Artikelnummer", "S_IST_BESTAND").
                Only SELECT queries are allowed.
        Returns:
            Query results as a formatted string, or an error message if the query fails.
        Examples:
            - Find articles by name:
            SELECT a."Artikelnummer", a."Artikelbezeichnung", a."Lieferant"
            FROM Artikel a
            WHERE a."Artikelbezeichnung" LIKE '%Motor%'
            LIMIT 20
            - Find articles with price and inventory:
            SELECT a."Artikelnummer", a."Artikelbezeichnung", e."EP_CHF",
            lp."Lagerplatz" as "Lagerplatzname", l."S_IST_BESTAND",
            l."S_RESERVIERTER__BESTAND",
            CASE WHEN l."S_IST_BESTAND" != 'Unbekannt'
            THEN CAST(l."S_IST_BESTAND" AS INTEGER) - COALESCE(l."S_RESERVIERTER__BESTAND", 0)
            ELSE NULL END as "Verfügbarer Bestand"
            FROM Artikel a
            LEFT JOIN Einkaufspreis e ON a."I_ID" = e."m_Artikel"
            LEFT JOIN Lagerplatz_Artikel l ON a."I_ID" = l."R_ARTIKEL"
            LEFT JOIN Lagerplatz lp ON l."R_LAGERPLATZ" = lp."I_ID"
            WHERE a."Artikelbezeichnung" LIKE '%Netzgerät%'
            LIMIT 20
        """
        # NOTE(review): the "only SELECT" rule is not enforced in this
        # function; presumably the connector enforces it - confirm.
        try:
            # The pooled connector is reused and not closed after each query.
            connector = await _connector_pool.get_connector()
            result = await connector.executeQuery(query, return_json=True)

            # Tolerate a missing or None "text" value.
            result_text = result.get("text") or ""
            if result_text.startswith(("Error:", "Query failed:")):
                logger.error(f"SQL query failed: {result_text}")
                return result_text

            data = result.get("data") or []
            row_count = result.get("row_count", len(data))
            if not data:
                return f"Query executed successfully. Returned {row_count} rows (no data)."

            lines = [f"Query returned {row_count} rows:"]
            shown_rows = data[:max(max_rows, 0)]

            if isinstance(data[0], dict):
                # Column headers come from the first row.
                lines.append("Columns: " + ", ".join(list(data[0].keys())))
                for i, row in enumerate(shown_rows, 1):
                    row_str = ", ".join([f"{k}: {v}" for k, v in row.items()])
                    lines.append(f"{i}. {row_str}")
            else:
                # Fallback for non-dict rows
                for i, row in enumerate(shown_rows, 1):
                    lines.append(f"{i}. {row}")

            # State the truncation for both row shapes so the caller knows
            # the output is partial.
            if row_count > len(shown_rows):
                lines.append(f"(Showing first {len(shown_rows)} of {row_count} rows)")
            return "\n".join(lines)
        except Exception as e:
            error_msg = f"Error executing SQL query: {str(e)}"
            logger.error(error_msg, exc_info=True)
            return error_msg

    return sqlite_query
def create_tavily_search_tool():
"""
Create a Tavily web search tool.
The returned tool routes the search through a Tavily model taken from the
AI Center model registry and formats the results as plain text. Every
failure is reported to the caller as a returned error string.
Returns:
LangChain tool for executing Tavily web searches
"""
@tool
async def tavily_search(query: str) -> str:
"""
Search the internet for comprehensive information using Tavily search via AI Center.
Use this tool when you need to find detailed product information, datasheets,
certifications, technical specifications, market trends, or other comprehensive
information that is not in the database.
IMPORTANT: This tool returns FULL content from search results (not truncated).
Use all available information to provide comprehensive, detailed answers with
specific facts, numbers, dates, and technical details.
Args:
query: Search query string. Be specific and include product names,
model numbers, or other relevant keywords. For comprehensive
research, use broad queries like "latest developments in LED technology 2026"
Returns:
Comprehensive search results with full content, titles, URLs, and sources.
Results include up to 15 sources with complete content for detailed analysis.
Examples:
- Search for comprehensive product information:
tavily_search("latest LED technology developments 2026")
- Search for product datasheet:
tavily_search("Siemens 6AV2 181-8XP00-0AX0 datasheet")
- Search for market trends:
tavily_search("LED market trends efficiency breakthroughs 2025")
"""
try:
# Use AI Center Tavily plugin instead of direct langchain-tavily
# (these imports run when the tool is called, not when the module is imported)
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import modelSelector
from modules.datamodels.datamodelAi import (
AiModelCall,
AiModelResponse,
AiCallOptions,
OperationTypeEnum,
ProcessingModeEnum,
AiCallPromptWebSearch
)
import json
# Discover and register connectors if not already registered
# NOTE(review): this reads the registry's private `_connectors` attribute.
if not modelRegistry._connectors:
discovered_connectors = modelRegistry.discoverConnectors()
for connector in discovered_connectors:
modelRegistry.registerConnector(connector)
# Refresh models to ensure Tavily is available
# NOTE(review): indentation is lost in this view, so it is not visible whether
# refreshModels() runs on every call or only after discovery - confirm.
modelRegistry.refreshModels()
# Get available Tavily models (without RBAC filtering since tools don't have user context)
available_models = modelRegistry.getAvailableModels()
tavily_models = [m for m in available_models if m.connectorType == "tavily"]
if not tavily_models:
return "Error: Tavily model not available in AI Center. Please check configuration."
# Select the best Tavily model for web search
options = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH_DATA,
processingMode=ProcessingModeEnum.BASIC
)
# Use model selector to choose the best Tavily model
# Since we only have Tavily models, we can just pick the first one
# or use selector if multiple Tavily models exist
if len(tavily_models) == 1:
selected_model = tavily_models[0]
else:
selected_model = modelSelector.selectModel(
prompt=query,
context="",
options=options,
availableModels=tavily_models
)
if not selected_model:
return "Error: Could not select Tavily model for web search."
# Create web search prompt with more results and deeper research
web_search_prompt = AiCallPromptWebSearch(
instruction=query,
maxNumberPages=15, # Request more results for comprehensive information
country=None, # No country filter by default
language=None, # No language filter by default
researchDepth="deep" # Deep research for comprehensive results
)
# Create model call with JSON prompt
# (the search prompt is serialized to JSON and sent as the single user message)
model_call = AiModelCall(
messages=[
{
"role": "user",
"content": json.dumps(web_search_prompt.model_dump())
}
],
model=selected_model,
options=options
)
# Call the model's functionCall (which routes to _routeWebOperation)
if not selected_model.functionCall:
return "Error: Tavily model has no functionCall defined."
response: AiModelResponse = await selected_model.functionCall(model_call)
if not response.success:
error_msg = response.error or "Unknown error"
logger.error(f"Tavily search failed: {error_msg}")
return f"Error performing Tavily search: {error_msg}"
# Parse response content (should be JSON with URLs and content)
try:
result_data = json.loads(response.content) if response.content else {}
# Handle different response formats
if isinstance(result_data, list):
# List of URLs or results
results = result_data
elif isinstance(result_data, dict):
# Dictionary with URLs or results key ("urls" wins when it is non-empty)
results = result_data.get("urls", []) or result_data.get("results", []) or []
else:
results = []
if not results:
return f"No results found for query: {query}"
# Format results with full content (not truncated)
lines = [f"Internet search results for: {query}\n"]
# Return all results with full content (up to 15 results)
for i, result in enumerate(results[:15], 1):
if isinstance(result, str):
# Simple URL string
lines.append(f"{i}. {result}")
lines.append(f" URL: {result}")
elif isinstance(result, dict):
# Dictionary with url, title, content; the title falls back to the URL
url = result.get("url", "")
title = result.get("title", url)
content = result.get("content", "")
lines.append(f"{i}. {title}")
lines.append(f" URL: {url}")
if content:
# Return FULL content, not truncated - let the LLM decide what to use
lines.append(f" Content: {content}")
else:
# Fallback
lines.append(f"{i}. {str(result)}")
lines.append("")
return "\n".join(lines)
except json.JSONDecodeError:
# If response is not JSON, return the raw content as plain text
if response.content:
return f"Internet search results for: {query}\n\n{response.content}"
return f"No results found for query: {query}"
except Exception as e:
# Any other failure is logged and reported to the caller as an error string
error_msg = f"Error performing Tavily search via AI Center: {str(e)}"
logger.error(error_msg, exc_info=True)
return error_msg
return tavily_search
# =============================================================================
# Legacy tool definitions (kept for backwards compatibility)
# =============================================================================
# Legacy sqlite_query tool using default preprocessor connector
# (built once at import time via the factory)
sqlite_query = create_sql_query_tool("preprocessor")
# Legacy tavily_search tool (built once at import time via the factory)
tavily_search = create_tavily_search_tool()
# =============================================================================
# Streaming message tool factory
# =============================================================================
def create_send_streaming_message_tool(event_manager=None):
    """
    Build the send_streaming_message tool.

    Args:
        event_manager: Event manager instance. It is accepted but not used
            here; events are captured via LangGraph tool events.

    Returns:
        LangChain tool for sending streaming messages
    """

    @tool
    async def send_streaming_message(message: str) -> str:
        """
        Send a streaming status update to the user.
        Use this tool frequently to keep the user informed about what you are doing.
        This helps provide a better user experience by showing progress updates.
        Args:
            message: A short message describing what you are currently doing.
                Examples:
                - "Durchsuche Datenbank nach Lampen, LED, Leuchten, und Ähnlichem."
                - "Suche im Internet nach Produktinformationen."
                - "Analysiere Suchergebnisse und bereite Antwort vor."
        Returns:
            Confirmation that the message was sent.
        """
        # The tool body emits nothing itself: it exists so LangGraph records
        # a tool call, and the streaming bridge turns that call into the
        # actual event.
        confirmation = f"Status-Update gesendet: {message}"
        return confirmation

    return send_streaming_message

File diff suppressed because it is too large Load diff

View file

@ -1,170 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot constants and helper functions.
"""
import logging
from typing import Optional
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
logger = logging.getLogger(__name__)
async def generate_conversation_name(
    services,
    prompt: str,
    user_language: Optional[str] = None
) -> str:
    """
    Generate a conversation name from the user's prompt using AI.

    Asks the AI service for a short German title. Falls back to
    generate_name_from_prompt() when the AI service is missing, the response
    is empty, the title looks English or is shorter than 3 characters, or
    any error occurs.

    Args:
        services: Services object with AI service
        prompt: User's input prompt (always in German)
        user_language: User's language preference (not used, always German)

    Returns:
        A short, informative conversation name in German (at most 50
        characters); "Neue Unterhaltung" for an empty prompt
    """
    import re

    if not prompt or not prompt.strip():
        return "Neue Unterhaltung"
    try:
        # Check if AI service is available
        if not hasattr(services, 'ai') or services.ai is None:
            logger.warning("AI service not available, generating name from prompt")
            return generate_name_from_prompt(prompt)
        # Ensure AI service is initialized before use
        await services.ai.ensureAiObjectsInitialized()
        # Create AI prompt - very explicit that answer must be in German
        ai_prompt = f"""Du bist ein deutscher Assistent. Der Benutzer hat folgende Anfrage auf Deutsch gestellt:
"{prompt.strip()}"
Erstelle einen kurzen, zusammenfassenden Titel für diese Unterhaltung. Der Titel muss:
- Auf Deutsch sein (KEIN Englisch!)
- Maximal 50 Zeichen lang sein
- Das Hauptthema zusammenfassen
- Informativ sein
Beispiele für gute deutsche Titel:
- "LED-Artikel Suche"
- "Lagerbestandsabfrage"
- "Produktinformationen"
- "Artikel-Suche"
Antworte NUR mit dem deutschen Titel, ohne Anführungszeichen oder Erklärungen."""
        # Create AI request
        request = AiCallRequest(
            prompt=ai_prompt,
            context="",
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=False,
                compressContext=False,
                temperature=0.3  # Lower temperature for more consistent German output
            )
        )
        # Call AI service
        logger.info(f"Calling AI to generate conversation name for prompt: {prompt[:50]}...")
        response = await services.ai.callAi(request)
        if not response or not hasattr(response, 'content') or not response.content:
            logger.warning("AI response invalid, generating name from prompt")
            return generate_name_from_prompt(prompt)

        # Convert once, before slicing for the log line, so non-string
        # content does not raise.
        raw_name = str(response.content)
        logger.info(f"AI response received: {raw_name[:100]}...")

        # Clean up the AI response
        name = raw_name.strip()
        name = name.strip('"\'')
        # Remove markdown code blocks if present
        if name.startswith('```'):
            lines = name.split('\n')
            if len(lines) > 1:
                name = '\n'.join(lines[1:-1]) if lines[-1].strip() == '```' else '\n'.join(lines[1:])
        # Remove newlines and extra spaces
        name = " ".join(name.split())

        # Reject titles that contain English words. Whole-word matching is
        # used so German words that merely contain one of these (e.g.
        # "finden" contains "find") are not rejected.
        english_words = ("search", "find", "show", "display", "query", "article", "product", "item")
        english_pattern = r"\b(?:" + "|".join(english_words) + r")(?:s|es|ing)?\b"
        if re.search(english_pattern, name.lower()):
            logger.warning(f"AI generated English name '{name}', generating from prompt instead")
            return generate_name_from_prompt(prompt)

        # Limit to 50 characters
        if len(name) > 50:
            name = name[:47] + "..."
        # If we got a valid name, return it
        if name and len(name) >= 3:
            logger.info(f"Successfully generated conversation name via AI: '{name}'")
            return name
        else:
            logger.warning(f"Generated name is too short: '{name}', generating from prompt")
            return generate_name_from_prompt(prompt)
    except Exception as e:
        logger.error(f"Error generating conversation name with AI: {e}", exc_info=True)
        return generate_name_from_prompt(prompt)
def generate_name_from_prompt(prompt: str) -> str:
    """
    Derive a conversation title from the German prompt without using AI.

    Whitespace is collapsed, German question words are dropped (unless that
    would leave nothing), the first letter is upper-cased and the result is
    shortened to at most 50 characters.

    Args:
        prompt: User's input prompt in German

    Returns:
        The derived title, or "Neue Unterhaltung" when the prompt is empty
        or the title would be shorter than 3 characters
    """
    fallback_title = "Neue Unterhaltung"
    if not prompt or not prompt.strip():
        return fallback_title

    skip_words = {"wie", "was", "wo", "wann", "wer", "welche", "welcher", "welches"}

    # Splitting on whitespace both trims the prompt and collapses newlines
    # and repeated spaces.
    tokens = prompt.split()
    kept = [token for token in tokens if token.lower() not in skip_words]
    # Keep the full prompt when it consists of question words only.
    title = " ".join(kept if kept else tokens)

    # Upper-case the first character only.
    title = title[:1].upper() + title[1:]

    if len(title) > 50:
        head = title[:47]
        cut = head.rfind(' ')
        # Cut at a word boundary only when enough text remains before it.
        title = (head[:cut] if cut > 20 else head) + "..."

    return title if len(title) >= 3 else fallback_title

View file

@ -1,302 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Configuration system for chatbot instances.
Loads configuration from the database (FeatureInstance.config JSONB field).
"""
import logging
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List, TYPE_CHECKING
if TYPE_CHECKING:
from modules.datamodels.datamodelFeatures import FeatureInstance
logger = logging.getLogger(__name__)
# Cache for loaded configs (by instance ID for database configs)
# Keys have the form "instance_<id>"; entries are removed only by clear_config_cache().
_config_cache: Dict[str, 'ChatbotConfig'] = {}
# Default system prompt when none is configured
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. You have access to SQL query tools and web search tools. Use them to help answer user questions."
@dataclass
class DatabaseConfig:
    """Database settings of one chatbot instance: schema description and connector name."""
    schema: Dict[str, Any] = field(default_factory=dict)
    connector: str = "preprocessor"

    def is_sql_enabled(self) -> bool:
        """Return True when a connector is configured, i.e. SQL queries are possible."""
        return True if self.connector else False
@dataclass
class ToolConfig:
    """Per-tool settings of one chatbot instance.

    Each tool section is a dict whose "enabled" entry switches the tool on
    or off. A section that is None falls back to the tool's default: SQL and
    streaming are on, Tavily is off.
    """
    sql: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})
    tavily: Optional[Dict[str, Any]] = None
    streaming: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})

    def is_sql_enabled(self) -> bool:
        """Return the SQL tool's "enabled" setting (default: enabled)."""
        return True if self.sql is None else self.sql.get("enabled", True)

    def is_tavily_enabled(self) -> bool:
        """Return the Tavily web search tool's "enabled" setting (default: disabled)."""
        return False if self.tavily is None else self.tavily.get("enabled", False)

    def is_streaming_enabled(self) -> bool:
        """Return the streaming status tool's "enabled" setting (default: enabled)."""
        return True if self.streaming is None else self.streaming.get("enabled", True)
@dataclass
class ModelConfig:
    """Model selection settings of one chatbot instance."""
    # Operation type name used for AI calls
    operationType: str = "DATA_ANALYSE"
    # BASIC replaced DETAILED as the default to get faster responses
    processingMode: str = "BASIC"
    # An empty list means every provider is allowed
    allowedProviders: List[str] = field(default_factory=list)
@dataclass
class ChatbotConfig:
    """Configuration for a chatbot instance."""
    id: str
    name: str
    systemPrompt: str
    database: DatabaseConfig
    tools: ToolConfig
    model: ModelConfig

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotConfig':
        """
        Create ChatbotConfig from dictionary.

        Supports two config formats:
        1. New format (file-based): systemPrompt, database, tools, model
        2. Legacy frontend format: connector, prompts, behavior

        Missing sections, and sections explicitly set to None, fall back to
        their defaults.

        Args:
            data: Configuration dictionary (from JSON file or FeatureInstance.config)
            config_id: Identifier for this config (instance ID or file name)

        Returns:
            ChatbotConfig instance with validated values
        """
        # Detect config format and normalize
        if "prompts" in data or "connector" in data or "behavior" in data:
            # Legacy frontend format - convert to new format
            data = cls._convert_legacy_config(data)

        # Get system prompt - required field, use default if not provided
        system_prompt = data.get("systemPrompt")
        if not system_prompt:
            logger.warning(f"Config {config_id}: No systemPrompt provided, using default")
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # Parse database config ("or {}" also covers an explicit null section)
        db_data = data.get("database") or {}
        database_config = DatabaseConfig(
            schema=db_data.get("schema", {}),
            connector=db_data.get("connector", "preprocessor")
        )

        # Parse tools config with defaults
        tools_data = data.get("tools") or {}
        tools_config = ToolConfig(
            sql=tools_data.get("sql", {"enabled": True}),
            tavily=tools_data.get("tavily", {"enabled": False}),
            streaming=tools_data.get("streaming", {"enabled": True})
        )

        # Parse model config. The fallbacks are the ModelConfig field
        # defaults, so a config without processingMode gets "BASIC" here just
        # as it does in ModelConfig() and in the legacy conversion.
        model_data = data.get("model") or {}
        allowed_providers = model_data.get("allowedProviders") or data.get("allowedProviders", [])
        model_config = ModelConfig(
            operationType=model_data.get("operationType", ModelConfig.operationType),
            processingMode=model_data.get("processingMode", ModelConfig.processingMode),
            allowedProviders=allowed_providers if isinstance(allowed_providers, list) else []
        )

        return cls(
            id=data.get("id", config_id),
            name=data.get("name", "Chatbot"),
            systemPrompt=system_prompt,
            database=database_config,
            tools=tools_config,
            model=model_config
        )

    @staticmethod
    def _convert_legacy_config(data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert legacy frontend config format to new format.

        Legacy format (from AdminFeatureAccessPage.tsx):
        {
            "connector": {"types": [...], "type": "preprocessor"},
            "prompts": {"customAnalysisPrompt": "...", "customFinalAnswerPrompt": "..."},
            "behavior": {"enableWebResearch": true, ...}
        }
        New format:
        {
            "systemPrompt": "...",
            "database": {"connector": "preprocessor"},
            "tools": {"sql": {"enabled": true}, "tavily": {"enabled": true}}
        }

        Args:
            data: Configuration dictionary in the legacy format

        Returns:
            A new dictionary in the new format; the input is not modified
        """
        converted = {}

        # Extract system prompt from prompts section
        prompts = data.get("prompts") or {}
        system_prompt = prompts.get("customAnalysisPrompt") or prompts.get("customFinalAnswerPrompt")
        if system_prompt:
            converted["systemPrompt"] = system_prompt

        # Extract connector from connector section; a non-empty "types" list
        # takes precedence over the single "type" entry
        connector_data = data.get("connector") or {}
        connector_type = connector_data.get("type") or "preprocessor"
        if isinstance(connector_data.get("types"), list) and connector_data["types"]:
            connector_type = connector_data["types"][0]  # Use first connector as primary
        converted["database"] = {
            "connector": connector_type,
            "schema": {}
        }

        # Extract tool settings from behavior section
        behavior = data.get("behavior") or {}
        enable_web_research = behavior.get("enableWebResearch", False)
        converted["tools"] = {
            "sql": {"enabled": True},  # SQL always enabled if connector is set
            "tavily": {"enabled": enable_web_research},
            "streaming": {"enabled": True}  # Streaming always enabled
        }

        # Model config defaults - use BASIC for faster responses
        converted["model"] = {
            "operationType": "DATA_ANALYSE",
            "processingMode": "BASIC",
            "allowedProviders": data.get("allowedProviders", [])
        }

        # Copy other fields
        if "id" in data:
            converted["id"] = data["id"]
        if "name" in data:
            converted["name"] = data["name"]

        logger.debug("Converted legacy config format to new format")
        return converted

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "systemPrompt": self.systemPrompt,
            "database": {
                "schema": self.database.schema,
                "connector": self.database.connector
            },
            "tools": {
                "sql": self.tools.sql,
                "tavily": self.tools.tavily,
                "streaming": self.tools.streaming
            },
            "model": {
                "operationType": self.model.operationType,
                "processingMode": self.model.processingMode,
                "allowedProviders": self.model.allowedProviders
            }
        }
def load_chatbot_config_from_instance(instance: 'FeatureInstance') -> ChatbotConfig:
    """
    Load (and cache) the chatbot configuration of a FeatureInstance.

    The configuration is read from instance.config. An instance without
    config is treated as an empty config, so defaults are used throughout.
    The parsed result is cached under the instance ID and returned from the
    cache on later calls.

    Args:
        instance: FeatureInstance object with config field

    Returns:
        ChatbotConfig instance
    """
    instance_id = instance.id
    cache_key = f"instance_{instance_id}"

    # Serve from the cache when this instance was parsed before.
    cached_config = _config_cache.get(cache_key)
    if cached_config is not None:
        logger.debug(f"Returning cached config for instance {instance_id}")
        return cached_config

    config_data = instance.config
    if not config_data:
        logger.warning(f"Instance {instance_id} has no config, using minimal defaults")
        config_data = {}
    logger.debug(f"Instance {instance_id} raw config keys: {list(config_data.keys()) if config_data else []}, allowedProviders: {config_data.get('allowedProviders')}")

    parsed_config = ChatbotConfig.from_dict(config_data, config_id=instance_id)
    logger.debug(f"Instance {instance_id} parsed config.model.allowedProviders: {parsed_config.model.allowedProviders}")

    _config_cache[cache_key] = parsed_config
    logger.info(f"Loaded chatbot config from instance {instance_id}: {parsed_config.name}")
    return parsed_config
def load_chatbot_config_from_dict(config_data: Dict[str, Any], config_id: str = "custom") -> ChatbotConfig:
    """
    Build a ChatbotConfig directly from an in-memory dictionary.

    This bypasses the instance cache entirely; it simply delegates to
    ``ChatbotConfig.from_dict``.

    Args:
        config_data: Configuration dictionary to parse.
        config_id: Identifier passed through to the resulting config.

    Returns:
        ChatbotConfig instance
    """
    parsed = ChatbotConfig.from_dict(config_data, config_id=config_id)
    return parsed
def clear_config_cache(instance_id: Optional[str] = None):
    """
    Drop cached chatbot configs.

    Args:
        instance_id: When given (and truthy), only the entry for that instance
            is removed, if present. Otherwise the whole cache is emptied.
    """
    # No instance given: wipe everything.
    if not instance_id:
        _config_cache.clear()
        logger.debug("Cleared all chatbot config cache")
        return

    cache_key = f"instance_{instance_id}"
    if cache_key in _config_cache:
        del _config_cache[cache_key]
        logger.debug(f"Cleared chatbot config cache for instance {instance_id}")

File diff suppressed because it is too large Load diff

View file

@ -1,497 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot Feature Container - Main Module.
Handles feature initialization and RBAC catalog registration.
"""
import logging
from typing import Dict, List, Any, Optional
from modules.shared.i18nRegistry import t
logger = logging.getLogger(__name__)
# Feature metadata.
# FEATURE_CODE is the identifier used throughout this module for RBAC
# registration, role lookup and the feature definition.
FEATURE_CODE = "chatbot"
# Display label, passed through the i18n registry helper `t` with the "UI" context.
FEATURE_LABEL = t("Chatbot", context="UI")
# Icon identifier returned in the feature definition.
FEATURE_ICON = "mdi-robot"
# UI Objects for RBAC catalog.
# Each entry carries an objectKey, a translated label and optional meta; the
# entries are passed to catalogService.registerUiObject() in registerFeature().
# NOTE(review): TEMPLATE_ROLES also references "ui.feature.chatbot.threads",
# which is not listed here - confirm whether that UI object should be registered.
UI_OBJECTS = [
    {
        "objectKey": "ui.feature.chatbot.conversations",
        "label": t("Konversationen", context="UI"),
        "meta": {"area": "conversations"}
    }
]
# Resource Objects for RBAC catalog.
# One entry per chatbot API endpoint; "meta" records the route template and
# HTTP method. Entries are passed to catalogService.registerResourceObject()
# in registerFeature().
RESOURCE_OBJECTS = [
    {
        "objectKey": "resource.feature.chatbot.startStream",
        "label": t("Chat starten (Stream)", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/start/stream", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.chatbot.stop",
        "label": t("Chat stoppen", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/stop/{workflowId}", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.chatbot.threads",
        "label": t("Threads abrufen", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/threads", "method": "GET"}
    },
    {
        "objectKey": "resource.feature.chatbot.delete",
        "label": t("Chat löschen", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/{workflowId}", "method": "DELETE"}
    },
]
# Service requirements - services this feature needs from the service center.
# Format: [{serviceKey, meta}]; "meta.usage" is a free-text description.
# Read by getRequiredServiceKeys() and by the first getChatbotServices()
# definition in this module.
# NOTE(review): the later getChatbotServices() definition (which replaces the
# first one at import time) builds chat/ai/streaming directly and only resolves
# "billing" via the service center, so it does not iterate this list.
REQUIRED_SERVICES = [
    {
        "serviceKey": "chat",
        "meta": {"usage": "File info, document handling"}
    },
    {
        "serviceKey": "ai",
        "meta": {"usage": "AI calls, conversation name generation"}
    },
    {
        "serviceKey": "billing",
        "meta": {"usage": "Usage tracking, balance checks"}
    },
    {
        "serviceKey": "streaming",
        "meta": {"usage": "Event manager, ChatStreamingHelper"}
    },
]
# Template roles for this feature.
# Role names MUST follow convention: {featureCode}-{roleName}
# Each role lists "accessRules" templates; _syncTemplateRolesToDb() creates the
# roles and _ensureAccessRulesForRole() creates one AccessRule per
# (context, item) pair that does not exist yet. "item": None is used by the
# admin role for its catch-all rules.
# NOTE(review): the meaning of the "m"/"a"/"n" values is not defined in this
# module; the inline comments suggest own-records / all / none - confirm
# against the AccessRule datamodel.
TEMPLATE_ROLES = [
    {
        "roleLabel": "chatbot-viewer",
        "description": "Chatbot Betrachter - Chat-Threads ansehen (nur lesen)",
        "accessRules": [
            # UI: only threads view, NO active chat
            {"context": "UI", "item": "ui.feature.chatbot.threads", "view": True},
            # RESOURCE: can list threads only
            {"context": "RESOURCE", "item": "resource.feature.chatbot.threads", "view": True},
            # DATA access (own records, read-only)
            {"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
        ]
    },
    {
        "roleLabel": "chatbot-user",
        "description": "Chatbot Benutzer - Chatbot nutzen und eigene Threads verwalten",
        "accessRules": [
            # UI: full access to all views
            {"context": "UI", "item": "ui.feature.chatbot.conversations", "view": True},
            {"context": "UI", "item": "ui.feature.chatbot.threads", "view": True},
            # Resource access: can start/stop chats, view threads, delete own
            {"context": "RESOURCE", "item": "resource.feature.chatbot.startStream", "view": True},
            {"context": "RESOURCE", "item": "resource.feature.chatbot.stop", "view": True},
            {"context": "RESOURCE", "item": "resource.feature.chatbot.threads", "view": True},
            {"context": "RESOURCE", "item": "resource.feature.chatbot.delete", "view": True},
            # DATA access (own records)
            {"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"},
        ]
    },
    {
        "roleLabel": "chatbot-admin",
        "description": "Chatbot Admin - Vollzugriff auf alle Chatbot-Funktionen",
        "accessRules": [
            # Full UI access
            {"context": "UI", "item": None, "view": True},
            # Full resource access
            {"context": "RESOURCE", "item": None, "view": True},
            # Full DATA access
            {"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"},
        ]
    },
]
def getFeatureDefinition() -> Dict[str, Any]:
    """Build the registration record for the chatbot feature.

    The record always carries ``enabled=False``. Per the original author's
    note this is a soft-disable: catalog objects, template roles and routes
    remain loaded so existing instances keep working, while the feature is
    hidden from the Store and the Admin Feature-Instances "Neue Instanz" list.

    Returns:
        Dict with the keys ``code``, ``label``, ``icon`` and ``enabled``.
    """
    definition: Dict[str, Any] = dict(
        code=FEATURE_CODE,
        label=FEATURE_LABEL,
        icon=FEATURE_ICON,
        enabled=False,
    )
    return definition
def getRequiredServiceKeys() -> List[str]:
    """Collect the ``serviceKey`` of every entry in REQUIRED_SERVICES, in order."""
    serviceKeys: List[str] = []
    for requirement in REQUIRED_SERVICES:
        serviceKeys.append(requirement["serviceKey"])
    return serviceKeys
def getChatbotServices(
    user,
    mandateId: Optional[str] = None,
    featureInstanceId: Optional[str] = None,
    workflow=None,
) -> Any:
    """
    Assemble a chatbot service hub by resolving REQUIRED_SERVICES one by one.

    Every key in REQUIRED_SERVICES is looked up through the service center and
    attached to the hub under that key; a lookup failure is logged and the
    attribute is set to None instead. The hub also receives user, mandateId,
    featureInstanceId, workflow and interfaceDbComponent.

    NOTE(review): this module defines getChatbotServices a second time further
    down; that later definition rebinds the name, so this version is not the
    one callers reach.
    """
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface

    # Billing and friends read featureCode from the workflow, so substitute a
    # minimal stand-in object when the caller did not pass one.
    contextWorkflow = (
        workflow
        if workflow is not None
        else type("_Placeholder", (), {"featureCode": FEATURE_CODE})()
    )
    serviceContext = ServiceCenterContext(
        user=user,
        mandate_id=mandateId,
        feature_instance_id=featureInstanceId,
        workflow=contextWorkflow,
    )

    serviceHub = _ChatbotServiceHub()
    serviceHub.user = user
    serviceHub.mandateId = mandateId
    serviceHub.featureInstanceId = featureInstanceId
    # The hub keeps the caller's original workflow (possibly None), not the stand-in.
    serviceHub.workflow = workflow
    serviceHub.interfaceDbComponent = getComponentInterface(
        user, mandateId=mandateId, featureInstanceId=featureInstanceId
    )

    for requirement in REQUIRED_SERVICES:
        key = requirement["serviceKey"]
        try:
            setattr(serviceHub, key, getService(key, serviceContext))
        except Exception as e:
            logger.warning(f"Could not resolve service '{key}' for chatbot: {e}")
            setattr(serviceHub, key, None)
    return serviceHub
def getChatStreamingHelper():
    """
    Return the ChatStreamingHelper obtained from the streaming service.

    The streaming service is resolved through the service center using a
    context built around a placeholder user with no mandate or instance.
    Returns None when the resolved service is falsy.
    """
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext

    # Only service resolution is needed here, so a placeholder user suffices.
    placeholderUser = __get_placeholder_user()
    serviceContext = ServiceCenterContext(
        user=placeholderUser, mandate_id=None, feature_instance_id=None
    )
    streamingService = getService("streaming", serviceContext)
    if not streamingService:
        return None
    return streamingService.getChatStreamingHelper()
def __get_placeholder_user():
    """Return the root interface's current user, used where only service resolution is needed."""
    from modules.interfaces.interfaceDbApp import getRootInterface

    rootInterface = getRootInterface()
    return rootInterface.currentUser
def getEventManager(user, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None):
    """
    Resolve the streaming service for the given scope and return its event manager.

    Used by the chatbot routes for SSE streaming.
    """
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext

    serviceContext = ServiceCenterContext(
        user=user,
        mandate_id=mandateId,
        feature_instance_id=featureInstanceId,
    )
    streamingService = getService("streaming", serviceContext)
    eventManager = streamingService.getEventManager()
    return eventManager
class _ChatbotServiceHub:
    """Lightweight hub exposing only services required by the chatbot feature.

    All attributes are plain class-level defaults that callers overwrite on
    the instance.

    NOTE(review): a second class with the same name is defined later in this
    module and rebinds ``_ChatbotServiceHub``; this definition is therefore
    not the one instantiated at call time.
    """
    # Request scope (filled in by getChatbotServices).
    user = None
    mandateId = None
    featureInstanceId = None
    workflow = None
    # Component DB interface (assigned eagerly by the first getChatbotServices).
    interfaceDbComponent = None
    # Service slots, one per key in REQUIRED_SERVICES.
    chat = None
    ai = None
    billing = None
    streaming = None
    # Static feature identification and provider restriction defaults.
    featureCode = "chatbot"
    allowedProviders = None
def getUiObjects() -> List[Dict[str, Any]]:
    """Expose the module-level UI_OBJECTS list (the same list object, not a copy)."""
    return UI_OBJECTS
def getResourceObjects() -> List[Dict[str, Any]]:
    """Expose the module-level RESOURCE_OBJECTS list (the same list object, not a copy)."""
    return RESOURCE_OBJECTS
def getTemplateRoles() -> List[Dict[str, Any]]:
    """Expose the module-level TEMPLATE_ROLES list (the same list object, not a copy)."""
    return TEMPLATE_ROLES
def registerFeature(catalogService) -> bool:
    """
    Publish the chatbot feature's RBAC objects and sync its template roles.

    Registers every UI_OBJECTS entry, then every RESOURCE_OBJECTS entry, with
    the catalog service and finally calls _syncTemplateRolesToDb().

    Args:
        catalogService: The RBAC catalog service instance

    Returns:
        True when all steps completed; False when any step raised (the error
        is logged and not propagated).
    """
    try:
        for uiEntry in UI_OBJECTS:
            catalogService.registerUiObject(
                featureCode=FEATURE_CODE,
                objectKey=uiEntry["objectKey"],
                label=uiEntry["label"],
                meta=uiEntry.get("meta"),
            )

        for resourceEntry in RESOURCE_OBJECTS:
            catalogService.registerResourceObject(
                featureCode=FEATURE_CODE,
                objectKey=resourceEntry["objectKey"],
                label=resourceEntry["label"],
                meta=resourceEntry.get("meta"),
            )

        # Template roles (and their access rules) are persisted after the catalog entries.
        _syncTemplateRolesToDb()
        logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI objects and {len(RESOURCE_OBJECTS)} resource objects")
    except Exception as e:
        logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
        return False
    return True
def getChatbotServices(
    user,
    mandateId: Optional[str] = None,
    featureInstanceId: Optional[str] = None,
    workflow=None,
) -> "_ChatbotServiceHub":
    """
    Build the lightweight chatbot service hub without the full legacy Services hub.

    The hub gets the request scope, the app and chatbot DB interfaces, and
    directly constructed chat / ai / streaming services wrapped in
    PublicService. Billing is resolved through the service center; if that
    fails, a warning is logged and ``hub.billing`` is None.
    ``interfaceDbComponent`` is not created here - the hub loads it lazily on
    first access.

    The original author notes that this avoids the cost of loading all feature
    interfaces and services, and that using the chatbot interface for
    ``interfaceDbChat`` lets chatProcess reuse it instead of initialising a
    second one.
    """
    from modules.serviceHub import PublicService
    from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
    from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
    from modules.serviceCenter.services.serviceChat.mainServiceChat import ChatService
    from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
    from modules.serviceCenter.core.serviceStreaming.mainServiceStreaming import StreamingService

    serviceHub = _ChatbotServiceHub()

    # Plain scope values and static defaults, assigned in the original order.
    scopeValues = (
        ("user", user),
        ("mandateId", mandateId),
        ("featureInstanceId", featureInstanceId),
        ("workflow", workflow),
        ("featureCode", "chatbot"),
        ("allowedProviders", None),
    )
    for attributeName, attributeValue in scopeValues:
        setattr(serviceHub, attributeName, attributeValue)

    serviceHub.interfaceDbApp = getAppInterface(user, mandateId=mandateId)
    # Backing field for the lazily loaded interfaceDbComponent property.
    serviceHub._interfaceDbComponent_val = None
    # Same chatbot interface that chatProcess uses.
    serviceHub.interfaceDbChat = getChatbotInterface(
        user, mandateId=mandateId, featureInstanceId=featureInstanceId
    )

    # Services receive the hub itself, so they are built after the interfaces are attached.
    serviceHub.chat = PublicService(ChatService(serviceHub))
    serviceHub.ai = PublicService(AiService(serviceHub), functionsOnly=False)
    serviceHub.streaming = PublicService(StreamingService(serviceHub))

    # Billing comes from the service center (needed for the preflight billing
    # check and billing callback); any failure degrades to billing=None.
    try:
        from modules.serviceCenter import getService
        from modules.serviceCenter.context import ServiceCenterContext

        billingWorkflow = (
            workflow
            if workflow
            else type("_Placeholder", (), {"featureCode": FEATURE_CODE})()
        )
        billingContext = ServiceCenterContext(
            user=user,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
            workflow=billingWorkflow,
        )
        serviceHub.billing = getService("billing", billingContext)
    except Exception as e:
        logger.warning(f"Could not resolve billing service for chatbot: {e}")
        serviceHub.billing = None
    return serviceHub
class _ChatbotServiceHub:
    """Lightweight hub with chat, ai, streaming for chatbot; avoids full Services init.

    Attributes are class-level defaults that getChatbotServices() overwrites
    on the instance. ``interfaceDbComponent`` is a read-only property that is
    resolved on first access.
    """
    # Request scope.
    user = None
    mandateId = None
    featureInstanceId = None
    workflow = None
    # Database interfaces; _interfaceDbComponent_val backs the lazy property below.
    interfaceDbApp = None
    _interfaceDbComponent_val = None
    interfaceDbChat = None
    @property
    def interfaceDbComponent(self):
        """Lazy-load interfaceDbComponent on first access and cache it on the instance.

        The original note cites a saving of roughly 100-300 ms when no files
        are involved (figure taken from the author's comment, not verified here).
        """
        if self._interfaceDbComponent_val is None:
            # Imported locally so the module is only loaded when actually needed.
            from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
            self._interfaceDbComponent_val = getComponentInterface(
                self.user, mandateId=self.mandateId, featureInstanceId=self.featureInstanceId
            )
        return self._interfaceDbComponent_val
    # Service slots filled by getChatbotServices().
    chat = None
    ai = None
    billing = None
    streaming = None
    # Static feature identification and provider restriction defaults.
    featureCode = "chatbot"
    allowedProviders = None
def _syncTemplateRolesToDb() -> int:
    """
    Make sure every entry of TEMPLATE_ROLES exists as a global template role.

    Existing template roles (those with ``mandateId`` None) only get their
    missing AccessRules added; unknown role labels are created first and then
    receive their AccessRules.

    Returns:
        Number of roles newly created by this call; 0 when an error occurred
        (the error is logged, not raised).
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext
        from modules.datamodels.datamodelUtils import coerce_text_multilingual

        rootInterface = getRootInterface()

        # Map label -> id for the global template roles already stored for this feature.
        knownRoleIds = {
            role.roleLabel: str(role.id)
            for role in rootInterface.getRolesByFeatureCode(FEATURE_CODE)
            if role.mandateId is None
        }

        newRoleCount = 0
        for template in TEMPLATE_ROLES:
            label = template["roleLabel"]

            if label in knownRoleIds:
                # Role already present: only top up its access rules.
                _ensureAccessRulesForRole(
                    rootInterface, knownRoleIds[label], template.get("accessRules", [])
                )
                continue

            roleModel = Role(
                roleLabel=label,
                description=coerce_text_multilingual(template.get("description", {})),
                featureCode=FEATURE_CODE,
                mandateId=None,  # Global template
                featureInstanceId=None,
                isSystemRole=False
            )
            storedRole = rootInterface.db.recordCreate(Role, roleModel.model_dump())
            roleId = storedRole.get("id")
            _ensureAccessRulesForRole(rootInterface, roleId, template.get("accessRules", []))
            logger.info(f"Created template role '{label}' with ID {roleId}")
            newRoleCount += 1

        if newRoleCount > 0:
            logger.info(f"Feature '{FEATURE_CODE}': Created {newRoleCount} template roles")
        return newRoleCount
    except Exception as e:
        logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
        return 0
def _ensureAccessRulesForRole(rootInterface, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
    """
    Create the AccessRules from ``ruleTemplates`` that the role does not have yet.

    A rule counts as existing when a stored rule for the role has the same
    (context value, item) pair.

    Args:
        rootInterface: Root interface instance
        roleId: Role ID
        ruleTemplates: List of rule templates

    Returns:
        Number of rules created
    """
    from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext

    # Signatures of the rules already stored for this role.
    # IMPORTANT: compare on the enum's .value; str() on the enum would give
    # "AccessRuleContext.DATA" in Python 3.11+.
    knownSignatures = {
        (stored.context.value if stored.context else None, stored.item)
        for stored in rootInterface.getAccessRulesByRole(roleId)
    }

    # Context strings with a matching enum member; anything else is passed through unchanged.
    contextByName = {
        "UI": AccessRuleContext.UI,
        "DATA": AccessRuleContext.DATA,
        "RESOURCE": AccessRuleContext.RESOURCE,
    }

    rulesCreated = 0
    for ruleTemplate in ruleTemplates:
        contextName = ruleTemplate.get("context", "UI")
        itemKey = ruleTemplate.get("item")
        if (contextName, itemKey) in knownSignatures:
            continue

        ruleModel = AccessRule(
            roleId=roleId,
            context=contextByName.get(contextName, contextName),
            item=itemKey,
            view=ruleTemplate.get("view", False),
            read=ruleTemplate.get("read"),
            create=ruleTemplate.get("create"),
            update=ruleTemplate.get("update"),
            delete=ruleTemplate.get("delete"),
        )
        rootInterface.db.recordCreate(AccessRule, ruleModel.model_dump())
        rulesCreated += 1

    if rulesCreated > 0:
        logger.debug(f"Created {rulesCreated} AccessRules for role {roleId}")
    return rulesCreated

View file

@ -1,571 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Chatbot routes for the backend API.
Implements chatbot endpoints using LangGraph-based conversation workflows.
"""
import logging
import json
import asyncio
import math
import uuid
from typing import Optional, Any, Dict, Union
from fastapi import APIRouter, HTTPException, Depends, Body, Path, Query, Request, status
from fastapi.responses import StreamingResponse
from modules.shared.timeUtils import parseTimestamp, getUtcTimestamp
# Import auth modules
from modules.auth import limiter, getRequestContext, RequestContext
# Import interfaces
from . import interfaceFeatureChatbot as interfaceDbChat
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
from modules.interfaces.interfaceDbApp import getRootInterface
from modules.interfaces.interfaceFeatures import getFeatureInterface
# Import models
from modules.datamodels.datamodelChat import UserInputRequest
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
from modules.features.chatbot.interfaceFeatureChatbot import ChatbotConversation
# Import chatbot feature
from modules.features.chatbot import chatProcess
from modules.features.chatbot.mainChatbot import getEventManager
from modules.shared.i18nRegistry import apiRouteContext
# Message helper bound to this route module's i18n context; used below for
# user-facing error details.
routeApiMsg = apiRouteContext("routeFeatureChatbot")
# Configure logger
logger = logging.getLogger(__name__)
# Create router for chatbot endpoints.
# All routes below are mounted under /api/chatbot and tagged "Chatbot".
router = APIRouter(
    prefix="/api/chatbot",
    tags=["Chatbot"],
    responses={404: {"description": "Not found"}}
)
def _getServiceChat(context: RequestContext, instanceId: Optional[str] = None, mandateId: Optional[str] = None):
    """Return the chatbot DB interface for the request's user and the given instance.

    An explicitly passed ``mandateId`` wins; otherwise the mandate of the
    request context is used (stringified), or None when the context has none.
    Per the original note, passing the mandateId obtained from
    _validateInstanceAccess ensures a cache hit with getChatbotServices.
    """
    if mandateId is not None:
        resolvedMandate = mandateId
    elif context.mandateId:
        resolvedMandate = str(context.mandateId)
    else:
        resolvedMandate = None

    return interfaceDbChat.getInterface(
        context.user,
        mandateId=resolvedMandate,
        featureInstanceId=instanceId,
    )
def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
    """
    Check that ``instanceId`` is a chatbot instance the caller may use.

    Platform admins pass once the instance exists and is a chatbot instance;
    all other users additionally need an enabled FeatureAccess entry for it.

    Args:
        instanceId: The FeatureInstance ID from URL
        context: The request context with user info

    Returns:
        mandateId of the instance (as string)

    Raises:
        HTTPException 404 if instance not found
        HTTPException 400 if the instance is not a chatbot instance
        HTTPException 403 if user doesn't have access
    """
    rootInterface = getRootInterface()
    instance = getFeatureInterface(rootInterface.db).getFeatureInstance(instanceId)

    if not instance:
        raise HTTPException(
            status_code=404,
            detail=f"Feature instance '{instanceId}' not found"
        )

    if instance.featureCode != "chatbot":
        raise HTTPException(
            status_code=400,
            detail=f"Instance '{instanceId}' is not a chatbot instance"
        )

    # Platform admins skip the per-user FeatureAccess lookup.
    if context.isPlatformAdmin:
        return str(instance.mandateId)

    accessGranted = False
    for access in rootInterface.getFeatureAccessesForUser(str(context.user.id)):
        if str(access.featureInstanceId) == instanceId and access.enabled:
            accessGranted = True
            break

    if not accessGranted:
        raise HTTPException(
            status_code=403,
            detail=f"Access denied to feature instance '{instanceId}'"
        )
    return str(instance.mandateId)
# =============================================================================
# List threads - MUST be first to avoid /{instanceId}/{workflowId} matching
# GET /api/chatbot/{instanceId}/threads before DELETE /api/chatbot/{instanceId}/{workflowId}
# =============================================================================
@router.get("/{instanceId}/threads")
@limiter.limit("120/minute")
def get_chatbot_threads(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: Optional[str] = Query(None, description="Optional workflow ID to get details and chat data for a specific thread"),
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object (only used when workflowId is not provided)"),
    context: RequestContext = Depends(getRequestContext)
) -> Union[PaginatedResponse[ChatbotConversation], Dict[str, Any]]:
    """
    List all chatbot workflows (threads) for the current user, or get details and chat data for a specific thread.

    - If workflowId is provided: returns ``{"workflow": ..., "chatData": ...}``
      for that workflow.
    - If workflowId is not provided: returns ``{"items": [...], "pagination": {...}}``
      containing only workflows whose ``workflowMode`` is "Chatbot". Sorting
      and paging are applied in memory when ``pagination`` is given.

    A missing ``maxSteps`` value is reported as 10 in both response shapes.

    Raises:
        HTTPException 400: ``pagination`` is not valid JSON, is not a JSON
            object, or fails PaginationParams validation.
        HTTPException 404: ``workflowId`` was given but no such workflow exists.
        HTTPException 500: any other unexpected error.
        Plus the errors raised by _validateInstanceAccess.
    """
    mandateId = _validateInstanceAccess(instanceId, context)
    try:
        interfaceDbChat = _getServiceChat(context, instanceId, mandateId=mandateId)

        # --- Single-thread mode -------------------------------------------
        if workflowId:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if not workflow:
                raise HTTPException(
                    status_code=404,
                    detail=f"Workflow with ID {workflowId} not found"
                )
            # Normalise to a mutable dict regardless of how the interface returns it.
            if hasattr(workflow, 'model_dump'):
                workflow_dict = workflow.model_dump()
            elif isinstance(workflow, dict):
                workflow_dict = dict(workflow)
            else:
                workflow_dict = workflow
            if workflow_dict.get("maxSteps") is None:
                workflow_dict["maxSteps"] = 10
            chatData = interfaceDbChat.getUnifiedChatData(workflowId, None)
            return {
                "workflow": workflow_dict,
                "chatData": chatData
            }

        # --- List mode ----------------------------------------------------
        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                paginationParams = PaginationParams(**paginationDict) if paginationDict else None
            except (json.JSONDecodeError, ValueError, TypeError) as e:
                # TypeError covers valid JSON that is not an object (e.g. "[1]"
                # or "5"): unpacking it with ** fails, and that is a client
                # error, not a server error.
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                )

        # Fetch everything unpaginated, then filter to chatbot workflows so
        # the totals below reflect the filtered set.
        all_workflows = interfaceDbChat.getWorkflows(pagination=None)
        chatbot_workflows_data = [
            wf for wf in all_workflows
            if (wf.get("workflowMode") or getattr(wf, "workflowMode", None)) == "Chatbot"
        ]

        if paginationParams:
            if paginationParams.sort:
                chatbot_workflows_data = interfaceDbChat._applySorting(chatbot_workflows_data, paginationParams.sort)
            totalItems = len(chatbot_workflows_data)
            totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
            startIdx = (paginationParams.page - 1) * paginationParams.pageSize
            endIdx = startIdx + paginationParams.pageSize
            workflows = chatbot_workflows_data[startIdx:endIdx]
        else:
            workflows = chatbot_workflows_data
            totalItems = len(chatbot_workflows_data)
            totalPages = 1

        normalized_workflows = []
        for wf in workflows:
            normalized_wf = dict(wf)
            if normalized_wf.get("maxSteps") is None:
                normalized_wf["maxSteps"] = 10
            normalized_workflows.append(normalized_wf)

        from modules.routes.routeHelpers import enrichRowsWithFkLabels
        enriched = enrichRowsWithFkLabels(normalized_workflows, ChatbotConversation)
        return {
            "items": enriched,
            "pagination": PaginationMetadata(
                currentPage=paginationParams.page if paginationParams else 1,
                pageSize=paginationParams.pageSize if paginationParams else len(workflows),
                totalItems=totalItems,
                totalPages=totalPages,
                sort=paginationParams.sort if paginationParams else [],
                filters=paginationParams.filters if paginationParams else None
            ).model_dump(),
        }
    except HTTPException:
        # Deliberate HTTP errors (400/404 above) pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Error getting chatbot threads: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Error getting chatbot threads: {str(e)}"
        )
# Chatbot streaming endpoint (SSE)
@router.post("/{instanceId}/start/stream")
@limiter.limit("120/minute")
async def stream_chatbot_start(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: Optional[str] = Query(None, description="Optional ID of the workflow to continue (can also be in request body)"),
    userInput: UserInputRequest = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> StreamingResponse:
    """
    Starts a new chatbot workflow or continues an existing one with SSE streaming.

    Streams progress updates in real-time via Server-Sent Events.
    workflowId can be provided either:
    - As a query parameter: /api/chatbot/{instanceId}/start/stream?workflowId=xxx
    - In the request body as part of UserInputRequest
    - Query parameter takes precedence if both are provided

    The stream first replays the stored chat data (restricted to the current
    round when resuming), then forwards queue events until a
    complete/stopped/error event arrives, the workflow is found in status
    "stopped", the client disconnects, or 5 minutes have elapsed.

    Raises:
        HTTPException: errors from _validateInstanceAccess; HTTP errors raised
            while starting the workflow are passed through unchanged; any
            other failure is reported as 500.
    """
    # Validate instance access
    mandateId = _validateInstanceAccess(instanceId, context)
    event_manager = getEventManager(context.user, mandateId=mandateId, featureInstanceId=instanceId)
    try:
        # Use workflowId from query parameter if provided, otherwise from request body
        final_workflow_id = workflowId or userInput.workflowId
        # Start background processing (this will create the workflow and event queue)
        # Pass featureInstanceId and event_manager to chatProcess
        workflow = await chatProcess(
            context.user, mandateId, userInput, final_workflow_id,
            featureInstanceId=instanceId, event_manager=event_manager
        )
        # Check if workflow was created successfully
        if not workflow:
            raise HTTPException(
                status_code=500,
                detail=routeApiMsg("Failed to create or load workflow")
            )
        # Get event queue for the workflow
        queue = event_manager.get_queue(workflow.id)
        if not queue:
            # Create queue if it doesn't exist
            queue = event_manager.create_queue(workflow.id)

        async def event_stream():
            """Async generator for SSE events - pure event-driven streaming (no polling)."""
            try:
                # Yield keepalive immediately so client gets 200 + first byte fast (normal chatbot feel)
                yield ": keepalive\n\n"
                # Use same mandateId as chatProcess so we hit interface cache (avoid duplicate DB init)
                interfaceDbChat = _getServiceChat(context, instanceId, mandateId=mandateId)
                # Use workflow from chatProcess (no refetch)
                current_round = workflow.currentRound if workflow else None
                is_resuming = final_workflow_id is not None and current_round and current_round > 1
                # Send initial chat data (exact format as chatData endpoint) - only once at start
                try:
                    chatData = interfaceDbChat.getUnifiedChatData(workflow.id, None)
                    if chatData.get("items"):
                        # Filter items by round number if resuming
                        filtered_items = []
                        for item in chatData["items"]:
                            if is_resuming and current_round:
                                # Get round number from item
                                item_round = None
                                item_data = item.get("item")
                                if item_data:
                                    # Handle both dict and object access
                                    if isinstance(item_data, dict):
                                        item_round = item_data.get("roundNumber")
                                    elif hasattr(item_data, "roundNumber"):
                                        item_round = item_data.roundNumber
                                # When resuming, only include items from current round onwards
                                # Exclude items without roundNumber (they're from old rounds before roundNumber was added)
                                # Exclude items with roundNumber < current_round (from previous rounds)
                                if item_round is None or item_round < current_round:
                                    continue  # Skip items from previous rounds or without round info
                            filtered_items.append(item)
                        # Emit filtered items
                        for item in filtered_items:
                            _inner = item.get("item")
                            serializable_item = {
                                "type": item.get("type"),
                                "createdAt": item.get("createdAt"),
                                "item": _inner.model_dump() if _inner is not None and hasattr(_inner, "model_dump") else _inner,
                            }
                            # Emit item directly in exact chatData format: {type, createdAt, item}
                            yield f"data: {json.dumps(serializable_item)}\n\n"
                except Exception as e:
                    # The replay is best-effort; live events are still streamed below.
                    logger.warning(f"Error fetching initial chat data: {e}")
                # Keepalive interval (30 seconds)
                keepalive_interval = 30.0
                last_keepalive = asyncio.get_event_loop().time()
                # Status check interval (check workflow status every 5 seconds - less frequent since we're event-driven)
                status_check_interval = 5.0
                last_status_check = asyncio.get_event_loop().time()
                # Stream events until completion or timeout - pure event-driven (no polling)
                timeout = 300.0  # 5 minutes max
                start_time = asyncio.get_event_loop().time()
                while True:
                    # Check timeout
                    elapsed = asyncio.get_event_loop().time() - start_time
                    if elapsed > timeout:
                        logger.info(f"Stream timeout for workflow {workflow.id}")
                        break
                    # Check for client disconnection
                    if await request.is_disconnected():
                        logger.info(f"Client disconnected for workflow {workflow.id}")
                        break
                    current_time = asyncio.get_event_loop().time()
                    # Periodically check workflow status (less frequent since we're event-driven)
                    if current_time - last_status_check >= status_check_interval:
                        try:
                            current_workflow = interfaceDbChat.getWorkflow(workflow.id)
                            if current_workflow and current_workflow.status == "stopped":
                                logger.info(f"Workflow {workflow.id} was stopped, closing stream")
                                break
                        except Exception as e:
                            logger.warning(f"Error checking workflow status: {e}")
                        last_status_check = current_time
                    # Get event from queue (pure event-driven - no polling database)
                    try:
                        # The 1 s wait bounds how long the timeout/disconnect/status
                        # checks above can be delayed while the queue is idle.
                        event = await asyncio.wait_for(queue.get(), timeout=1.0)
                        # Handle different event types
                        event_type = event.get("type")
                        event_data = event.get("data", {})
                        # Emit chatdata events (messages, logs, stats, status, chunk) in exact chatData format
                        if event_type == "chatdata" and event_data:
                            # Handle status events (transient UI feedback)
                            if event_data.get("type") == "status":
                                # Status events have simple structure: {type: "status", label: "..."}
                                status_item = {
                                    "type": "status",
                                    "label": event_data.get("label", "")
                                }
                                yield f"data: {json.dumps(status_item)}\n\n"
                            elif event_data.get("type") == "chunk":
                                # Token chunks for ChatGPT-like streaming
                                chunk_item = {
                                    "type": "chunk",
                                    "content": event_data.get("content", "")
                                }
                                yield f"data: {json.dumps(chunk_item)}\n\n"
                            else:
                                # Emit other chatdata items (messages, logs, stats) in exact chatData format
                                chatdata_item = event_data
                                # Ensure item field is serializable (convert Pydantic models to dicts)
                                if isinstance(chatdata_item, dict) and "item" in chatdata_item:
                                    item_obj = chatdata_item.get("item")
                                    if hasattr(item_obj, "model_dump"):
                                        # Copy first so the queued event itself is not mutated.
                                        chatdata_item = chatdata_item.copy()
                                        chatdata_item["item"] = item_obj.model_dump()
                                yield f"data: {json.dumps(chatdata_item)}\n\n"
                        # Handle completion/stopped events to close stream
                        elif event_type == "complete":
                            logger.info(f"Workflow {workflow.id} completed, closing stream")
                            break
                        elif event_type == "stopped":
                            logger.info(f"Workflow {workflow.id} stopped, closing stream")
                            break
                        elif event_type == "error" and event.get("step") == "error":
                            logger.warning(f"Workflow {workflow.id} error, closing stream")
                            break
                        # Any handled event counts as traffic, so push the next keepalive out.
                        last_keepalive = current_time
                    except asyncio.TimeoutError:
                        # Send keepalive if needed (no events received, but keep connection alive)
                        current_time = asyncio.get_event_loop().time()
                        if current_time - last_keepalive >= keepalive_interval:
                            yield ": keepalive\n\n"
                            last_keepalive = current_time
                        continue
                    except Exception as e:
                        logger.error(f"Error in event stream: {e}")
                        break
            except Exception as e:
                logger.error(f"Error in event stream generator: {e}", exc_info=True)
            finally:
                # Stream ends - cleanup handled by event manager
                pass

        return StreamingResponse(
            event_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"  # Disable buffering for nginx
            }
        )
    except HTTPException:
        # Pass deliberate HTTP errors through unchanged (same pattern as the
        # other chatbot endpoints) instead of re-wrapping them as a generic
        # 500 with a stringified detail.
        raise
    except Exception as e:
        logger.error(f"Error in stream_chatbot_start: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
# Workflow stop endpoint
@router.post("/{instanceId}/stop/{workflowId}", response_model=ChatbotConversation)
@limiter.limit("120/minute")
async def stop_chatbot(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: str = Path(..., description="ID of the workflow to stop"),
    context: RequestContext = Depends(getRequestContext)
) -> ChatbotConversation:
    """Mark a chatbot workflow as stopped and announce it via a "stopped" event.

    Steps, in order: validate instance access, load the workflow, persist the
    "stopped" status, write a warning log entry, reload the workflow, emit a
    "stopped" workflow event, and return the reloaded workflow.

    Raises:
        HTTPException: 404 when the workflow cannot be loaded, 403 when it is
            bound to a different feature instance, 500 for any other failure.
    """
    # Access validation happens outside the try block, so whatever it raises
    # is not rewrapped as a 500 by the handler below.
    mandateId = _validateInstanceAccess(instanceId, context)
    try:
        chatDb = _getServiceChat(context, instanceId, mandateId=mandateId)

        workflow = chatDb.getWorkflow(workflowId)
        if not workflow:
            raise HTTPException(
                status_code=404,
                detail=f"Workflow {workflowId} not found"
            )

        # A workflow without a featureInstanceId passes this check; only an
        # explicit mismatch is rejected.
        if workflow.featureInstanceId and workflow.featureInstanceId != instanceId:
            raise HTTPException(
                status_code=403,
                detail=f"Workflow {workflowId} does not belong to instance {instanceId}"
            )

        # Persist the new status before anything is logged or emitted.
        statusPatch = {
            "status": "stopped",
            "lastActivity": getUtcTimestamp()
        }
        chatDb.updateWorkflow(workflowId, statusPatch)

        events = getEventManager(context.user, mandateId=mandateId, featureInstanceId=instanceId)

        # The log entry is handed the event manager as well (the original note
        # states that createLog emits when one is provided).
        stopLogEntry = {
            "id": f"log_{uuid.uuid4()}",
            "workflowId": workflowId,
            "message": "Workflow stopped by user",
            "type": "warning",
            "status": "stopped",
            "timestamp": getUtcTimestamp(),
            "roundNumber": workflow.currentRound if workflow else 1
        }
        chatDb.createLog(stopLogEntry, event_manager=events)

        # Fetch the record again so the response carries the updated state.
        workflow = chatDb.getWorkflow(workflowId)

        # Emit the "stopped" event for this workflow's context.
        await events.emit_event(
            context_id=workflowId,
            event_type="stopped",
            data={"workflowId": workflowId},
            event_category="workflow",
            message="Workflow stopped by user",
            step="stopped"
        )
        logger.info(f"Stopped workflow {workflowId} and emitted stopped event")
        return workflow
    except HTTPException:
        # Deliberate HTTP errors (404/403 above) pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error in stop_chatbot: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
# Delete chatbot workflow endpoint
# NOTE: This catch-all route MUST be defined AFTER more specific routes like /threads
@router.delete("/{instanceId}/{workflowId}", response_model=Dict[str, Any])
@limiter.limit("120/minute")
def delete_chatbot(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: str = Path(..., description="ID of the workflow to delete"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Delete a chatbot workflow and its associated data.

    Returns:
        A dict with the deleted workflow ``id`` and a confirmation ``message``.

    Raises:
        HTTPException: 404 when the workflow cannot be loaded, 403 when it is
            bound to a different feature instance, 400 when it is not a
            chatbot workflow, 500 when deletion fails or an unexpected error
            occurs.
    """
    # Validate instance access - if user has access to instance, they can delete their workflows
    mandateId = _validateInstanceAccess(instanceId, context)
    try:
        interfaceDbChat = _getServiceChat(context, instanceId, mandateId=mandateId)

        # Get workflow directly (interface already handles mandate filtering)
        workflow = interfaceDbChat.getWorkflow(workflowId)
        if not workflow:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Workflow with ID {workflowId} not found"
            )

        # Same ownership rule the stop endpoint applies: an explicit binding to
        # another instance is rejected, so a workflow cannot be deleted through
        # a foreign instance URL.
        if workflow.featureInstanceId and workflow.featureInstanceId != instanceId:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"Workflow {workflowId} does not belong to instance {instanceId}"
            )

        # Only chatbot workflows may be removed through this route. A single
        # getattr covers both a missing and an empty workflowMode; the previous
        # "attr or getattr(attr)" form raised before its fallback could apply.
        if getattr(workflow, "workflowMode", None) != "Chatbot":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Workflow {workflowId} is not a chatbot workflow"
            )

        # User has instance access, allow delete (no complex RBAC checks needed)
        logger.info(f"User {context.user.id} deleting workflow {workflowId} from instance {instanceId}")

        success = interfaceDbChat.deleteWorkflow(workflowId)
        if not success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to delete workflow")
            )

        return {
            "id": workflowId,
            "message": "Chatbot workflow and associated data deleted successfully"
        }
    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error in delete_chatbot: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Error deleting chatbot workflow: {str(e)}"
        )

File diff suppressed because it is too large Load diff

View file

@ -33,7 +33,7 @@ Feature Registration → mainCommcoach.py
| DatabaseConnector | `connectorDbPostgre.py` | New DB `poweron_commcoach` | | DatabaseConnector | `connectorDbPostgre.py` | New DB `poweron_commcoach` |
| VoiceObjects (STT/TTS) | `interfaceVoiceObjects.py` | Voice pipeline | | VoiceObjects (STT/TTS) | `interfaceVoiceObjects.py` | Voice pipeline |
| MessagingInterface | `interfaceMessaging.py` | Email summaries | | MessagingInterface | `interfaceMessaging.py` | Email summaries |
| SSE Pattern | chatbot `routeFeatureChatbot.py` | Chat streaming | | SSE Pattern | workspace `routeFeatureWorkspace.py` | Chat streaming |
| PDF Renderer | `rendererPdf.py` | Dossier export (Iteration 2) | | PDF Renderer | `rendererPdf.py` | Dossier export (Iteration 2) |
| EventManagement | `eventManagement.py` | Scheduled reminders | | EventManagement | `eventManagement.py` | Scheduled reminders |

View file

@ -1,12 +1,15 @@
""" """
LangGraph-based pipeline for extracting structured content from BZO PDFs. Pipeline for extracting structured content from BZO PDFs.
The extraction runs as a plain sequential pipeline of step functions; each step
takes the shared state dict, mutates/returns it, and the steps are chained
directly (no external workflow-orchestration framework).
""" """
import logging import logging
import re import re
from typing import TypedDict, List, Dict, Any, Optional from typing import TypedDict, List, Dict, Any, Optional
from dataclasses import dataclass from dataclasses import dataclass
from langgraph.graph import StateGraph, START, END
from modules.features.realEstate.bzoPdfExtractor import BZOPdfExtractor, TextBlock from modules.features.realEstate.bzoPdfExtractor import BZOPdfExtractor, TextBlock
from modules.features.realEstate.bzoRuleTaxonomy import RULE_TAXONOMY from modules.features.realEstate.bzoRuleTaxonomy import RULE_TAXONOMY
@ -14,7 +17,7 @@ from modules.features.realEstate.bzoRuleTaxonomy import RULE_TAXONOMY
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ===== BZO Params Extraction State (LangGraph with LLM) ===== # ===== BZO Params Extraction State (LLM step) =====
class BZOParamsExtractionState(TypedDict): class BZOParamsExtractionState(TypedDict):
"""State for BZO params extraction via LLM.""" """State for BZO params extraction via LLM."""
@ -947,7 +950,7 @@ def extract_wohnzone_params(
} }
# ===== LangGraph: LLM-based BZO Params Extraction ===== # ===== LLM-based BZO Params Extraction =====
def _build_bauzone_context_for_llm(state: BZOParamsExtractionState) -> str: def _build_bauzone_context_for_llm(state: BZOParamsExtractionState) -> str:
"""Build context string for LLM from extracted BZO content.""" """Build context string for LLM from extracted BZO content."""
@ -1089,7 +1092,7 @@ Antwort NUR mit den relevanten Artikelnummern, eine pro Zeile (z.B. "Art. 15", "
async def llm_extract_bauzone_params_node(state: BZOParamsExtractionState) -> BZOParamsExtractionState: async def llm_extract_bauzone_params_node(state: BZOParamsExtractionState) -> BZOParamsExtractionState:
"""LangGraph node: use LLM to extract BZO parameters for Bauzone as bullet list.""" """Use LLM to extract BZO parameters for Bauzone as bullet list."""
bauzone = state.get("bauzone", "") bauzone = state.get("bauzone", "")
gemeinde = state.get("gemeinde", "") gemeinde = state.get("gemeinde", "")
ai_service = state.get("ai_service") ai_service = state.get("ai_service")
@ -1192,15 +1195,6 @@ Antwort NUR mit der Bullet-Liste, sonst nichts:"""
} }
def create_bzo_params_extraction_graph():
"""Create LangGraph for LLM-based BZO params extraction."""
workflow = StateGraph(BZOParamsExtractionState)
workflow.add_node("llm_extract", llm_extract_bauzone_params_node)
workflow.set_entry_point("llm_extract")
workflow.add_edge("llm_extract", END)
return workflow.compile()
def _filter_articles_by_bauzone(articles: List[Dict[str, Any]], bauzone: str) -> List[Dict[str, Any]]: def _filter_articles_by_bauzone(articles: List[Dict[str, Any]], bauzone: str) -> List[Dict[str, Any]]:
"""Filter articles that mention the Bauzone.""" """Filter articles that mention the Bauzone."""
bauzone_upper = (bauzone or "").upper() bauzone_upper = (bauzone or "").upper()
@ -1247,7 +1241,7 @@ async def run_bzo_params_extraction(
total_area_m2: Optional[float] = None, total_area_m2: Optional[float] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Run LangGraph workflow to extract BZO parameters for a Bauzone via LLM. Extract BZO parameters for a Bauzone via LLM.
Returns fakten (item/value/source), bauzone_params_list (bullet strings), zusatzinformationen. Returns fakten (item/value/source), bauzone_params_list (bullet strings), zusatzinformationen.
""" """
rules = relevant_rules if relevant_rules is not None else _bzo_filter_rules_by_bauzone( rules = relevant_rules if relevant_rules is not None else _bzo_filter_rules_by_bauzone(
@ -1276,8 +1270,7 @@ async def run_bzo_params_extraction(
"errors": [], "errors": [],
} }
graph = create_bzo_params_extraction_graph() final_state = await llm_extract_bauzone_params_node(state)
final_state = await graph.ainvoke(state)
return { return {
"bauzone": bauzone, "bauzone": bauzone,
@ -1288,19 +1281,14 @@ async def run_bzo_params_extraction(
} }
# ===== Graph Construction ===== # ===== Pipeline Execution =====
def create_bzo_extraction_graph(): def _run_bzo_extraction_pipeline(state: BZOExtractionState) -> BZOExtractionState:
"""Create and compile the BZO extraction graph (simplified 4-node pipeline).""" """Run the BZO extraction steps sequentially on the shared state."""
workflow = StateGraph(BZOExtractionState) state = classify_and_assemble(state)
workflow.add_node("classify_and_assemble", classify_and_assemble) state = extract_zones_and_tables(state)
workflow.add_node("extract_zones_and_tables", extract_zones_and_tables) state = extract_rules(state)
workflow.add_node("extract_rules", extract_rules) return state
workflow.set_entry_point("classify_and_assemble")
workflow.add_edge("classify_and_assemble", "extract_zones_and_tables")
workflow.add_edge("extract_zones_and_tables", "extract_rules")
workflow.add_edge("extract_rules", END)
return workflow.compile()
def run_extraction(pdf_bytes: bytes, pdf_id: str = None, dokument_id: str = None) -> Dict[str, Any]: def run_extraction(pdf_bytes: bytes, pdf_id: str = None, dokument_id: str = None) -> Dict[str, Any]:
@ -1357,9 +1345,8 @@ def run_extraction(pdf_bytes: bytes, pdf_id: str = None, dokument_id: str = None
for tb in text_blocks_objects for tb in text_blocks_objects
] ]
# Create and run graph # Run the extraction pipeline
graph = create_bzo_extraction_graph() final_state = _run_bzo_extraction_pipeline(state)
final_state = graph.invoke(state)
# Sort and structure results # Sort and structure results
articles = sorted( articles = sorted(

View file

@ -278,7 +278,7 @@ from .interfaceFeatureRealEstate import getInterface as getRealEstateInterface
from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
from modules.connectors.connectorSwissTopoMapServer import SwissTopoMapServerConnector from modules.connectors.connectorSwissTopoMapServer import SwissTopoMapServerConnector
from modules.features.realEstate.bzoDocumentRetriever import BZODocumentRetriever from modules.features.realEstate.bzoDocumentRetriever import BZODocumentRetriever
from modules.features.realEstate.bzoExtractionLangGraph import run_extraction, run_bzo_params_extraction from modules.features.realEstate.bzoExtraction import run_extraction, run_bzo_params_extraction
from modules.features.realEstate.parcelSelectionService import compute_selection_summary from modules.features.realEstate.parcelSelectionService import compute_selection_summary
from modules.features.realEstate.realEstateGemeindeService import ( from modules.features.realEstate.realEstateGemeindeService import (
ensure_single_gemeinde, ensure_single_gemeinde,
@ -2331,7 +2331,7 @@ async def extract_bzo_information(
Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde. Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde.
Retrieves BZO documents for the specified Gemeinde, extracts content using Retrieves BZO documents for the specified Gemeinde, extracts content using
langgraph workflow, filters by Bauzone, and uses AI to find relevant information. the BZO extraction pipeline, filters by Bauzone, and uses AI to find relevant information.
When total_area_m2 or parcels are provided, runs Machbarkeitsstudie for structured output. When total_area_m2 or parcels are provided, runs Machbarkeitsstudie for structured output.
Args: Args:
@ -2489,7 +2489,7 @@ async def extract_bzo_information(
) )
continue continue
# Run extraction using langgraph workflow # Run extraction using the BZO extraction pipeline
extraction_result = run_extraction( extraction_result = run_extraction(
pdf_bytes=pdf_bytes, pdf_bytes=pdf_bytes,
pdf_id=dokument.dokumentReferenz or f"dok_{dokument.id}", pdf_id=dokument.dokumentReferenz or f"dok_{dokument.id}",
@ -2543,7 +2543,7 @@ async def extract_bzo_information(
selection_summary = compute_selection_summary(parcels) selection_summary = compute_selection_summary(parcels)
_total_area_m2 = selection_summary.get("total_area_m2") or 0.0 _total_area_m2 = selection_summary.get("total_area_m2") or 0.0
# Extract BZO parameters for Wohnzone via LangGraph + LLM (bullet list with sources) # Extract BZO parameters for Wohnzone via LLM (bullet list with sources)
bzo_params_result = None bzo_params_result = None
try: try:
services = getServices( services = getServices(

View file

@ -706,7 +706,7 @@ async def get_parcel_documents(
""" """
Ensure BZO document exists for Gemeinde, return documents for parcel info display. Ensure BZO document exists for Gemeinde, return documents for parcel info display.
Creates Gemeinde (Swiss Topo) and BZO (Tavily) if not in DB. Creates Gemeinde (Swiss Topo) and BZO (Tavily) if not in DB.
Returns documents for preview - does NOT run LangGraph. Returns documents for preview - does NOT run the BZO extraction pipeline.
""" """
mandateId = _validateInstanceAccess(instanceId, context) mandateId = _validateInstanceAccess(instanceId, context)
interface = getRealEstateInterface( interface = getRealEstateInterface(
@ -787,7 +787,7 @@ async def get_instance_bzo_information(
parcel_ids: Optional[str] = Query(None, description="Comma-separated parcel IDs; total area computed from parcels"), parcel_ids: Optional[str] = Query(None, description="Comma-separated parcel IDs; total area computed from parcels"),
context: RequestContext = Depends(getRequestContext), context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Extract BZO information for a Bauzone in a Gemeinde. Runs LangGraph workflow. With total_area_m2 or parcel_ids, includes Machbarkeitsstudie.""" """Extract BZO information for a Bauzone in a Gemeinde. Runs the BZO extraction pipeline. With total_area_m2 or parcel_ids, includes Machbarkeitsstudie."""
mandateId = _validateInstanceAccess(instanceId, context) mandateId = _validateInstanceAccess(instanceId, context)
parcels = None parcels = None
if parcel_ids: if parcel_ids:

View file

@ -203,8 +203,9 @@ def buildDataSourceContext(chatService, dataSourceIds: List[str]) -> str:
"- Use ONLY browseDataSource, searchDataSource, and downloadFromDataSource to access these sources.", "- Use ONLY browseDataSource, searchDataSource, and downloadFromDataSource to access these sources.",
"- Use the dataSourceId (UUID) exactly as shown below.", "- Use the dataSourceId (UUID) exactly as shown below.",
"- Do NOT use listFiles, externalBrowse, or externalSearch for attached data sources -- those tools are for other purposes.", "- Do NOT use listFiles, externalBrowse, or externalSearch for attached data sources -- those tools are for other purposes.",
"- browseDataSource returns BOTH files and folders at the given path.", "- SEARCH FIRST: for any targeted question (specific topic, sender, keyword, date range), use searchDataSource. It queries the source server-side and is far cheaper than listing everything. These sources can be huge (gigabytes); NEVER browse and download an entire source to find a few items.",
"- When downloading files, ALWAYS provide the human-readable fileName (with extension) from the browse results.", "- browseDataSource is for directory listing or getting the newest items of a folder. Download ONLY the items that match the user's request.",
"- When downloading files, ALWAYS provide the human-readable fileName (with extension) from the browse/search results.",
"", "",
] ]
found = False found = False

View file

@ -50,7 +50,7 @@ class FeatureInterface:
Get a feature by code. Get a feature by code.
Args: Args:
featureCode: Feature code (e.g., "trustee", "chatbot") featureCode: Feature code (e.g., "trustee", "commcoach")
Returns: Returns:
Feature object or None Feature object or None

View file

@ -196,11 +196,6 @@ TABLE_NAMESPACE = {
"ChatLog": "chat", "ChatLog": "chat",
"ChatDocument": "chat", "ChatDocument": "chat",
"Prompt": "chat", "Prompt": "chat",
# Chatbot (poweron_chatbot) - per feature-instance isolation
"ChatbotConversation": "chatbot",
"ChatbotMessage": "chatbot",
"ChatbotDocument": "chatbot",
"ChatbotLog": "chatbot",
# Files - benutzer-eigen # Files - benutzer-eigen
"FileItem": "files", "FileItem": "files",
"FileData": "files", "FileData": "files",
@ -228,7 +223,7 @@ TABLE_NAMESPACE = {
# Namespaces ohne Mandantenkontext - GROUP wird auf MY gemappt # Namespaces ohne Mandantenkontext - GROUP wird auf MY gemappt
# NOTE: "files" is NOT in this set files use scope-based visibility for GROUP # NOTE: "files" is NOT in this set files use scope-based visibility for GROUP
USER_OWNED_NAMESPACES = {"chat", "chatbot", "automation", "knowledge", "datasource"} USER_OWNED_NAMESPACES = {"chat", "automation", "knowledge", "datasource"}
def buildDataObjectKey(tableName: str, featureCode: Optional[str] = None) -> str: def buildDataObjectKey(tableName: str, featureCode: Optional[str] = None) -> str:

View file

@ -53,7 +53,7 @@ router = APIRouter(
class FeatureInstanceCreate(BaseModel): class FeatureInstanceCreate(BaseModel):
"""Request model for creating a feature instance""" """Request model for creating a feature instance"""
featureCode: str = Field(..., description="Feature code (e.g., 'trustee', 'chatbot')") featureCode: str = Field(..., description="Feature code (e.g., 'trustee', 'commcoach')")
label: str = Field(..., description="Instance label (e.g., 'Buchhaltung 2025')") label: str = Field(..., description="Instance label (e.g., 'Buchhaltung 2025')")
enabled: bool = Field(True, description="Whether this feature instance is enabled") enabled: bool = Field(True, description="Whether this feature instance is enabled")
copyTemplateRoles: bool = Field(True, description="Whether to copy template roles on creation") copyTemplateRoles: bool = Field(True, description="Whether to copy template roles on creation")
@ -778,11 +778,6 @@ def updateFeatureInstance(
detail=routeApiMsg("Failed to update feature instance") detail=routeApiMsg("Failed to update feature instance")
) )
# Clear chatbot config cache when config was updated for chatbot instances
if "config" in updateData and instance.featureCode == "chatbot":
from modules.features.chatbot.config import clear_config_cache
clear_config_cache(instanceId)
logger.info(f"User {context.user.id} updated feature instance {instanceId}: {updateData}") logger.info(f"User {context.user.id} updated feature instance {instanceId}: {updateData}")
return updated.model_dump() return updated.model_dump()
@ -1637,7 +1632,7 @@ def get_feature(
/instances, /my, /templates, etc. /instances, /my, /templates, etc.
Args: Args:
featureCode: Feature code (e.g., 'trustee', 'chatbot') featureCode: Feature code (e.g., 'trustee', 'commcoach')
""" """
try: try:
# Features come from the RBAC Catalog (code-defined, not DB-stored) # Features come from the RBAC Catalog (code-defined, not DB-stored)

View file

@ -1,217 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""STT Benchmark route — compare Speech-to-Text v1 (latest_long) vs v2 (Chirp 2).
Sysadmin-only page for evaluating STT model quality and latency.
"""
import json
import time
import logging
from typing import Any, Dict
from fastapi import APIRouter, HTTPException, Depends, Request, UploadFile, File, Form
from modules.auth import limiter, getCurrentUser
from modules.datamodels.datamodelUam import User
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/admin/stt-benchmark",
tags=["Admin STT Benchmark"],
responses={401: {"description": "Unauthorized"}, 403: {"description": "Forbidden"}},
)
def _requireSysAdmin(currentUser: User = Depends(getCurrentUser)) -> User:
    """FastAPI dependency that lets only sysadmins or platform admins through.

    Returns the resolved user when either ``isSysAdmin`` or ``isPlatformAdmin``
    is truthy (a missing attribute counts as False); otherwise raises
    HTTPException 403.
    """
    isAdmin = getattr(currentUser, "isSysAdmin", False) or getattr(currentUser, "isPlatformAdmin", False)
    if isAdmin:
        return currentUser
    raise HTTPException(status_code=403, detail="SysAdmin required")
def _getCredentials():
    """Build Google service-account credentials from the configured secret.

    Reads ``Connector_GoogleSpeech_API_KEY_SECRET`` from APP_CONFIG and parses
    it as JSON service-account info.

    Raises:
        HTTPException: 500 when the secret is empty/missing or still starts
            with the ``YOUR_`` prefix.
    """
    rawSecret = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
    isConfigured = bool(rawSecret) and not rawSecret.startswith("YOUR_")
    if not isConfigured:
        raise HTTPException(status_code=500, detail="Google Speech API key not configured")
    # Imported lazily, as in the original: the Google auth package is only
    # needed once a usable secret is present.
    from google.oauth2 import service_account
    return service_account.Credentials.from_service_account_info(json.loads(rawSecret))
def _runV1(audioBytes: bytes, language: str, model: str) -> Dict[str, Any]:
    """Run a synchronous Speech-to-Text v1 recognition and time the call.

    Args:
        audioBytes: Raw audio content sent inline to the API.
        language: Language code passed as ``language_code``.
        model: v1 model name passed as ``model``.

    Returns:
        Dict with ``api``, ``model``, ``latencyMs`` (recognize call only),
        ``results`` (one entry per alternative with transcript, rounded
        confidence and word count) and ``resultCount``.
    """
    from google.cloud import speech

    sttClient = speech.SpeechClient(credentials=_getCredentials())
    recognitionConfig = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        language_code=language,
        model=model,
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
        enable_word_confidence=True,
        max_alternatives=3,
        use_enhanced=True,
    )
    recognitionAudio = speech.RecognitionAudio(content=audioBytes)

    # Only the recognize() round-trip is timed; client and config setup are excluded.
    startedAt = time.perf_counter()
    response = sttClient.recognize(config=recognitionConfig, audio=recognitionAudio)
    latencySeconds = time.perf_counter() - startedAt

    # Flatten every alternative of every result into one list.
    alternatives = [
        {
            "transcript": alternative.transcript,
            "confidence": round(alternative.confidence, 4),
            "words": len(alternative.words) if alternative.words else 0,
        }
        for result in response.results
        for alternative in result.alternatives
    ]
    return {
        "api": "v1",
        "model": model,
        "latencyMs": round(latencySeconds * 1000, 1),
        "results": alternatives,
        "resultCount": len(response.results),
    }
def _runV2(audioBytes: bytes, language: str, model: str, location: str) -> Dict[str, Any]:
    """Run Speech-to-Text v2 recognition (Chirp 2) and time the call.

    Args:
        audioBytes: Raw audio content sent inline to the API.
        language: Language code passed as the single entry of ``language_codes``.
        model: v2 model name passed as ``model``.
        location: Region used for both the API endpoint and the recognizer path.

    Returns:
        Dict with ``api``, ``model``, ``location``, ``latencyMs`` (recognize
        call only), ``results`` (one entry per alternative with transcript,
        rounded confidence and word count) and ``resultCount``.

    Raises:
        HTTPException: 500 when the API key is not configured (raised by
            ``_getCredentials``) or the service-account info has no
            ``project_id``.
    """
    from google.cloud.speech_v2 import SpeechClient
    from google.cloud.speech_v2.types import cloud_speech

    # _getCredentials validates the secret first, so the parse below only runs
    # on a configured value.
    credentials = _getCredentials()
    credInfo = json.loads(APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET"))
    projectId = credInfo.get("project_id", "")
    if not projectId:
        # Fail fast: an empty project id would otherwise build the malformed
        # recognizer name "projects//locations/..." and surface as an opaque
        # API error.
        raise HTTPException(
            status_code=500,
            detail="Google Speech service account is missing project_id",
        )

    client = SpeechClient(
        credentials=credentials,
        client_options={"api_endpoint": f"{location}-speech.googleapis.com"},
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=[language],
        model=model,
        features=cloud_speech.RecognitionFeatures(
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
            enable_word_confidence=True,
        ),
    )
    # "_" selects the implicit recognizer; the configuration is sent inline with the request.
    recognizer = f"projects/{projectId}/locations/{location}/recognizers/_"
    request = cloud_speech.RecognizeRequest(
        recognizer=recognizer,
        config=config,
        content=audioBytes,
    )

    # Only the recognize() round-trip is timed; client and config setup are excluded.
    t0 = time.perf_counter()
    response = client.recognize(request=request)
    elapsed = time.perf_counter() - t0

    # Flatten every alternative of every result into one list.
    results = [
        {
            "transcript": alt.transcript,
            "confidence": round(alt.confidence, 4),
            "words": len(alt.words) if alt.words else 0,
        }
        for r in response.results
        for alt in r.alternatives
    ]
    return {
        "api": "v2",
        "model": model,
        "location": location,
        "latencyMs": round(elapsed * 1000, 1),
        "results": results,
        # response.results is already read unconditionally above, so the former
        # getattr fallback could never take effect.
        "resultCount": len(response.results),
    }
@router.post("/run")
@limiter.limit("10/minute")
async def runBenchmark(
    request: Request,
    file: UploadFile = File(...),
    language: str = Form(default="de-DE"),
    v1Model: str = Form(default="latest_long"),
    v2Model: str = Form(default="chirp_2"),
    v2Location: str = Form(default="europe-west4"),
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Upload audio and compare v1 vs v2 STT results.

    Both recognitions are attempted independently: a failure in one is logged
    and reported as ``{"error": ...}`` under its key without aborting the other.

    Returns:
        Dict with ``filename``, ``fileSizeBytes``, ``language`` and the ``v1`` /
        ``v2`` result (or error) objects.

    Raises:
        HTTPException: 400 when the upload exceeds 10 MB or is under 100 bytes.
    """
    # Function-scope import keeps the change local; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    maxAudioBytes = 10 * 1024 * 1024
    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without buffering an arbitrarily large file in memory first.
    audioBytes = await file.read(maxAudioBytes + 1)
    if len(audioBytes) > maxAudioBytes:
        raise HTTPException(status_code=400, detail="Audio file too large (max 10 MB)")
    if len(audioBytes) < 100:
        raise HTTPException(status_code=400, detail="Audio file too small")

    logger.info("STT benchmark: %s, %d bytes, language=%s, v1=%s, v2=%s@%s",
                file.filename, len(audioBytes), language, v1Model, v2Model, v2Location)

    # _runV1/_runV2 issue blocking client.recognize() calls; running them in
    # the threadpool keeps the event loop free for other requests. They stay
    # sequential so the two latency measurements do not compete with each other.
    v1Result = None
    v1Error = None
    try:
        v1Result = await run_in_threadpool(_runV1, audioBytes, language, v1Model)
    except Exception as e:
        # Best effort by design: record the failure and still attempt v2.
        v1Error = str(e)
        logger.warning("STT v1 benchmark failed: %s", e)

    v2Result = None
    v2Error = None
    try:
        v2Result = await run_in_threadpool(_runV2, audioBytes, language, v2Model, v2Location)
    except Exception as e:
        v2Error = str(e)
        logger.warning("STT v2 benchmark failed: %s", e)

    return {
        "filename": file.filename,
        "fileSizeBytes": len(audioBytes),
        "language": language,
        "v1": v1Result or {"error": v1Error},
        "v2": v2Result or {"error": v2Error},
    }
@router.get("/models")
@limiter.limit("30/minute")
async def getAvailableModels(
    request: Request,
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Return the static option lists (models, locations, languages) for the benchmark UI."""

    def _asOptions(pairs):
        # Expand (value, label) tuples into the {"value", "label"} objects the UI receives.
        return [{"value": optionValue, "label": optionLabel} for optionValue, optionLabel in pairs]

    v1Models = _asOptions([
        ("latest_long", "latest_long (default)"),
        ("latest_short", "latest_short"),
        ("phone_call", "phone_call"),
        ("video", "video"),
        ("command_and_search", "command_and_search"),
    ])
    v2Models = _asOptions([
        ("chirp_2", "Chirp 2 (recommended)"),
        ("chirp", "Chirp (original)"),
        ("long", "long"),
        ("short", "short"),
    ])
    locations = _asOptions([
        ("europe-west4", "Europe West (NL)"),
        ("us-central1", "US Central"),
        ("asia-southeast1", "Asia Southeast"),
    ])
    languages = _asOptions([
        ("de-DE", "Deutsch (DE)"),
        ("de-CH", "Deutsch (CH)"),
        ("en-US", "English (US)"),
        ("en-GB", "English (GB)"),
        ("fr-FR", "Francais (FR)"),
        ("it-IT", "Italiano (IT)"),
    ])
    return {
        "v1Models": v1Models,
        "v2Models": v2Models,
        "locations": locations,
        "languages": languages,
    }

View file

@ -2,7 +2,7 @@
# All rights reserved. # All rights reserved.
"""DataSource auxiliary endpoints: settings (ragLimits) and cost estimate. """DataSource auxiliary endpoints: settings (ragLimits) and cost estimate.
Flag toggles (neutralize / scope / ragIndexEnabled) have moved to the Flag toggles (neutralize / ragIndexEnabled) have moved to the
generic UDB router (`POST /api/udb/node/{key}/flag/{flag}`); see generic UDB router (`POST /api/udb/node/{key}/flag/{flag}`); see
`modules/routes/routeUdb.py` and the wiki UDB reference page. `modules/routes/routeUdb.py` and the wiki UDB reference page.
""" """
@ -127,9 +127,8 @@ def _updateDataSourceSettings(
Currently supports `ragLimits` only. Unknown top-level keys in the body are Currently supports `ragLimits` only. Unknown top-level keys in the body are
rejected to avoid silently storing garbage that no consumer reads. rejected to avoid silently storing garbage that no consumer reads.
DataSource: owner-only (or sysadmin). For mandate/feature scopes the DataSource: owner-only (or sysadmin). FeatureDataSource requires
mandateAdmin also passes. FeatureDataSource has no userId/scope; for a feature-admin role on the FDS's featureInstanceId.
those we require a feature-admin role on the FDS's featureInstanceId.
""" """
if not isinstance(settings, dict): if not isinstance(settings, dict):
raise HTTPException(status_code=400, detail="settings must be an object") raise HTTPException(status_code=400, detail="settings must be an object")
@ -148,13 +147,7 @@ def _updateDataSourceSettings(
if model is DataSource: if model is DataSource:
ownerId = str(rec.get("userId") or "") ownerId = str(rec.get("userId") or "")
if ownerId and ownerId != currentUserId and not context.isSysAdmin: if ownerId and ownerId != currentUserId and not context.isSysAdmin:
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag raise HTTPException(status_code=403, detail="Not allowed to modify this DataSource's settings")
connectionId = rec.get("connectionId", "")
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
scope = str(getEffectiveFlag(rec, "scope", allDs, mode="walk"))
isMandateAdmin = getattr(context, "isMandateAdmin", False)
if scope == "personal" or not isMandateAdmin:
raise HTTPException(status_code=403, detail="Not allowed to modify this DataSource's settings")
else: else:
from modules.serviceCenter.services.serviceKnowledge.udbNodes import _isFeatureAdmin from modules.serviceCenter.services.serviceKnowledge.udbNodes import _isFeatureAdmin
featureInstanceId = str(rec.get("featureInstanceId") or "") featureInstanceId = str(rec.get("featureInstanceId") or "")

View file

@ -2234,13 +2234,13 @@ async def get_bzo_information(
""" """
Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde. Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde.
Uses a langgraph workflow to extract content from BZO PDF documents for the Uses the BZO extraction pipeline to extract content from BZO PDF documents for the
specified Gemeinde, then uses AI to search for relevant information specific specified Gemeinde, then uses AI to search for relevant information specific
to the specified Bauzone. to the specified Bauzone.
The workflow: The workflow:
1. Finds BZO documents for the Gemeinde (by name or ID) 1. Finds BZO documents for the Gemeinde (by name or ID)
2. Extracts content from PDFs using langgraph workflow 2. Extracts content from PDFs using the BZO extraction pipeline
3. Filters rules, zones, and articles by Bauzone 3. Filters rules, zones, and articles by Bauzone
4. Uses AI to generate a summary and find relevant information 4. Uses AI to generate a summary and find relevant information

View file

@ -114,9 +114,6 @@ def _getFeatureUiObjects(featureCode: str) -> List[Dict[str, Any]]:
elif featureCode == "neutralization": elif featureCode == "neutralization":
from modules.features.neutralization.mainNeutralization import UI_OBJECTS from modules.features.neutralization.mainNeutralization import UI_OBJECTS
return UI_OBJECTS return UI_OBJECTS
elif featureCode == "chatbot":
from modules.features.chatbot.mainChatbot import UI_OBJECTS
return UI_OBJECTS
elif featureCode == "commcoach": elif featureCode == "commcoach":
from modules.features.commcoach.mainCommcoach import UI_OBJECTS from modules.features.commcoach.mainCommcoach import UI_OBJECTS
return UI_OBJECTS return UI_OBJECTS

View file

@ -50,9 +50,6 @@ router = APIRouter(
) )
_VALID_SCOPES = {"personal", "featureInstance", "mandate", "global"}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# POST /api/udb/tree/children # POST /api/udb/tree/children
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -98,7 +95,6 @@ class _UdbFlagBody(BaseModel):
the flag (see `_extractFlagValue` for the mapping). `value` is typed the flag (see `_extractFlagValue` for the mapping). `value` is typed
as Any because the legal type depends on the flag: as Any because the legal type depends on the flag:
- neutralize/ragIndexEnabled : bool | null (null = inherit) - neutralize/ragIndexEnabled : bool | null (null = inherit)
- scope : str | null (one of _VALID_SCOPES, null = inherit)
""" """
value: Any = Field(default=None, description="New flag value or null to reset to inherit.") value: Any = Field(default=None, description="New flag value or null to reset to inherit.")
@ -108,7 +104,7 @@ class _UdbFlagBody(BaseModel):
async def _udbNodeFlag( async def _udbNodeFlag(
request: Request, request: Request,
nodeKey: str = Path(..., description="Tree key of the node to modify"), nodeKey: str = Path(..., description="Tree key of the node to modify"),
flag: str = Path(..., description="One of: neutralize | scope | ragIndexEnabled"), flag: str = Path(..., description="One of: neutralize | ragIndexEnabled"),
body: _UdbFlagBody = Body(default_factory=_UdbFlagBody), body: _UdbFlagBody = Body(default_factory=_UdbFlagBody),
context: RequestContext = Depends(getRequestContext), context: RequestContext = Depends(getRequestContext),
) -> Dict[str, Any]: ) -> Dict[str, Any]:
@ -122,7 +118,7 @@ async def _udbNodeFlag(
RBAC: `node.canEdit(context, rootIf)` decides; the route never RBAC: `node.canEdit(context, rootIf)` decides; the route never
re-implements ownership rules. re-implements ownership rules.
""" """
if flag not in ("neutralize", "scope", "ragIndexEnabled"): if flag not in ("neutralize", "ragIndexEnabled"):
raise HTTPException(status_code=400, detail=f"Unknown flag: {flag}") raise HTTPException(status_code=400, detail=f"Unknown flag: {flag}")
value = _validateFlagValue(flag, body.value, context) value = _validateFlagValue(flag, body.value, context)
@ -188,15 +184,6 @@ def _validateFlagValue(flag: str, value: Any, context: RequestContext) -> Any:
""" """
if value is None: if value is None:
return None return None
if flag == "scope":
if not isinstance(value, str) or value not in _VALID_SCOPES:
raise HTTPException(
status_code=400,
detail=f"Invalid scope: {value!r}. Must be one of {sorted(_VALID_SCOPES)}",
)
if value == "global" and not context.isSysAdmin:
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
return value
# neutralize / ragIndexEnabled # neutralize / ragIndexEnabled
if isinstance(value, bool): if isinstance(value, bool):
return value return value

View file

@ -53,7 +53,7 @@ def checkUiAccess(
Args: Args:
RbacInstance: RbacClass instance RbacInstance: RbacClass instance
currentUser: Current user object currentUser: Current user object
uiPath: UI path (e.g., "playground.voice.settings", "chatbot.search") uiPath: UI path (e.g., "playground.voice.settings", "workspace.search")
Returns: Returns:
True if user has view permission for the UI element, False otherwise True if user has view permission for the UI element, False otherwise

View file

@ -3,7 +3,6 @@
"""Streaming core service for SSE event management.""" """Streaming core service for SSE event management."""
from .eventManager import EventManager, get_event_manager from .eventManager import EventManager, get_event_manager
from .helpers import ChatStreamingHelper
from .mainServiceStreaming import StreamingService from .mainServiceStreaming import StreamingService
__all__ = ["EventManager", "get_event_manager", "ChatStreamingHelper", "StreamingService"] __all__ = ["EventManager", "get_event_manager", "StreamingService"]

View file

@ -1,242 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Streaming helper utilities for chat message processing and normalization."""
from __future__ import annotations
from typing import Any, Dict, List, Literal, Mapping, Optional
from langchain_core.messages import (
AIMessage,
BaseMessage,
HumanMessage,
SystemMessage,
ToolMessage,
)
Role = Literal["user", "assistant", "system", "tool"]
class ChatStreamingHelper:
    """Pure helper methods for streaming and message normalization.

    All methods are static and keyword-only. They convert between message
    representations (LangChain ``BaseMessage`` instances and plain dicts),
    flatten structured content to plain text, and pull message lists out of
    graph output objects.
    """

    @staticmethod
    def role_from_message(*, msg: BaseMessage) -> Role:
        """Return the chat role for a ``BaseMessage`` instance.

        Args:
            msg: The message whose role is wanted.

        Returns:
            "user", "assistant", "system" or "tool" for the four known
            message classes; otherwise the message's own ``role`` attribute,
            or "assistant" when it has none.

        Examples:
            >>> from langchain_core.messages import HumanMessage
            >>> msg = HumanMessage(content="Hello")
            >>> ChatStreamingHelper.role_from_message(msg=msg)
            'user'
        """
        if isinstance(msg, HumanMessage):
            return "user"
        if isinstance(msg, AIMessage):
            return "assistant"
        if isinstance(msg, SystemMessage):
            return "system"
        if isinstance(msg, ToolMessage):
            return "tool"
        return getattr(msg, "role", "assistant")

    @staticmethod
    def flatten_content(*, content: Any) -> str:
        """Convert a complex content structure to plain text.

        Args:
            content: The content to flatten. Can be:
                - None: returns an empty string
                - str: returned stripped
                - list: each part is reduced to text, parts are stripped and
                  joined with newlines
                - dict: the string under "text" or "content" (in that order)
                - anything else: converted with ``str``

        Returns:
            The flattened content as plain text with surrounding whitespace
            stripped.

        Examples:
            >>> content = [{"type": "text", "text": "Hello"}, {"type": "text", "text": "world"}]
            >>> ChatStreamingHelper.flatten_content(content=content)
            'Hello\\nworld'
            >>> content = {"text": "Simple message"}
            >>> ChatStreamingHelper.flatten_content(content=content)
            'Simple message'
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content.strip()

        if isinstance(content, list):
            parts: List[str] = []
            for part in content:
                if not isinstance(part, dict):
                    # NOTE(review): the nesting of this fallback was
                    # reconstructed from de-indented source -- confirm that
                    # non-dict parts are meant to be stringified here.
                    parts.append(str(part))
                    continue
                # The first key holding a string wins. A {"type": "text"}
                # part is already covered by the "text" lookup, so no
                # separate type check is needed. Dict parts without any
                # string under these keys contribute nothing.
                for key in ("text", "content", "value"):
                    candidate = part.get(key)
                    if isinstance(candidate, str):
                        parts.append(candidate)
                        break
            return "\n".join(p.strip() for p in parts)

        if isinstance(content, dict):
            for key in ("text", "content"):
                candidate = content.get(key)
                if isinstance(candidate, str):
                    return candidate.strip()

        return str(content).strip()

    @staticmethod
    def message_to_dict(*, msg: BaseMessage) -> Dict[str, Any]:
        """Convert a ``BaseMessage`` instance to a dictionary for streaming.

        Args:
            msg: The message to convert.

        Returns:
            A dictionary containing:
                - "role": the message role (see ``role_from_message``)
                - "content": the flattened message content as plain text
                - "tool_calls": only when the message has non-empty tool calls
                - "name": only when the message has a non-empty name

        Examples:
            >>> from langchain_core.messages import HumanMessage
            >>> msg = HumanMessage(content="Hello there")
            >>> result = ChatStreamingHelper.message_to_dict(msg=msg)
            >>> result["role"]
            'user'
            >>> result["content"]
            'Hello there'
        """
        payload: Dict[str, Any] = {
            "role": ChatStreamingHelper.role_from_message(msg=msg),
            "content": ChatStreamingHelper.flatten_content(
                content=getattr(msg, "content", "")
            ),
        }
        tool_calls = getattr(msg, "tool_calls", None)
        if tool_calls:
            payload["tool_calls"] = tool_calls
        name = getattr(msg, "name", None)
        if name:
            payload["name"] = name
        return payload

    @staticmethod
    def dict_message_to_dict(*, obj: Mapping[str, Any]) -> Dict[str, Any]:
        """Normalize a dictionary-shaped message.

        Args:
            obj: The message mapping. "role" is used when present and
                non-empty; otherwise "type" is mapped (human/user,
                ai/assistant, system, tool/function). Content is read from
                "content", falling back to "text" when "content" is None.

        Returns:
            A dictionary containing:
                - "role": the resolved role, "assistant" when none resolves
                - "content": the flattened content as plain text
                - "tool_calls": copied whenever the key exists in ``obj``
                  (even if its value is empty)
                - "name": only when ``obj`` has a non-empty name

        Examples:
            >>> obj = {"type": "human", "content": "Hello"}
            >>> result = ChatStreamingHelper.dict_message_to_dict(obj=obj)
            >>> result["role"]
            'user'
            >>> result["content"]
            'Hello'
        """
        role: Optional[str] = obj.get("role")
        if not role:
            # Handle alternative type field mappings
            typ = obj.get("type")
            if typ in ("human", "user"):
                role = "user"
            elif typ in ("ai", "assistant"):
                role = "assistant"
            elif typ in ("system",):
                role = "system"
            elif typ in ("tool", "function"):
                role = "tool"

        content = obj.get("content")
        if content is None and "text" in obj:
            content = obj["text"]

        out: Dict[str, Any] = {
            "role": role or "assistant",
            "content": ChatStreamingHelper.flatten_content(content=content),
        }
        if "tool_calls" in obj:
            out["tool_calls"] = obj["tool_calls"]
        if obj.get("name"):
            out["name"] = obj["name"]
        return out

    @staticmethod
    def extract_messages_from_output(*, output_obj: Any) -> List[Any]:
        """Extract the messages list from a graph output object.

        Args:
            output_obj: A dict with a "messages" key, an object with a
                ``messages`` attribute, or anything else.

        Returns:
            The messages list, or an empty list when ``output_obj`` is None
            or the value found is not a list.

        Examples:
            >>> output = {"messages": [{"role": "user", "content": "Hello"}]}
            >>> messages = ChatStreamingHelper.extract_messages_from_output(output_obj=output)
            >>> len(messages)
            1
        """
        if output_obj is None:
            return []
        # Dicts are checked first so a dict's "messages" key is used rather
        # than an attribute lookup.
        if isinstance(output_obj, dict):
            msgs = output_obj.get("messages")
        else:
            msgs = getattr(output_obj, "messages", None)
        return msgs if isinstance(msgs, list) else []

View file

@ -9,7 +9,6 @@ import logging
from typing import Any, Callable from typing import Any, Callable
from modules.serviceCenter.core.serviceStreaming.eventManager import EventManager, get_event_manager from modules.serviceCenter.core.serviceStreaming.eventManager import EventManager, get_event_manager
from modules.serviceCenter.core.serviceStreaming.helpers import ChatStreamingHelper
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -25,7 +24,3 @@ class StreamingService:
def getEventManager(self) -> EventManager: def getEventManager(self) -> EventManager:
"""Get the global event manager instance for SSE streaming.""" """Get the global event manager instance for SSE streaming."""
return get_event_manager() return get_event_manager()
def getChatStreamingHelper(self):
"""Get ChatStreamingHelper utility for message normalization (no legacy import at call site)."""
return ChatStreamingHelper

View file

@ -21,6 +21,90 @@ from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _formatMailLine(entry) -> str:
"""Format a mail ExternalEntry with sender + date so the agent can decide
which messages to download without fetching every body."""
meta = entry.metadata or {}
sender = meta.get("from") or ""
received = (meta.get("receivedDateTime") or meta.get("date") or "")[:24]
parts = []
if received:
parts.append(received)
if sender:
parts.append(f"from: {sender}")
metaStr = f" [{', '.join(parts)}]" if parts else ""
return f"- ✉️ {entry.name}{metaStr} path: {entry.path}"
def _formatContactLine(entry) -> str:
"""Format a contact ExternalEntry with email/phone/company inline so the
agent does not have to download a .vcf for every contact. Handles the
differing metadata keys across MSFT, Google and Infomaniak adapters."""
meta = entry.metadata or {}
# email: MSFT 'emailAddresses' (list), Google 'emails' (list), Infomaniak 'email' (str)
emails = meta.get("emailAddresses") or meta.get("emails") or []
if isinstance(emails, str):
emails = [emails]
email = next((e for e in emails if e), None) or meta.get("email") or ""
# phone: MSFT 'businessPhones'/'mobilePhone', Google 'phones', Infomaniak 'phone'
phones = meta.get("phones") or meta.get("businessPhones") or []
if isinstance(phones, str):
phones = [phones]
phone = next((p for p in phones if p), None) or meta.get("mobilePhone") or meta.get("phone") or ""
company = meta.get("companyName") or meta.get("organization") or ""
parts = []
if email:
parts.append(email)
if phone:
parts.append(phone)
if company:
parts.append(company)
metaStr = f" [{', '.join(parts)}]" if parts else ""
return f"- 👤 {entry.name}{metaStr} path: {entry.path}"
def _formatTaskLine(entry) -> str:
"""Format a ClickUp task with status/assignee/due-date inline so the agent
can answer task questions without downloading every task JSON."""
meta = entry.metadata or {}
task = meta.get("task") or {}
parts = []
status = ((task.get("status") or {}).get("status")) if isinstance(task.get("status"), dict) else task.get("status")
if status:
parts.append(f"status: {status}")
assignees = [a.get("username") or a.get("email") for a in (task.get("assignees") or []) if a]
assignees = [a for a in assignees if a]
if assignees:
parts.append(f"assignee: {', '.join(assignees)}")
dueMs = task.get("due_date")
if dueMs:
try:
from datetime import datetime, timezone
due = datetime.fromtimestamp(int(dueMs) / 1000, tz=timezone.utc).strftime("%Y-%m-%d")
parts.append(f"due: {due}")
except (TypeError, ValueError, OverflowError):
pass
metaStr = f" [{', '.join(parts)}]" if parts else ""
return f"- ☑️ {entry.name}{metaStr} path: {entry.path}"
def _buildCountLine(entries, limit) -> str:
"""Build a summary count line, including total estimate if available."""
realCount = sum(1 for e in entries if not (e.path or "").endswith("/_count"))
line = f"\n\n({realCount} entries returned"
if limit is not None:
line += f", limit={limit}"
for e in entries:
if (e.path or "").endswith("/_count"):
meta = e.metadata or {}
total = meta.get("totalEstimate") or meta.get("totalCount")
if total:
line += f", ~{total} total in source"
break
line += ")"
return line
def _registerDataSourceTools(registry: ToolRegistry, services): def _registerDataSourceTools(registry: ToolRegistry, services):
"""Auto-extracted from registerCoreTools.""" """Auto-extracted from registerCoreTools."""
@ -81,6 +165,8 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
_MAIL_SERVICES = {"outlook", "gmail"} _MAIL_SERVICES = {"outlook", "gmail"}
_CALENDAR_SERVICES = {"calendar", "calendarFolder"} _CALENDAR_SERVICES = {"calendar", "calendarFolder"}
_CONTACT_SERVICES = {"contact", "contactFolder"}
_CLICKUP_SERVICES = {"clickup", "clickupList"}
async def _browseDataSource(args: Dict[str, Any], context: Dict[str, Any]): async def _browseDataSource(args: Dict[str, Any], context: Dict[str, Any]):
dsId = args.get("dataSourceId", "") dsId = args.get("dataSourceId", "")
@ -118,6 +204,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data="Empty directory.") return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data="Empty directory.")
lines = [] lines = []
isCalendar = service in _CALENDAR_SERVICES isCalendar = service in _CALENDAR_SERVICES
isMail = service in _MAIL_SERVICES
isContact = service in _CONTACT_SERVICES
isClickup = service in _CLICKUP_SERVICES
for e in entries: for e in entries:
prefix = "[DIR]" if e.isFolder else "[FILE]" prefix = "[DIR]" if e.isFolder else "[FILE]"
sizeInfo = f" ({e.size} bytes)" if e.size else "" sizeInfo = f" ({e.size} bytes)" if e.size else ""
@ -127,18 +216,24 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
loc = e.metadata.get("location") or "" loc = e.metadata.get("location") or ""
locStr = f" 📍 {loc}" if loc else "" locStr = f" 📍 {loc}" if loc else ""
lines.append(f"- 📅 {start} {end} {e.name}{locStr}") lines.append(f"- 📅 {start} {end} {e.name}{locStr}")
elif isMail and not e.isFolder:
lines.append(_formatMailLine(e))
elif isContact and not e.isFolder:
lines.append(_formatContactLine(e))
elif isClickup and not e.isFolder and (e.metadata or {}).get("cuType") == "task":
lines.append(_formatTaskLine(e))
else: else:
lines.append(f"- {prefix} {e.name}{sizeInfo} path: {e.path}") lines.append(f"- {prefix} {e.name}{sizeInfo} path: {e.path}")
result = "\n".join(lines) result = "\n".join(lines)
countLine = f"\n\n({len(entries)} entries returned" result += _buildCountLine(entries, limit)
if limit is not None:
countLine += f", limit={limit}"
countLine += ")"
result += countLine
if service in _MAIL_SERVICES: if service in _MAIL_SERVICES:
result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID." result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID."
if isCalendar and not any(e.isFolder for e in entries): if isCalendar and not any(e.isFolder for e in entries):
result += "\n\nThese are calendar event summaries with date/time. You do NOT need to download individual events — this listing already contains subject, start, end, and location. Use the filter parameter with a date range (e.g. '2026-06') for specific periods." result += "\n\nThese are calendar event summaries with date/time. You do NOT need to download individual events — this listing already contains subject, start, end, and location. Use the filter parameter with a date range (e.g. '2026-06') for specific periods."
if isContact and not any(e.isFolder for e in entries):
result += "\n\nThese are contacts with name, email, phone and company shown inline. You do NOT need to download a vCard for each contact — only download when you need the full record."
if isClickup and any((e.metadata or {}).get("cuType") == "task" for e in entries):
result += "\n\nThese are ClickUp tasks with status, assignee and due-date shown inline. Only download a task (JSON) when you need its full description, comments or custom fields."
return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data=result) return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data=result)
except Exception as e: except Exception as e:
return ToolResult(toolCallId="", toolName="browseDataSource", success=False, error=str(e)) return ToolResult(toolCallId="", toolName="browseDataSource", success=False, error=str(e))
@ -173,6 +268,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
if not entries: if not entries:
return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data="No results found.") return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data="No results found.")
isCalendar = service in _CALENDAR_SERVICES isCalendar = service in _CALENDAR_SERVICES
isMail = service in _MAIL_SERVICES
isContact = service in _CONTACT_SERVICES
isClickup = service in _CLICKUP_SERVICES
lines = [] lines = []
for e in entries: for e in entries:
if isCalendar and e.metadata: if isCalendar and e.metadata:
@ -181,18 +279,24 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
loc = e.metadata.get("location") or "" loc = e.metadata.get("location") or ""
locStr = f" 📍 {loc}" if loc else "" locStr = f" 📍 {loc}" if loc else ""
lines.append(f"- 📅 {start} {end} {e.name}{locStr}") lines.append(f"- 📅 {start} {end} {e.name}{locStr}")
elif isMail and not e.isFolder:
lines.append(_formatMailLine(e))
elif isContact and not e.isFolder:
lines.append(_formatContactLine(e))
elif isClickup and not e.isFolder and (e.metadata or {}).get("cuType") == "task":
lines.append(_formatTaskLine(e))
else: else:
lines.append(f"- {e.name} (path: {e.path})") lines.append(f"- {e.name} (path: {e.path})")
result = "\n".join(lines) result = "\n".join(lines)
countLine = f"\n\n({len(entries)} entries returned" result += _buildCountLine(entries, limit)
if limit is not None:
countLine += f", limit={limit}"
countLine += ")"
result += countLine
if service in _MAIL_SERVICES: if service in _MAIL_SERVICES:
result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID." result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID."
if isCalendar: if isCalendar:
result += "\n\nThese are calendar event summaries. You do NOT need to download individual events — subject, start, end, and location are shown above. For date-specific queries, use a date range as query (e.g. '2026-06')." result += "\n\nThese are calendar event summaries. You do NOT need to download individual events — subject, start, end, and location are shown above. For date-specific queries, use a date range as query (e.g. '2026-06')."
if isContact:
result += "\n\nThese are contacts with name, email, phone and company shown inline. You do NOT need to download a vCard for each contact — only download when you need the full record."
if isClickup:
result += "\n\nThese are ClickUp tasks with status, assignee and due-date shown inline. Only download a task (JSON) when you need its full description, comments or custom fields."
return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data=result) return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data=result)
except Exception as e: except Exception as e:
return ToolResult(toolCallId="", toolName="searchDataSource", success=False, error=str(e)) return ToolResult(toolCallId="", toolName="searchDataSource", success=False, error=str(e))
@ -295,15 +399,20 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register( registry.register(
"browseDataSource", _browseDataSource, "browseDataSource", _browseDataSource,
description=( description=(
"Browse files, folders, or emails in a data source. Accepts either:\n" "List the contents of a folder/directory in a data source. Accepts either:\n"
"- dataSourceId (for attached data sources shown in the prompt), OR\n" "- dataSourceId (for attached data sources shown in the prompt), OR\n"
"- connectionId + service (for direct connection access via listConnections).\n" "- connectionId + service (for direct connection access via listConnections).\n"
"\n" "\n"
"DEFAULT BEHAVIOUR: omit `limit` to get the connector's full default page. " "WHEN TO USE: Use browse to enumerate a directory's contents, or to get the "
"For mail folders (Outlook/Gmail) the default returns up to 100 newest " "newest items of a mail/calendar folder. For TARGETED queries (find emails "
"messages -- DO NOT pass a smaller limit just to be safe; users almost " "from a person, files about a topic, events in a date range), prefer "
"always want the full default page or explicitly more. Only set `limit` " "searchDataSource -- it queries the source server-side and avoids listing/"
"when the user asks for a specific number (e.g. 'show me the latest 5 mails')." "downloading large amounts of irrelevant data.\n"
"\n"
"For calendar folders, pass a date range via `filter` (e.g. '2026-06' or "
"'2026-06-01 2026-06-30') so only that period is fetched. "
"Omit `limit` for the connector default; set it only when the user asks for a "
"specific count or you need MORE after hitting the default."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -334,8 +443,21 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register( registry.register(
"searchDataSource", _searchDataSource, "searchDataSource", _searchDataSource,
description=( description=(
"Search for files within a data source. Accepts either dataSourceId OR connectionId+service. " "PRIMARY tool for finding specific data in a source. The query runs "
"Use the `limit` parameter to control how many hits are returned." "server-side, so this is far more efficient than browsing + downloading -- "
"ALWAYS prefer search when the user is looking for something specific "
"(a topic, a sender, a date range, a keyword). NEVER browse and download an "
"entire large source to find a few items; search first, then download ONLY "
"the matching results.\n"
"\n"
"Per-service query syntax:\n"
"- Outlook (KQL): 'from:alice subject:budget', 'received>=2026-05-01'.\n"
"- Gmail: 'from:alice after:2026/05/01 before:2026/06/01 budget'.\n"
"- SharePoint/OneDrive: free-text, searches file names AND content.\n"
"- Google Drive: searches file names and content.\n"
"- Calendar: pass a date range (e.g. '2026-06' or '2026-06-01 2026-06-30').\n"
"- ClickUp: task name/description keywords.\n"
"Accepts either dataSourceId OR connectionId+service."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -344,7 +466,7 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
"connectionId": {"type": "string", "description": "UserConnection ID (alternative to dataSourceId)"}, "connectionId": {"type": "string", "description": "UserConnection ID (alternative to dataSourceId)"},
"service": {"type": "string", "description": "Service name (alternative to dataSourceId)"}, "service": {"type": "string", "description": "Service name (alternative to dataSourceId)"},
"path": {"type": "string", "description": "Scope path (used with connectionId+service)"}, "path": {"type": "string", "description": "Scope path (used with connectionId+service)"},
"query": {"type": "string", "description": "Search query"}, "query": {"type": "string", "description": "Search query (use the per-service syntax in the tool description)"},
"limit": { "limit": {
"type": "integer", "type": "integer",
"description": "Maximum number of search results (default ~100, max 1000).", "description": "Maximum number of search results (default ~100, max 1000).",

View file

@ -157,6 +157,28 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
success=False, error=f"No data tables available for feature '{featureCode}'", success=False, error=f"No data tables available for feature '{featureCode}'",
) )
# A2: build the per-table type/inheritance-aware neutralization policy.
# tableActive = effective (own or inherited) table-level neutralize flag;
# explicitFields = fields whose neutralize flag is set explicitly.
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import resolveEffectiveForFds
neutralizePolicy: Dict[str, Dict[str, Any]] = {}
for tblObj in selectedTables:
tn = tblObj.get("meta", {}).get("table", "") if isinstance(tblObj, dict) else ""
if not tn:
continue
eff = resolveEffectiveForFds(featureInstanceId, tn, None, _fdsAll, mode="walk")
tableActive = eff.get("effectiveNeutralize") is True
explicitFields = set(neutralizeFieldsPerTable.get(tn, []))
if tableActive or explicitFields:
neutralizePolicy[tn] = {"tableActive": tableActive, "explicitFields": explicitFields}
neutralizationService = services.getService("neutralization") if hasattr(services, "getService") else None
if neutralizationService is not None and not getattr(neutralizationService, "interfaceDbComponent", None):
try:
neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent
except Exception:
pass
cacheKey = f"{featureInstanceId}:{hashlib.md5(question.encode()).hexdigest()}" cacheKey = f"{featureInstanceId}:{hashlib.md5(question.encode()).hexdigest()}"
if cacheKey in _featureQueryCache: if cacheKey in _featureQueryCache:
cachedAt, cachedResult = _featureQueryCache[cacheKey] cachedAt, cachedResult = _featureQueryCache[cacheKey]
@ -202,7 +224,8 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
instanceLabel=instanceLabel, instanceLabel=instanceLabel,
tableFilters=tableFilters, tableFilters=tableFilters,
requestLang=requestLang, requestLang=requestLang,
neutralizeFields=neutralizeFieldsPerTable if neutralizeFieldsPerTable else None, neutralizePolicy=neutralizePolicy if neutralizePolicy else None,
neutralizationService=neutralizationService,
maxRounds=parentMaxRounds, maxRounds=parentMaxRounds,
maxCostCHF=parentMaxCostCHF, maxCostCHF=parentMaxCostCHF,
) )

View file

@ -115,6 +115,31 @@ def _registerMediaTools(registry: ToolRegistry, services):
logger.warning(f"renderDocument: knowledge service unavailable: {e}") logger.warning(f"renderDocument: knowledge service unavailable: {e}")
resolvedImages = 0 resolvedImages = 0
# Large-document path: for binary doc formats (pdf/docx) we keep block
# images as fileId references and let the renderer fetch the bytes lazily
# (one image at a time) instead of embedding every image's base64 in the
# JSON. Inline images and other formats keep the eager pre-resolution.
lazyBlockImages = outputFormat.strip().lower() in ("pdf", "docx", "doc")
def _imageBytesResolver(fileId: str):
    """Resolve a fileId to raw image bytes on demand for the renderer.

    The knowledge store is tried first (first chunk with contentType
    "image", base64-decoded); on a miss or an error the file is read through
    ``services.chat.getFileData``. Returns None for an empty fileId or when
    the file lookup fails.
    """
    if not fileId:
        return None

    if knowledgeService:
        try:
            allChunks = knowledgeService._knowledgeDb.getContentChunks(fileId) or []
            imageChunks = [chunk for chunk in allChunks if chunk.get("contentType") == "image"]
            firstImage = imageChunks[0] if imageChunks else None
            if firstImage is not None and firstImage.get("data"):
                import base64 as _b64
                return _b64.b64decode(firstImage["data"])
        except Exception as e:
            # Best effort: fall through to the plain file lookup below.
            logger.warning(f"renderDocument: lazy knowledge image fetch failed for {fileId}: {e}")

    try:
        rawBytes = services.chat.getFileData(fileId)
    except Exception as e:
        logger.warning(f"renderDocument: lazy file image fetch failed for {fileId}: {e}")
        return None
    return rawBytes
def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"): def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
"""Resolve a single image reference dict to base64Data in-place.""" """Resolve a single image reference dict to base64Data in-place."""
nonlocal resolvedImages nonlocal resolvedImages
@ -153,6 +178,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
cType = section.get("content_type") cType = section.get("content_type")
# Block-level image sections # Block-level image sections
if cType == "image": if cType == "image":
# For pdf/docx, defer to lazy renderer-side resolution: keep
# the fileId reference, do not embed base64 into the JSON.
if lazyBlockImages:
continue
for element in section.get("elements", []): for element in section.get("elements", []):
contentObj = element.get("content", {}) contentObj = element.get("content", {})
_resolveImageRef(contentObj) _resolveImageRef(contentObj)
@ -195,6 +224,8 @@ def _registerMediaTools(registry: ToolRegistry, services):
title=title, title=title,
userPrompt=content, userPrompt=content,
style=args.get("style"), style=args.get("style"),
documentTheme=args.get("documentTheme"),
imageResolver=_imageBytesResolver if lazyBlockImages else None,
) )
if not documents: if not documents:
@ -262,6 +293,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with " "For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. " "`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
"Images: ![alt text](file:fileId) in the markdown. " "Images: ![alt text](file:fileId) in the markdown. "
"Layout primitives (PDF/DOCX): a fenced ```cover_page block with `title:`/`subtitle:`/`author:`/`date:`/`logo: file:ID` "
"lines renders a centered title page (e.g. legal filing / report front page); a fenced ```image_grid block with an "
"optional `columns: N` line followed by image refs (`![alt](file:ID)` or `file:ID`, one per line) renders an N-column "
"image arrangement (marketing layouts). "
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND " "Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent " "`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls." "`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
@ -280,6 +315,17 @@ def _registerMediaTools(registry: ToolRegistry, services):
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"}, "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
"title": {"type": "string", "description": "Document title", "default": "Document"}, "title": {"type": "string", "description": "Document title", "default": "Document"},
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"}, "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
"documentTheme": {
"type": "string",
"enum": ["general", "finance", "legal", "technical", "hr", "marketing"],
"description": (
"Named style preset applied by the renderer (colors, fonts, spacing). "
"Pick the one that matches the document purpose: 'legal' for serif/justified "
"legal filings, 'marketing' for bold image-friendly layouts, 'finance', "
"'technical', 'hr', or 'general' (default). The explicit 'style' object, if "
"provided, overrides individual preset keys."
),
},
"style": { "style": {
"type": "object", "type": "object",
"description": ( "description": (
@ -840,6 +886,88 @@ def _registerMediaTools(registry: ToolRegistry, services):
except Exception as e: except Exception as e:
return ToolResult(toolCallId="", toolName="neutralizeData", success=False, error=str(e)) return ToolResult(toolCallId="", toolName="neutralizeData", success=False, error=str(e))
async def _revealDocument(args: Dict[str, Any], context: Dict[str, Any]):
    """De-neutralize (reveal) placeholder text back to original values for a one-time download.

    PRIVACY: this handler returns the cleartext only inside the ``revealDownload``
    side event (base64-encoded). The ``data`` field of the tool result carries a
    confirmation message only -- never the revealed cleartext. Placeholders are
    resolved through the neutralization service's ``resolveText`` (no AI call is
    made here). How the side event is delivered downstream is outside this function.

    Args:
        args: Tool arguments. ``text`` (inline placeholder text) and/or ``fileId``
            (stored text file to load); optional ``fileName`` for the download.
            When both ``text`` and ``fileId`` are given, ``text`` wins.
        context: Agent call context (unused here).

    Returns:
        ToolResult with ``success=True`` and one ``revealDownload`` side event, or
        ``success=False`` with an ``error`` message (missing input, unknown file,
        binary content, missing service, or any unexpected exception).
    """
    import base64 as _b64
    import re as _re

    text = args.get("text", "")
    # str(...) guards against non-string ids/names coming from the model's JSON.
    fileId = str(args.get("fileId") or "").strip()
    fileName = str(args.get("fileName") or "").strip()
    if not isinstance(text, str):
        text = str(text) if text is not None else ""
    if not text and not fileId:
        return ToolResult(toolCallId="", toolName="revealDocument", success=False,
                          error="text or fileId is required")
    try:
        neutralizationService = services.getService("neutralization") if hasattr(services, "getService") else None
        if not neutralizationService or not hasattr(neutralizationService, "resolveText"):
            return ToolResult(toolCallId="", toolName="revealDocument", success=False,
                              error="Neutralization service not available")
        if not getattr(neutralizationService, "interfaceDbComponent", None):
            neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent

        if fileId and not text:
            dbMgmt = services.chat.interfaceDbComponent
            fileRow = dbMgmt.getFile(fileId)
            if not fileRow:
                return ToolResult(toolCallId="", toolName="revealDocument", success=False,
                                  error=f"fileId not found: {fileId}")
            rawBytes = dbMgmt.getFileData(fileId)
            if not rawBytes:
                return ToolResult(toolCallId="", toolName="revealDocument", success=False,
                                  error="File data not accessible")
            # latin-1 can decode ANY byte sequence, so a decode failure alone never
            # detects binary content. A NUL byte is used as the binary marker instead.
            if b"\x00" in rawBytes:
                return ToolResult(toolCallId="", toolName="revealDocument", success=False,
                                  error="File is binary or could not be decoded as text; reveal only supports text content")
            try:
                # utf-8-sig decodes plain UTF-8 as well and strips a leading BOM,
                # which plain "utf-8" would keep as U+FEFF in the download.
                text = rawBytes.decode("utf-8-sig")
            except UnicodeDecodeError:
                text = rawBytes.decode("latin-1")
            if not fileName:
                # The file row may be a dict or an attribute-style record.
                if isinstance(fileRow, dict):
                    storedName = fileRow.get("fileName")
                else:
                    storedName = getattr(fileRow, "fileName", None)
                fileName = storedName or f"{fileId}.txt"

        # Resolve placeholders locally (private mapping, no LLM). Count for the audit message.
        placeholderCount = len(_re.findall(r'\[[a-z]+\.[a-f0-9-]{36}\]', text))
        revealed = neutralizationService.resolveText(text)

        if not fileName:
            fileName = "revealed.txt"
        mimeType = "text/markdown" if fileName.lower().endswith((".md", ".markdown")) else "text/plain"
        contentB64 = _b64.b64encode(revealed.encode("utf-8")).decode("ascii")

        return ToolResult(
            toolCallId="", toolName="revealDocument", success=True,
            data=(
                f"Revealed {placeholderCount} placeholder(s) and prepared '{fileName}' for "
                f"download in the chat. The cleartext was NOT stored, indexed, or kept in history."
            ),
            sideEvents=[{
                "type": "revealDownload",
                "data": {
                    "content": contentB64,
                    "encoding": "base64",
                    "fileName": fileName,
                    "mimeType": mimeType,
                    "placeholderCount": placeholderCount,
                },
            }],
        )
    except Exception as e:
        logger.error(f"revealDocument failed: {e}")
        return ToolResult(toolCallId="", toolName="revealDocument", success=False, error=str(e))
async def _executeCode(args: Dict[str, Any], context: Dict[str, Any]): async def _executeCode(args: Dict[str, Any], context: Dict[str, Any]):
code = args.get("code", "") code = args.get("code", "")
language = args.get("language", "python") language = args.get("language", "python")
@ -899,6 +1027,28 @@ def _registerMediaTools(registry: ToolRegistry, services):
readOnly=True readOnly=True
) )
# Register the reveal tool under the name "revealDocument" with _revealDocument as
# its handler. The JSON schema declares no required properties; the handler itself
# rejects calls that provide neither 'fileId' nor 'text'.
registry.register(
"revealDocument", _revealDocument,
description=(
"De-neutralize (reveal) a neutralized text/document by replacing placeholders like "
"[name.<uuid>] with their original values, using ONLY the private local mapping (no "
"external LLM). The cleartext is delivered to the user as a transient, one-time download "
"in the chat -- it is NEVER saved, indexed, or written to chat history. Use ONLY when the "
"user explicitly asks to download the real/original (de-anonymized) version of a document. "
"Provide either 'fileId' (a stored neutralized text file) or inline 'text'."
),
parameters={
"type": "object",
"properties": {
"fileId": {"type": "string", "description": "ID of a stored text file containing placeholders to reveal"},
"text": {"type": "string", "description": "Inline placeholder text to reveal (alternative to fileId)"},
"fileName": {"type": "string", "description": "Optional download file name (e.g. 'contract-original.md')"},
},
},
readOnly=True,
displayLabel="preparing de-anonymized download",
)
from modules.serviceCenter.services.serviceAgent.sandboxExecutor import SANDBOX_ALLOWED_MODULES from modules.serviceCenter.services.serviceAgent.sandboxExecutor import SANDBOX_ALLOWED_MODULES
moduleList = ", ".join(sorted(SANDBOX_ALLOWED_MODULES | {"io"})) moduleList = ", ".join(sorted(SANDBOX_ALLOWED_MODULES | {"io"}))
registry.register( registry.register(

View file

@ -33,6 +33,7 @@ class AgentEventTypeEnum(str, Enum):
FILE_EDIT_REJECTED = "fileEditRejected" FILE_EDIT_REJECTED = "fileEditRejected"
DATA_SOURCE_ACCESS = "dataSourceAccess" DATA_SOURCE_ACCESS = "dataSourceAccess"
VOICE_RESPONSE = "voiceResponse" VOICE_RESPONSE = "voiceResponse"
REVEAL_DOWNLOAD = "revealDownload"
FINAL = "final" FINAL = "final"
ERROR = "error" ERROR = "error"

View file

@ -57,6 +57,8 @@ async def runFeatureDataAgent(
tableFilters: Optional[Dict[str, Dict[str, str]]] = None, tableFilters: Optional[Dict[str, Dict[str, str]]] = None,
requestLang: Optional[str] = None, requestLang: Optional[str] = None,
neutralizeFields: Optional[Dict[str, List[str]]] = None, neutralizeFields: Optional[Dict[str, List[str]]] = None,
neutralizePolicy: Optional[Dict[str, Dict[str, Any]]] = None,
neutralizationService: Optional[Any] = None,
maxRounds: Optional[int] = None, maxRounds: Optional[int] = None,
maxCostCHF: Optional[float] = None, maxCostCHF: Optional[float] = None,
) -> str: ) -> str:
@ -74,8 +76,13 @@ async def runFeatureDataAgent(
instanceLabel: Human-readable instance name for context. instanceLabel: Human-readable instance name for context.
tableFilters: Per-table record filters from FeatureDataSource.recordFilter. tableFilters: Per-table record filters from FeatureDataSource.recordFilter.
requestLang: ISO 639-1 code for resolving multilingual table labels in the schema prompt. requestLang: ISO 639-1 code for resolving multilingual table labels in the schema prompt.
neutralizeFields: Per-table list of field names to mask with placeholders neutralizeFields: LEGACY per-table list of field names for whole-value masking.
before returning data to the AI. neutralizePolicy: Per-table type/inheritance-aware neutralization policy
({"tableActive": bool, "explicitFields": set}) applied via the provider's
finalizeRowsAsync (A2 rules: strings substring-neutralized when effective,
binary dropped, other scalars only when explicit).
neutralizationService: Mandate/instance-scoped NeutralizationService used for
substring neutralization of string cells.
maxRounds: Inherited from the parent agent's configured `maxRounds` maxRounds: Inherited from the parent agent's configured `maxRounds`
(workspace user setting `maxAgentRounds` -> `AgentConfig.maxRounds`). (workspace user setting `maxAgentRounds` -> `AgentConfig.maxRounds`).
Falls back to the legacy 8-round default when not provided so direct Falls back to the legacy 8-round default when not provided so direct
@ -87,7 +94,12 @@ async def runFeatureDataAgent(
Plain-text answer produced by the sub-agent. Plain-text answer produced by the sub-agent.
""" """
provider = FeatureDataProvider(dbConnector, neutralizeFields=neutralizeFields) provider = FeatureDataProvider(
dbConnector,
neutralizeFields=neutralizeFields,
neutralizePolicy=neutralizePolicy,
neutralizationService=neutralizationService,
)
validator = _buildValidatorForFeature(featureCode) validator = _buildValidatorForFeature(featureCode)
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator) registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator)
@ -207,6 +219,8 @@ def _buildSubAgentTools(
offset=offset, offset=offset,
extraFilters=_recordFilterToList(tableName), extraFilters=_recordFilterToList(tableName),
) )
if hasattr(provider, "finalizeRowsAsync") and "rows" in result:
result["rows"] = await provider.finalizeRowsAsync(tableName, result["rows"])
return ToolResult( return ToolResult(
toolCallId="", toolName="browseTable", toolCallId="", toolName="browseTable",
success="error" not in result, success="error" not in result,
@ -237,6 +251,8 @@ def _buildSubAgentTools(
offset=offset, offset=offset,
extraFilters=_recordFilterToList(tableName), extraFilters=_recordFilterToList(tableName),
) )
if hasattr(provider, "finalizeRowsAsync") and "rows" in result:
result["rows"] = await provider.finalizeRowsAsync(tableName, result["rows"])
return ToolResult( return ToolResult(
toolCallId="", toolName="queryTable", toolCallId="", toolName="queryTable",
success="error" not in result, success="error" not in result,
@ -271,6 +287,8 @@ def _buildSubAgentTools(
groupBy=groupBy, groupBy=groupBy,
extraFilters=combinedFilters or None, extraFilters=combinedFilters or None,
) )
if hasattr(provider, "finalizeRowsAsync") and "rows" in result:
result["rows"] = await provider.finalizeRowsAsync(tableName, result["rows"])
return ToolResult( return ToolResult(
toolCallId="", toolName="aggregateTable", toolCallId="", toolName="aggregateTable",
success="error" not in result, success="error" not in result,

View file

@ -8,6 +8,7 @@ feature table. All queries are automatically filtered by featureInstanceId
and mandateId so data isolation is guaranteed. and mandateId so data isolation is guaranteed.
""" """
import asyncio
import hashlib import hashlib
import logging import logging
import json import json
@ -62,18 +63,36 @@ _ALLOWED_AGGREGATES = {"SUM", "COUNT", "AVG", "MIN", "MAX"}
class FeatureDataProvider: class FeatureDataProvider:
"""Reads feature-instance data from the DB using DATA_OBJECTS metadata.""" """Reads feature-instance data from the DB using DATA_OBJECTS metadata."""
def __init__(self, dbConnector, neutralizeFields: Optional[Dict[str, List[str]]] = None): def __init__(
self,
dbConnector,
neutralizeFields: Optional[Dict[str, List[str]]] = None,
neutralizePolicy: Optional[Dict[str, Dict[str, Any]]] = None,
neutralizationService: Optional[Any] = None,
):
""" """
Args: Args:
dbConnector: A connectorDbPostgre.DatabaseConnector with an open connection. dbConnector: A connectorDbPostgre.DatabaseConnector with an open connection.
neutralizeFields: Per-table field names whose values must be replaced neutralizeFields: LEGACY per-table field names whose values are replaced
with placeholders before returning to the AI, e.g. with a whole-value placeholder ``[NEUT.<field>.<hash>]``. Kept for
``{"TrusteePosition": ["firstName", "lastName", "address"]}``. backward compatibility; superseded by ``neutralizePolicy``.
neutralizePolicy: Per-table type/inheritance-aware policy, e.g.
``{"TrusteePosition": {"tableActive": True, "explicitFields": {"iban"}}}``.
* ``tableActive`` -- effective (own/inherited) table-level neutralize flag.
* ``explicitFields`` -- fields whose neutralize flag is set EXPLICITLY.
Applied via :meth:`finalizeRowsAsync` following the A2 rules:
strings substring-neutralized when effective (explicit or inherited),
binary dropped, other scalars only when explicit.
neutralizationService: The mandate/instance-scoped NeutralizationService
used for substring neutralization of string cells (reuses the standard
neutralization engine; no external LLM is introduced here).
""" """
self._db = dbConnector self._db = dbConnector
self._neutralizeFields: Dict[str, Set[str]] = { self._neutralizeFields: Dict[str, Set[str]] = {
tbl: set(fields) for tbl, fields in (neutralizeFields or {}).items() tbl: set(fields) for tbl, fields in (neutralizeFields or {}).items()
} }
self._neutralizePolicy: Dict[str, Dict[str, Any]] = neutralizePolicy or {}
self._neutralizer = neutralizationService
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# public API (called by FeatureDataAgent tools) # public API (called by FeatureDataAgent tools)
@ -108,12 +127,27 @@ class FeatureDataProvider:
logger.warning(f"getActualColumns({tableName}) failed: {e}") logger.warning(f"getActualColumns({tableName}) failed: {e}")
return [] return []
def _applyFieldNeutralization(self, tableName: str, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: async def finalizeRowsAsync(self, tableName: str, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Neutralize sensitive field values in query results before they reach the AI.""" """Make raw DB rows AI-safe: apply the field-neutralization policy and JSON-serialize.
fieldsToNeut = self._neutralizeFields.get(tableName)
if not fieldsToNeut: The query methods (``browseTable``/``queryTable``/``aggregateTable``) return RAW
return rows rows so this step can see the real Python types (bytes vs str vs scalar).
return [_neutralizeRowFields(row, fieldsToNeut) for row in rows]
* When a rich ``neutralizePolicy`` exists for the table, the A2 type/inheritance
rules apply (see :func:`_neutralizeAndSerializeRows`).
* Otherwise the legacy whole-value behavior is preserved (``neutralizeFields``).
* With no neutralization at all, rows are just JSON-serialized.
Always returns JSON-serializable rows.
"""
policy = self._neutralizePolicy.get(tableName)
if policy:
return await _neutralizeAndSerializeRows(rows, policy, self._neutralizer)
serialized = [_serializeRow(dict(r)) for r in rows]
legacyFields = self._neutralizeFields.get(tableName)
if legacyFields:
serialized = [_neutralizeRowFields(row, legacyFields) for row in serialized]
return serialized
def browseTable( def browseTable(
self, self,
@ -162,9 +196,10 @@ class FeatureDataProvider:
f'ORDER BY "id" LIMIT %s OFFSET %s' f'ORDER BY "id" LIMIT %s OFFSET %s'
) )
cur.execute(dataSql, allParams + [limit, offset]) cur.execute(dataSql, allParams + [limit, offset])
rows = [_serializeRow(dict(r)) for r in cur.fetchall()] # Return RAW rows; neutralization + JSON-serialization happen in
# finalizeRowsAsync (needs the real Python types to apply A2 rules).
rows = [dict(r) for r in cur.fetchall()]
rows = self._applyFieldNeutralization(tableName, rows)
result = {"rows": rows, "total": total, "limit": limit, "offset": offset} result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
_debugQueryLog("browseTable", tableName, { _debugQueryLog("browseTable", tableName, {
"fields": fields, "limit": limit, "offset": offset, "fields": fields, "limit": limit, "offset": offset,
@ -226,9 +261,8 @@ class FeatureDataProvider:
f'FROM "{tableName}" WHERE {fullWhere}' f'FROM "{tableName}" WHERE {fullWhere}'
) )
cur.execute(sql, allParams) cur.execute(sql, allParams)
rows = [_serializeRow(dict(r)) for r in cur.fetchall()] rows = [dict(r) for r in cur.fetchall()]
rows = self._applyFieldNeutralization(tableName, rows)
result = { result = {
"rows": rows, "rows": rows,
"aggregate": aggregate, "aggregate": aggregate,
@ -300,9 +334,8 @@ class FeatureDataProvider:
f'WHERE {fullWhere} {orderClause} LIMIT %s OFFSET %s' f'WHERE {fullWhere} {orderClause} LIMIT %s OFFSET %s'
) )
cur.execute(dataSql, allParams + [limit, offset]) cur.execute(dataSql, allParams + [limit, offset])
rows = [_serializeRow(dict(r)) for r in cur.fetchall()] rows = [dict(r) for r in cur.fetchall()]
rows = self._applyFieldNeutralization(tableName, rows)
result = {"rows": rows, "total": total, "limit": limit, "offset": offset} result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
_debugQueryLog("queryTable", tableName, { _debugQueryLog("queryTable", tableName, {
"filters": filters, "fields": fields, "orderBy": orderBy, "filters": filters, "fields": fields, "orderBy": orderBy,
@ -437,3 +470,142 @@ def _neutralizeRowFields(row: Dict[str, Any], fieldsToNeutralize: Set[str]) -> D
shortHash = hashlib.sha256(str(val).encode()).hexdigest()[:8] shortHash = hashlib.sha256(str(val).encode()).hexdigest()[:8]
row[field] = f"[{_PLACEHOLDER_PREFIX}.{field}.{shortHash}]" row[field] = f"[{_PLACEHOLDER_PREFIX}.{field}.{shortHash}]"
return row return row
# ------------------------------------------------------------------
# A2: type / inheritance-aware field neutralization for source data
# ------------------------------------------------------------------
#
# Rules (see wiki neutralization.md Failsafe 5/6):
# 1. STRING (incl. JSON/markdown/code -- anything textual): substring-neutralize
# via the private NeutralizationService whenever neutralize is EFFECTIVE for the
# field (explicit OR inherited). The placeholders stay embedded in the text so the
# record remains usable; the field name is passed as a type hint.
# 2. BINARY (bytes): never neutralized -- the column is DROPPED when neutralization
# applies to the table/field.
# 3. OTHER SCALARS (number/float/int/date/bool): neutralized (whole-value placeholder)
# ONLY when the field flag is set EXPLICITLY -- never via inheritance.
_NEUT_CONCURRENCY = 4
def _isStructuralField(key: str) -> bool:
    """Tell whether a column is an identifier/system column rather than content.

    A column counts as structural when its name starts with ``_`` or ``sys``,
    is exactly ``id``, ends with ``Id`` or ``_id``, or is one of the known
    scoping/audit columns. Callers use this to pass such columns through
    without neutralizing them, so record references stay intact.
    """
    auditColumns = ("mandateId", "featureInstanceId", "instanceId", "createdBy", "updatedBy")
    looksInternal = key.startswith(("_", "sys"))
    looksLikeKey = key == "id" or key.endswith(("Id", "_id"))
    return looksInternal or looksLikeKey or key in auditColumns
def _isTextValue(value: Any) -> bool:
    """Return True when ``value`` is a ``str``, ``dict`` or ``list``.

    These are the values handled as neutralizable text; dicts and lists are
    JSON-like payloads that callers stringify before neutralizing.
    """
    return isinstance(value, (str, dict, list))
async def _neutralizeOneText(fieldName: str, text: str, neutralizer: Any) -> Optional[str]:
    """Substring-neutralize one text cell, passing the field name along as a hint.

    The value is sent to ``neutralizer.processTextAsync`` as ``"<field>: <text>"``
    and the hint is cut off the returned ``neutralized_text`` again. The function
    fails closed: if the call raises, the result has an unexpected shape, or the
    hint prefix does not survive, ``"[REDACTED]"`` is returned instead of the raw
    value.
    """
    redacted = "[REDACTED]"
    hint = f"{fieldName}: "
    try:
        response = await neutralizer.processTextAsync(hint + text)
    except Exception as e:  # noqa: BLE001 - neutralization must fail closed
        logger.warning("field neutralization failed for '%s': %s", fieldName, e)
        return redacted

    neutralizedText = response.get("neutralized_text") if isinstance(response, dict) else None
    if not isinstance(neutralizedText, str):
        return redacted
    if not neutralizedText.startswith(hint):
        # Engine altered the hint prefix (rare) -- fail closed rather than leak.
        logger.warning("field neutralization prefix mismatch for '%s'; redacting", fieldName)
        return redacted
    return neutralizedText[len(hint):]
async def _neutralizeAndSerializeRows(
    rows: List[Dict[str, Any]],
    policy: Dict[str, Any],
    neutralizer: Any,
) -> List[Dict[str, Any]]:
    """Apply the A2 field-neutralization rules to raw rows and JSON-serialize them.

    Per cell, in this order:
      * ``None`` is kept as ``None``.
      * Structural columns (see ``_isStructuralField``) are serialized but never
        neutralized.
      * Binary values are dropped from the row when neutralization is effective
        for the field (table-level or explicit); otherwise replaced by a
        ``<binary N bytes>`` marker.
      * Text values (``str``; ``dict``/``list`` are JSON-dumped first) are
        substring-neutralized when neutralization is effective and the text is
        non-empty.
      * Any other scalar is replaced by a hashed whole-value placeholder only
        when the field is listed explicitly; values exposing ``isoformat`` are
        otherwise emitted via ``isoformat()``.

    Args:
        rows: Raw rows (column name -> Python value).
        policy: ``{"tableActive": bool, "explicitFields": iterable of names}``.
        neutralizer: Object exposing ``processTextAsync``; when missing, every
            cell that would need neutralization is set to ``"[REDACTED]"``.

    Returns:
        New row dicts; the input rows are not modified.
    """
    redacted = "[REDACTED]"
    # psycopg2 returns bytea columns as memoryview, so treat it as binary as well.
    binaryTypes = (bytes, bytearray, memoryview)
    tableActive = bool(policy.get("tableActive"))
    explicitFields: Set[str] = set(policy.get("explicitFields") or [])

    outRows: List[Dict[str, Any]] = []
    # (fieldName, originalText) -> neutralizedText (dedup across the whole result set)
    pending: Dict[tuple, Optional[str]] = {}
    cellRefs: List[tuple] = []  # (rowIdx, fieldName, originalText)

    for row in rows:
        out: Dict[str, Any] = {}
        for key, value in row.items():
            fieldExplicit = key in explicitFields
            fieldEffective = fieldExplicit or tableActive

            if value is None:
                out[key] = None
                continue

            # Identifiers / system columns: serialize but never neutralize.
            if _isStructuralField(key):
                if hasattr(value, "isoformat"):
                    out[key] = value.isoformat()
                elif isinstance(value, binaryTypes):
                    out[key] = f"<binary {len(value)} bytes>"
                else:
                    out[key] = value
                continue

            if isinstance(value, binaryTypes):
                # Rule 2: binary is dropped when neutralization applies; else legacy marker.
                if fieldEffective:
                    continue
                out[key] = f"<binary {len(value)} bytes>"
                continue

            if _isTextValue(value):
                textVal = value if isinstance(value, str) else json.dumps(value, ensure_ascii=False, default=str)
                if fieldEffective and textVal != "":
                    pending.setdefault((key, textVal), None)
                    # len(outRows) is the index this row gets once appended below.
                    cellRefs.append((len(outRows), key, textVal))
                out[key] = textVal
                continue

            # Rule 3: other scalars (number/float/int/date/bool) -- explicit only.
            if fieldExplicit:
                shortHash = hashlib.sha256(str(value).encode()).hexdigest()[:8]
                out[key] = f"[{_PLACEHOLDER_PREFIX}.{key}.{shortHash}]"
            else:
                out[key] = value.isoformat() if hasattr(value, "isoformat") else value
        outRows.append(out)

    if not cellRefs:
        return outRows

    if neutralizer is None or not hasattr(neutralizer, "processTextAsync"):
        # Fail-safe: neutralization required but no engine -> redact the affected cells.
        for rowIdx, key, _origText in cellRefs:
            outRows[rowIdx][key] = redacted
        return outRows

    sem = asyncio.Semaphore(_NEUT_CONCURRENCY)

    async def _resolvePair(fieldName: str, origText: str) -> None:
        async with sem:
            pending[(fieldName, origText)] = await _neutralizeOneText(fieldName, origText, neutralizer)

    await asyncio.gather(*[
        _resolvePair(fieldName, origText) for (fieldName, origText) in pending
    ])

    for rowIdx, key, origText in cellRefs:
        neutralized = pending.get((key, origText))
        # Fail closed: the cell still holds the raw text at this point, so a
        # missing result must be redacted rather than left in cleartext.
        outRows[rowIdx][key] = neutralized if neutralized is not None else redacted
    return outRows

View file

@ -231,6 +231,22 @@ def _registerDefaultToolboxes() -> None:
"trustee_refreshAccountingData", "trustee_refreshAccountingData",
], ],
), ),
ToolboxDefinition(
id="neutralization",
label="Neutralization / Reveal",
description=(
"Privacy-sensitive de-neutralization. NOT active by default - must be "
"explicitly requested. Contains revealDocument, which resolves "
"neutralization placeholders ([type.uuid]) back to cleartext using ONLY "
"the local mapping (no external LLM) and returns the result as a "
"transient one-time download. Cleartext is never saved, indexed, or kept "
"in the chat history."
),
isDefault=False,
tools=[
"revealDocument",
],
),
] ]
for tb in defaults: for tb in defaults:
_toolboxRegistry.registerToolbox(tb) _toolboxRegistry.registerToolbox(tb)

View file

@ -160,8 +160,11 @@ class AiService:
3. billingCallback on aiObjects: records one billing transaction per model call 3. billingCallback on aiObjects: records one billing transaction per model call
with exact provider + model name (set before AI call, invoked by _callWithModel) with exact provider + model name (set before AI call, invoked by _callWithModel)
NEUTRALIZATION: If enabled, prompt text is neutralized before the AI call NEUTRALIZATION: If enabled, prompt text is neutralized before the AI call.
and placeholders in the response are rehydrated afterwards. The response is persisted exactly as returned by the model (placeholders are
NOT rehydrated/re-saved with cleartext -- that would defeat neutralization).
De-neutralization for download is an explicit, transient action via the
agent's `revealDocument` tool (no save/index).
""" """
await self.ensureAiObjectsInitialized() await self.ensureAiObjectsInitialized()
@ -241,7 +244,9 @@ class AiService:
"""Streaming variant of callAi. Yields str deltas during generation, then final AiCallResponse. """Streaming variant of callAi. Yields str deltas during generation, then final AiCallResponse.
NEUTRALIZATION: If enabled, prompt text is neutralized before streaming. NEUTRALIZATION: If enabled, prompt text is neutralized before streaming.
Rehydration happens on the final AiCallResponse (not on individual str deltas). The streamed/persisted response keeps placeholders as returned by the model
(no cleartext re-hydration into storage). Use the agent's `revealDocument`
tool for an explicit, transient de-neutralization for download.
""" """
await self.ensureAiObjectsInitialized() await self.ensureAiObjectsInitialized()
@ -623,7 +628,7 @@ detectedIntent-Werte:
return basePrompt return basePrompt
# ========================================================================= # =========================================================================
# NEUTRALIZATION: Centralized prompt neutralization / response rehydration # NEUTRALIZATION: Centralized prompt neutralization (no response rehydration)
# ========================================================================= # =========================================================================
async def _hasNeutralizationModel(self) -> bool: async def _hasNeutralizationModel(self) -> bool:
@ -920,20 +925,6 @@ detectedIntent-Werte:
logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}") logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}")
return request, _wasNeutralized, excludedDocs return request, _wasNeutralized, excludedDocs
def _rehydrateResponse(self, responseText: str) -> str:
"""Replace neutralization placeholders with original values in AI response."""
if not responseText:
return responseText
try:
neutralSvc = self._get_service("neutralization")
if not neutralSvc or not hasattr(neutralSvc, 'resolveText'):
return responseText
resolved = neutralSvc.resolveText(responseText)
return resolved if resolved else responseText
except Exception as e:
logger.warning(f"Response rehydration failed: {e}")
return responseText
def _preflightBillingCheck(self) -> None: def _preflightBillingCheck(self) -> None:
""" """
Pre-flight billing validation - like a 0 CHF credit card authorization check. Pre-flight billing validation - like a 0 CHF credit card authorization check.
@ -1689,7 +1680,8 @@ Respond with ONLY a JSON object in this exact format:
language: str, language: str,
title: str, title: str,
userPrompt: str, userPrompt: str,
parentOperationId: str parentOperationId: str,
documentTheme: Optional[str] = None
) -> List[RenderedDocument]: ) -> List[RenderedDocument]:
""" """
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
@ -1741,7 +1733,8 @@ Respond with ONLY a JSON object in this exact format:
title, title,
userPrompt, userPrompt,
self, self,
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie parentOperationId=renderOperationId, # Parent-Referenz für ChatLog-Hierarchie
documentTheme=documentTheme
) )
# ChatLog abschließen # ChatLog abschließen
@ -1783,7 +1776,8 @@ Respond with ONLY a JSON object in this exact format:
outputFormat: Optional[str] = None, outputFormat: Optional[str] = None,
title: Optional[str] = None, title: Optional[str] = None,
parentOperationId: Optional[str] = None, parentOperationId: Optional[str] = None,
generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection) generationIntent: Optional[str] = None, # NEW: Explicit intent from action (skips detection)
documentTheme: Optional[str] = None # Named style preset for document rendering
) -> AiResponse: ) -> AiResponse:
""" """
Unified AI content generation with explicit intent requirement. Unified AI content generation with explicit intent requirement.
@ -1802,6 +1796,8 @@ Respond with ONLY a JSON object in this exact format:
parentOperationId: Optional parent operation ID for hierarchical logging parentOperationId: Optional parent operation ID for hierarchical logging
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action. generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
NO auto-detection - actions must explicitly specify intent. NO auto-detection - actions must explicitly specify intent.
documentTheme: Optional named style preset (general/finance/legal/technical/
hr/marketing) forwarded to the renderer for document generation.
Returns: Returns:
AiResponse with content, metadata, and optional documents AiResponse with content, metadata, and optional documents
@ -1872,7 +1868,8 @@ Respond with ONLY a JSON object in this exact format:
contentParts=contentParts, contentParts=contentParts,
outputFormat=outputFormat, outputFormat=outputFormat,
title=title, title=title,
parentOperationId=parentOperationId parentOperationId=parentOperationId,
documentTheme=documentTheme
) )
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation) # DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
@ -2088,7 +2085,8 @@ Respond with ONLY a JSON object in this exact format:
contentParts: Optional[List[ContentPart]], contentParts: Optional[List[ContentPart]],
outputFormat: str, outputFormat: str,
title: str, title: str,
parentOperationId: Optional[str] parentOperationId: Optional[str],
documentTheme: Optional[str] = None
) -> AiResponse: ) -> AiResponse:
"""Handle document generation using document generation path.""" """Handle document generation using document generation path."""
from modules.serviceCenter.services.serviceGeneration.paths.documentPath import DocumentGenerationPath from modules.serviceCenter.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
@ -2105,7 +2103,8 @@ Respond with ONLY a JSON object in this exact format:
contentParts=contentParts, contentParts=contentParts,
outputFormat=outputFormat, outputFormat=outputFormat,
title=title or "Generated Document", title=title or "Generated Document",
parentOperationId=parentOperationId parentOperationId=parentOperationId,
documentTheme=documentTheme
) )

View file

@ -26,7 +26,7 @@ from modules.interfaces.interfaceDbBilling import getInterface as getBillingInte
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Markup percentage for internal pricing (+50% für Infrastruktur und Platform Service + 50% für Währungsrisiko ==> Faktor 2.0) # Markup percentage on the AI base price (400% ==> Faktor 5.0: Infrastruktur, Platform Service, Währungsrisiko)
BILLING_MARKUP_PERCENT = 400 BILLING_MARKUP_PERCENT = 400
# Singleton cache # Singleton cache
@ -150,7 +150,7 @@ class BillingService:
if basePriceCHF <= 0: if basePriceCHF <= 0:
return 0.0 return 0.0
# Apply markup (50% = multiply by 1.5) # Apply markup (400% = multiply by 5.0)
markup_multiplier = 1 + (BILLING_MARKUP_PERCENT / 100) markup_multiplier = 1 + (BILLING_MARKUP_PERCENT / 100)
return round(basePriceCHF * markup_multiplier, 6) return round(basePriceCHF * markup_multiplier, 6)

View file

@ -383,7 +383,7 @@ class GenerationService:
'workflowId': 'unknown' 'workflowId': 'unknown'
} }
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]: async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None, documentTheme: Optional[str] = None, imageResolver=None) -> List[RenderedDocument]:
""" """
Render extracted JSON content to the specified output format. Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each. Processes EACH document separately and calls renderer for each.
@ -401,13 +401,20 @@ class GenerationService:
aiService: AI service instance for generation prompt creation aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging parentOperationId: Optional parent operation ID for hierarchical logging
style: Optional style overrides (deep-merged with DEFAULT_STYLE) style: Optional style overrides (deep-merged with DEFAULT_STYLE)
documentTheme: Optional named theme preset (general/finance/legal/
technical/hr/marketing). Resolved as DEFAULT_STYLE <- preset <- style,
so an explicit ``style`` override always wins.
imageResolver: Optional callable ``fileId -> bytes`` for lazy, on-demand
resolution of block images that carry only a ``fileId`` (no embedded
base64). Lets large documents avoid holding every image's bytes in
the JSON simultaneously. When None, images must be pre-embedded.
Returns: Returns:
List of RenderedDocument objects. List of RenderedDocument objects.
Each RenderedDocument represents one rendered file (main document or supporting file) Each RenderedDocument represents one rendered file (main document or supporting file)
""" """
try: try:
resolvedStyle = resolveStyle(style) resolvedStyle = resolveStyle(style, documentTheme)
# Validate JSON input # Validate JSON input
if not isinstance(extractedContent, dict): if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary") raise ValueError("extractedContent must be a JSON dictionary")
@ -451,6 +458,11 @@ class GenerationService:
if not renderer: if not renderer:
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping") logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
continue continue
# Provide the per-render lazy image resolver (fileId -> bytes) so
# renderers can fetch block images on demand instead of relying on
# all bytes being pre-embedded in the document JSON.
if imageResolver is not None:
renderer._imageResolver = imageResolver
# Check output style classification (code/document/image/etc.) from renderer # Check output style classification (code/document/image/etc.) from renderer
from .renderers.registry import getOutputStyle from .renderers.registry import getOutputStyle

View file

@ -34,7 +34,8 @@ class DocumentGenerationPath:
contentParts: Optional[List[ContentPart]] = None, contentParts: Optional[List[ContentPart]] = None,
outputFormat: str = "txt", outputFormat: str = "txt",
title: Optional[str] = None, title: Optional[str] = None,
parentOperationId: Optional[str] = None parentOperationId: Optional[str] = None,
documentTheme: Optional[str] = None
) -> AiResponse: ) -> AiResponse:
""" """
Generate document using existing chapter/section model. Generate document using existing chapter/section model.
@ -165,7 +166,8 @@ class DocumentGenerationPath:
language, # Global fallback (per-document language extracted from structure in renderReport) language, # Global fallback (per-document language extracted from structure in renderReport)
title or "Generated Document", title or "Generated Document",
userPrompt, userPrompt,
docOperationId docOperationId,
documentTheme=documentTheme
) )
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData

View file

@ -25,6 +25,42 @@ class BaseRenderer(ABC):
def __init__(self, services=None): def __init__(self, services=None):
self.logger = logger self.logger = logger
self.services = services # Add services attribute self.services = services # Add services attribute
# Optional per-render lazy image resolver: fileId -> raw bytes. Set by
# GenerationService.renderReport. When present, block images that only
# carry a fileId (no embedded base64Data) are fetched on demand during
# rendering, so the document JSON never holds all image bytes at once.
self._imageResolver = None
def _lazyResolveImageBase64(self, *sources) -> str:
"""Resolve a fileId/_fileRef (found in any of the given dicts) to base64.
Returns "" when no resolver is configured, no id is present, or the
fetch fails/empty. The raw bytes are held only transiently here; nothing
is written back into the source JSON, keeping peak memory ~ one image.
"""
resolver = getattr(self, "_imageResolver", None)
if not callable(resolver):
return ""
fileId = ""
for src in sources:
if isinstance(src, dict):
fileId = src.get("fileId") or src.get("_fileRef") or ""
if fileId:
break
if not fileId:
return ""
try:
rawBytes = resolver(fileId)
except Exception as e: # noqa: BLE001 - a single bad image must not abort the doc
self.logger.warning(f"lazy image resolve failed for fileId={fileId}: {e}")
return ""
if not rawBytes:
return ""
try:
return base64.b64encode(rawBytes).decode("ascii")
except Exception as e: # noqa: BLE001
self.logger.warning(f"lazy image encode failed for fileId={fileId}: {e}")
return ""
@classmethod @classmethod
def getSupportedFormats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:

View file

@ -158,8 +158,14 @@ class RendererDocx(BaseRenderer):
# Fallback to metadata.title only if title parameter is empty # Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document") document_title = title if title else metadata.get("title", "Generated Document")
# A cover_page section renders its own title page; skip the standalone
# title in that case so the title is not duplicated.
hasCoverPage = any(
(s.get("content_type") if isinstance(s, dict) else "") == "cover_page"
for s in sections
)
# Add document title using Title style # Add document title using Title style
if document_title: if document_title and not hasCoverPage:
doc.add_paragraph(document_title, style='Title') doc.add_paragraph(document_title, style='Title')
# Process each section in order # Process each section in order
@ -377,6 +383,10 @@ class RendererDocx(BaseRenderer):
self._renderJsonCodeBlock(doc, element, styles) self._renderJsonCodeBlock(doc, element, styles)
elif element_type == "image": elif element_type == "image":
self._renderJsonImage(doc, element, styles) self._renderJsonImage(doc, element, styles)
elif element_type == "cover_page" or section_type == "cover_page":
self._renderCoverPage(doc, element, styles)
elif element_type == "image_grid" or section_type == "image_grid":
self._renderImageGrid(doc, element, styles)
else: else:
# Fallback: if element_type not set, use section_type # Fallback: if element_type not set, use section_type
if section_type == "table": if section_type == "table":
@ -1030,6 +1040,89 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
def _imageStreamFromContent(self, content: Dict[str, Any]):
"""Return a BytesIO of image bytes from base64Data or a lazily-resolved fileId.
Returns None when nothing resolvable is present. Kept small so layout
primitives (cover/grid) share the same lazy-resolution path as block images.
"""
if not isinstance(content, dict):
return None
b64 = content.get("base64Data") or content.get("logoBase64") or ""
if not b64:
b64 = self._lazyResolveImageBase64(content)
if not b64:
return None
try:
return io.BytesIO(base64.b64decode(b64))
except Exception:
return None
def _renderCoverPage(self, doc: Document, element: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Render a cover/title page in DOCX, ending with a page break.

    Args:
        doc: Target python-docx document; paragraphs are appended to it.
        element: Cover element. Its nested ``content`` dict is used when
            present, otherwise the element itself is treated as the content.
            Recognised keys: ``title``, ``subtitle``, ``author``, ``date`` plus
            whatever ``_imageStreamFromContent`` accepts for the logo.
        styles: Resolved style set (not consulted by this method).

    A logo that cannot be inserted is logged and skipped so the textual cover
    and the trailing page break are still produced. Any other failure is logged
    as a warning and never raised.
    """
    try:
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        content = element.get("content", element) if isinstance(element, dict) else {}
        if not isinstance(content, dict):
            content = {}

        # Empty paragraphs push the cover content down from the top margin.
        for _ in range(6):
            doc.add_paragraph("")

        logoStream = self._imageStreamFromContent(content)
        if logoStream is not None:
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            try:
                p.add_run().add_picture(logoStream, width=Inches(2.0))
            except Exception as logoErr:  # noqa: BLE001 - a bad logo must not drop the cover
                self.logger.warning(f"cover_page logo failed: {logoErr}")

        # str() guards against non-string values (e.g. a numeric date).
        title = str(content.get("title") or "").strip()
        if title:
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            run = p.add_run(title)
            run.bold = True
            run.font.size = Pt(28)

        for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)):
            val = str(content.get(key) or "").strip()
            if not val:
                continue
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            p.add_run(val).font.size = Pt(sizePt)

        # Body content starts on the following page.
        doc.add_page_break()
    except Exception as e:
        self.logger.warning(f"Error rendering cover_page: {e}")
def _renderImageGrid(self, doc: Document, element: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Lay out the element's images as an N-column DOCX table of pictures.

    The nested ``content`` dict (or the element itself) supplies ``images``
    (list of image dicts) and ``columns`` (default 2, minimum 1; unparsable
    values fall back to 2). Images that cannot be resolved to bytes are left
    out. Nothing is rendered when no image remains. An empty paragraph is added
    after the table. Failures are logged as warnings, never raised.
    """
    try:
        grid = element.get("content", element) if isinstance(element, dict) else {}
        if not isinstance(grid, dict):
            return

        images = grid.get("images") or []
        if not (isinstance(images, list) and images):
            return

        try:
            columns = max(1, int(grid.get("columns", 2)))
        except (TypeError, ValueError):
            columns = 2

        # Resolve every image first; the table size depends on how many survive.
        streams = []
        for imageEntry in images:
            resolved = self._imageStreamFromContent(imageEntry)
            if resolved is not None:
                streams.append(resolved)
        if not streams:
            return

        import math
        rowCount = math.ceil(len(streams) / columns)
        table = doc.add_table(rows=rowCount, cols=columns)
        cellWidthInches = max(1.0, 6.5 / columns - 0.1)

        position = 0
        for stream in streams:
            rowIdx, colIdx = divmod(position, columns)
            position += 1
            firstPara = table.cell(rowIdx, colIdx).paragraphs[0]
            try:
                firstPara.add_run().add_picture(stream, width=Inches(cellWidthInches))
            except Exception as ie:
                # One unreadable picture leaves its cell empty; the rest still render.
                self.logger.warning(f"image_grid cell failed: {ie}")

        doc.add_paragraph("")
    except Exception as e:
        self.logger.warning(f"Error rendering image_grid: {e}")
def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON image to DOCX.""" """Render a JSON image to DOCX."""
try: try:
@ -1056,6 +1149,11 @@ class RendererDocx(BaseRenderer):
if not caption: if not caption:
caption = image_data.get("caption", "") caption = image_data.get("caption", "")
# Last resort: lazily resolve a fileId reference to bytes on demand
# (large-document path - keeps image bytes out of the document JSON).
if not base64_data:
base64_data = self._lazyResolveImageBase64(content if isinstance(content, dict) else None, image_data)
# CRITICAL: Ensure we don't render base64 data as text # CRITICAL: Ensure we don't render base64 data as text
# If base64_data looks like it might be rendered elsewhere, skip it # If base64_data looks like it might be rendered elsewhere, skip it
if not base64_data: if not base64_data:

View file

@ -165,6 +165,10 @@ class RendererPdf(BaseRenderer):
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str: async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling.""" """Generate PDF content from structured JSON document using AI-generated styling."""
# Large-document streaming: image flowables are backed by temp files (reportlab
# reads them from disk at build time) so image bytes are not all resident in
# memory simultaneously. Collected here, deleted after the build.
self._tempImageFiles = []
try: try:
# Get style set from unified style or legacy approach # Get style set from unified style or legacy approach
if unifiedStyle: if unifiedStyle:
@ -210,7 +214,13 @@ class RendererPdf(BaseRenderer):
document_title = (title or "").strip() document_title = (title or "").strip()
if not document_title and isinstance(metadata, dict): if not document_title and isinstance(metadata, dict):
document_title = (metadata.get("title") or "").strip() document_title = (metadata.get("title") or "").strip()
if document_title: # A cover_page section already renders its own title page; rendering the
# standalone document title on top of it would duplicate the title.
hasCoverPage = any(
(s.get("content_type") if isinstance(s, dict) else "") == "cover_page"
for s in sections
)
if document_title and not hasCoverPage:
story.append(self._paragraphFromInlineMarkdown(document_title, self._createDocumentTitleStyle(styles))) story.append(self._paragraphFromInlineMarkdown(document_title, self._createDocumentTitleStyle(styles)))
# Process each section (sections already extracted above) # Process each section (sections already extracted above)
@ -233,6 +243,18 @@ class RendererPdf(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.error(f"Error generating PDF from JSON: {str(e)}") self.logger.error(f"Error generating PDF from JSON: {str(e)}")
raise Exception(f"PDF generation failed: {str(e)}") raise Exception(f"PDF generation failed: {str(e)}")
finally:
self._cleanupTempImageFiles()
def _cleanupTempImageFiles(self) -> None:
"""Delete temp image files created for streamed (file-backed) PDF images."""
import os
for path in getattr(self, "_tempImageFiles", []) or []:
try:
os.unlink(path)
except OSError:
pass
self._tempImageFiles = []
def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None: def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None:
"""Try doc.build(); on 'too large on page' LayoutError, drop the offending """Try doc.build(); on 'too large on page' LayoutError, drop the offending
@ -790,6 +812,10 @@ class RendererPdf(BaseRenderer):
all_elements.extend(self._renderJsonCodeBlock(element, styles)) all_elements.extend(self._renderJsonCodeBlock(element, styles))
elif element_type == "image": elif element_type == "image":
all_elements.extend(self._renderJsonImage(element, styles)) all_elements.extend(self._renderJsonImage(element, styles))
elif element_type == "cover_page" or section_type == "cover_page":
all_elements.extend(self._renderCoverPage(element, styles))
elif element_type == "image_grid" or section_type == "image_grid":
all_elements.extend(self._renderImageGrid(element, styles))
else: else:
# Fallback: if element_type not set, use section_type as fallback # Fallback: if element_type not set, use section_type as fallback
if section_type == "table": if section_type == "table":
@ -1057,6 +1083,83 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
return [] return []
def _renderCoverPage(self, element: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a cover/title page: centered title, subtitle, author, date, optional logo.
Content keys: title, subtitle, author, date, logo (fileId/_fileRef). Ends with a
page break so the body starts on the next page (legal filing / report front page).
An embedded ``logoBase64`` string is used when the lazy fileId lookup yields nothing.
Returns the list of reportlab flowables that make up the cover.
"""
from reportlab.platypus import Spacer, PageBreak
# Use the nested "content" dict when present, otherwise the element itself.
content = element.get("content", element) if isinstance(element, dict) else {}
if not isinstance(content, dict):
content = {}
# Initial flowable list: vertical space above the cover content.
out: List[Any] = [Spacer(1, 160)]
# Optional logo at the top of the cover.
# NOTE(review): the lazy fileId lookup is tried before the embedded logoBase64,
# and "base64Data" is not consulted here - confirm this ordering is intended.
logoB64 = self._lazyResolveImageBase64(content) or content.get("logoBase64", "")
if logoB64:
# Rebinds `out` to the flowables returned for the logo image.
out = self._renderJsonImage({"content": {"base64Data": logoB64, "altText": "Logo"}}, styles)
out.append(Spacer(1, 60))
title = (content.get("title") or "").strip()
if title:
# The cover title reuses the document-title paragraph style.
out.append(self._paragraphFromInlineMarkdown(title, self._createDocumentTitleStyle(styles)))
out.append(Spacer(1, 18))
# Remaining text lines, each with its own font size; empty values are skipped.
for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)):
val = (content.get(key) or "").strip()
if not val:
continue
# alignment=1 centers the paragraph (reportlab TA_CENTER).
st = ParagraphStyle(f"cover_{key}", parent=self._createNormalStyle(styles), alignment=1, fontSize=sizePt)
out.append(Paragraph(self._escapeReportlabXml(val), st))
out.append(Spacer(1, 8))
# Everything after the cover starts on a new page.
out.append(PageBreak())
return out
def _renderImageGrid(self, element: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render an image grid (N columns) -- marketing-style image arrangement.

    Content: ``columns`` (int, default 2, minimum 1; unparsable values fall back
    to 2) and ``images`` (list of dicts each carrying base64Data or a
    fileId/_fileRef resolved lazily). Images are laid out in a reportlab Table
    so they sit side by side; the last row is padded with tiny spacers.

    Returns:
        ``[Table, Spacer]``, or ``[]`` when the content holds no usable images.
    """
    # Imported once up front instead of on every loop iteration.
    from reportlab.platypus import Image as ReportLabImage
    from reportlab.platypus import Spacer, Table

    content = element.get("content", element) if isinstance(element, dict) else {}
    if not isinstance(content, dict):
        return []
    images = content.get("images") or []
    if not isinstance(images, list) or not images:
        return []
    try:
        columns = max(1, int(content.get("columns", 2)))
    except (TypeError, ValueError):
        columns = 2

    cellFlowables: List[Any] = []
    for img in images:
        if not isinstance(img, dict):
            continue
        rendered = self._renderJsonImage({"content": img}, styles)
        # Keep only the image flowable; skip caption paragraphs for a clean grid.
        # When no image flowable came back, fall back to whatever was rendered.
        picture = next((f for f in rendered if isinstance(f, ReportLabImage)), None)
        if picture is None:
            picture = rendered[0] if rendered else Spacer(1, 1)
        cellFlowables.append(picture)
    if not cellFlowables:
        return []

    # Pad to a full final row, then chunk into rows of `columns`.
    remainder = len(cellFlowables) % columns
    if remainder:
        cellFlowables.extend(Spacer(1, 1) for _ in range(columns - remainder))
    rows = [cellFlowables[i:i + columns] for i in range(0, len(cellFlowables), columns)]

    colW = _PDF_CONTENT_WIDTH_PT / columns
    # Clamp the target width so a very large `columns` value can never produce
    # a zero or negative scale factor.
    maxImageW = max(colW - 8, 1)
    # Scale each image down (aspect ratio preserved) to fit its column cell.
    for cell in cellFlowables:
        if isinstance(cell, ReportLabImage) and getattr(cell, "drawWidth", 0) > maxImageW:
            scale = maxImageW / cell.drawWidth
            cell.drawWidth *= scale
            cell.drawHeight *= scale

    table = Table(rows, colWidths=[colW] * columns)
    return [table, Spacer(1, 10)]
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements using reportlab.""" """Render a JSON image to PDF elements using reportlab."""
try: try:
@ -1094,6 +1197,11 @@ class RendererPdf(BaseRenderer):
if match: if match:
base64_data = match.group(1) base64_data = match.group(1)
# Last resort: lazily resolve a fileId reference to bytes on demand
# (large-document path - keeps image bytes out of the document JSON).
if not base64_data:
base64_data = self._lazyResolveImageBase64(content if isinstance(content, dict) else None, image_data)
if not base64_data: if not base64_data:
self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}") self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
@ -1183,8 +1291,20 @@ class RendererPdf(BaseRenderer):
imgHeight = 3 * inch # ~216 points, safe for ~751pt available height imgHeight = 3 * inch # ~216 points, safe for ~751pt available height
imageStream.seek(0) imageStream.seek(0)
# Create reportlab Image # Create reportlab Image from a TEMP FILE rather than the in-memory
reportlabImage = ReportLabImage(imageStream, width=imgWidth, height=imgHeight) # stream: reportlab reads file-backed images lazily at build time, so
# the bytes of all images are not held in memory at once (large-doc path).
import tempfile
imageStream.seek(0)
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".img")
try:
tmp.write(imageStream.read())
finally:
tmp.close()
if not hasattr(self, "_tempImageFiles") or self._tempImageFiles is None:
self._tempImageFiles = []
self._tempImageFiles.append(tmp.name)
reportlabImage = ReportLabImage(tmp.name, width=imgWidth, height=imgHeight)
elements = [reportlabImage] elements = [reportlabImage]

View file

@ -57,6 +57,97 @@ DEFAULT_STYLE: Dict[str, Any] = {
} }
# ------------------------------------------------------------------
# Theme presets (A3): named, purpose-specific style overrides that are
# deep-merged onto DEFAULT_STYLE. A preset only declares the keys it changes;
# everything else inherits the default. Explicit per-call `style` overrides
# always win over the preset.
# ------------------------------------------------------------------
THEME_PRESETS: Dict[str, Dict[str, Any]] = {
# "general" intentionally empty -> identical to DEFAULT_STYLE.
"general": {},
"finance": {
"fonts": {"primary": "Calibri"},
"colors": {"primary": "#0B3D2E", "secondary": "#14532D", "accent": "#047857"},
"documentTitle": {"color": "#0B3D2E", "align": "left"},
"headings": {
"h1": {"color": "#0B3D2E"},
"h2": {"color": "#0B3D2E"},
"h3": {"color": "#14532D"},
"h4": {"color": "#14532D"},
},
"table": {"headerBg": "#0B3D2E", "rowBandingEven": "#ECFDF5"},
},
"legal": {
# Serif, sober, single-column, justified body, no logo banner.
"fonts": {"primary": "Times New Roman"},
"colors": {"primary": "#1A1A1A", "secondary": "#333333", "accent": "#5A5A5A"},
"documentTitle": {"color": "#1A1A1A", "align": "center", "sizePt": 20},
"headings": {
"h1": {"color": "#1A1A1A", "sizePt": 16},
"h2": {"color": "#1A1A1A", "sizePt": 14},
"h3": {"color": "#333333", "sizePt": 12},
"h4": {"color": "#333333", "sizePt": 11},
},
"paragraph": {"sizePt": 11, "lineSpacing": 1.5, "color": "#1A1A1A", "align": "justify"},
"table": {"headerBg": "#333333", "rowBandingEven": "#F5F5F5", "borderColor": "#999999"},
"page": {"showPageNumbers": True},
},
"technical": {
"fonts": {"primary": "Arial", "monospace": "Consolas"},
"colors": {"primary": "#0F172A", "secondary": "#1E293B", "accent": "#2563EB"},
"documentTitle": {"color": "#0F172A", "align": "left"},
"headings": {
"h1": {"color": "#0F172A"},
"h2": {"color": "#1E293B"},
"h3": {"color": "#1E293B"},
"h4": {"color": "#334155"},
},
"paragraph": {"sizePt": 10, "lineSpacing": 1.2},
"codeBlock": {"fontSizePt": 9, "background": "#0F172A"},
"table": {"headerBg": "#1E293B", "rowBandingEven": "#EEF2FF"},
},
"hr": {
"fonts": {"primary": "Calibri"},
"colors": {"primary": "#5B21B6", "secondary": "#6D28D9", "accent": "#9333EA"},
"documentTitle": {"color": "#5B21B6", "align": "center"},
"headings": {
"h1": {"color": "#5B21B6"},
"h2": {"color": "#6D28D9"},
"h3": {"color": "#7C3AED"},
"h4": {"color": "#7C3AED"},
},
"table": {"headerBg": "#5B21B6", "rowBandingEven": "#F5F3FF"},
},
"marketing": {
# Bold, image-friendly, generous spacing, larger title.
"fonts": {"primary": "Verdana"},
"colors": {"primary": "#BE123C", "secondary": "#E11D48", "accent": "#F59E0B"},
"documentTitle": {"color": "#BE123C", "sizePt": 34, "align": "center", "spaceAfterPt": 24},
"headings": {
"h1": {"color": "#BE123C", "sizePt": 24},
"h2": {"color": "#E11D48", "sizePt": 19},
"h3": {"color": "#E11D48", "sizePt": 15},
"h4": {"color": "#9F1239", "sizePt": 13},
},
"paragraph": {"sizePt": 12, "lineSpacing": 1.3},
"image": {"defaultWidthPt": 540, "maxWidthPt": 900, "alignment": "center"},
"table": {"headerBg": "#BE123C", "rowBandingEven": "#FFF1F2"},
},
}
def resolveTheme(themeName: str | None) -> Dict[str, Any]:
"""Return the partial style override for a named theme preset.
Unknown / empty names fall back to ``{}`` (i.e. plain DEFAULT_STYLE).
The lookup is case-insensitive.
"""
if not themeName:
return {}
return dict(THEME_PRESETS.get(str(themeName).strip().lower(), {}))
def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
"""Recursively merge override into base. Both dicts left unchanged; returns new dict.""" """Recursively merge override into base. Both dicts left unchanged; returns new dict."""
result = {} result = {}
@ -76,8 +167,17 @@ def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]
return result return result
def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]: def resolveStyle(agentStyle: dict | None, documentTheme: str | None = None) -> Dict[str, Any]:
"""Deep-merge DEFAULT_STYLE <- agentStyle. Returns fully resolved style dict.""" """Resolve the effective style: ``DEFAULT_STYLE <- themePreset <- agentStyle``.
if not agentStyle:
return dict(DEFAULT_STYLE) Precedence (lowest to highest): platform defaults, the named ``documentTheme``
return _deepMerge(DEFAULT_STYLE, agentStyle) preset, then any explicit per-call ``agentStyle`` override. With no theme and
no override this returns plain :data:`DEFAULT_STYLE`.
"""
resolved = dict(DEFAULT_STYLE)
themeOverride = resolveTheme(documentTheme)
if themeOverride:
resolved = _deepMerge(resolved, themeOverride)
if agentStyle:
resolved = _deepMerge(resolved, agentStyle)
return resolved

View file

@ -134,6 +134,68 @@ def _parseInlineRuns(text: str) -> list:
return runs if runs else [{"type": "text", "value": text}] return runs if runs else [{"type": "text", "value": text}]
def _imageRefToDict(token: str) -> Optional[Dict[str, Any]]:
"""Parse one image reference line into an image content dict.
Accepts markdown image syntax ``![alt](file:ID "200pt")`` / ``![alt](https://...)``
or a bare ``file:ID`` / URL. Returns None for blank lines.
"""
token = (token or "").strip()
if not token:
return None
m = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", token)
if m:
alt = (m.group(1) or "").strip() or "Image"
src = (m.group(2) or "").strip()
widthStr = m.group(3)
else:
alt, src, widthStr = "Image", token, None
fileId = src[5:] if src.startswith("file:") else ""
out: Dict[str, Any] = {"altText": alt, "base64Data": "", "_fileRef": fileId, "_srcUrl": src if not fileId else ""}
if widthStr:
out["widthPt"] = int(widthStr)
return out
def _parseCoverPageBlock(codeLines: List[str]) -> Dict[str, Any]:
"""Parse a ```cover_page fenced block of ``key: value`` lines.
Recognized keys: title, subtitle, author, date, logo (``file:ID`` or URL).
"""
content: Dict[str, Any] = {}
for raw in codeLines:
if ":" not in raw:
continue
key, _, value = raw.partition(":")
key = key.strip().lower()
value = value.strip()
if not value:
continue
if key in ("title", "subtitle", "author", "date"):
content[key] = value
elif key == "logo":
content["_fileRef"] = value[5:] if value.startswith("file:") else ""
if not content["_fileRef"]:
content["_srcUrl"] = value
return content
def _parseImageGridBlock(codeLines: List[str]) -> Dict[str, Any]:
"""Parse a ```image_grid fenced block: optional ``columns: N`` plus image refs."""
columns = 2
images: List[Dict[str, Any]] = []
for raw in codeLines:
stripped = raw.strip()
m = re.match(r"^columns\s*:\s*(\d+)\s*$", stripped, re.IGNORECASE)
if m:
columns = max(1, int(m.group(1)))
continue
img = _imageRefToDict(stripped)
if img:
images.append(img)
return {"columns": columns, "images": images}
def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
""" """
Convert markdown content to the standard document JSON format with Inline-Run model. Convert markdown content to the standard document JSON format with Inline-Run model.
@ -178,6 +240,19 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
codeLines.append(lines[i]) codeLines.append(lines[i])
i += 1 i += 1
i += 1 i += 1
# Layout primitives are authored as fenced blocks with a special "language".
if lang == "cover_page":
sections.append({
"id": _nextId(), "content_type": "cover_page", "order": order,
"elements": [{"content": _parseCoverPageBlock(codeLines)}],
})
continue
if lang == "image_grid":
sections.append({
"id": _nextId(), "content_type": "image_grid", "order": order,
"elements": [{"content": _parseImageGridBlock(codeLines)}],
})
continue
sections.append({ sections.append({
"id": _nextId(), "content_type": "code_block", "order": order, "id": _nextId(), "content_type": "code_block", "order": order,
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}], "elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],

View file

@ -10,7 +10,7 @@ The UDB shows three logical hierarchies as a single user-facing tree:
For every visible node the UI needs: For every visible node the UI needs:
- a stable `key` (used both for expand-state and as parent reference) - a stable `key` (used both for expand-state and as parent reference)
- a `kind`, `label`, optional `icon` - a `kind`, `label`, optional `icon`
- effective values for all three flags (neutralize, scope, ragIndexEnabled) - effective values for flags (neutralize, ragIndexEnabled)
- whether a backing DB record exists (`dataSourceId` + `modelType`) - whether a backing DB record exists (`dataSourceId` + `modelType`)
- whether the node has children to expand - whether the node has children to expand

View file

@ -1,6 +1,6 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
"""Cascade-inherit semantics for DataSource flags (neutralize, ragIndexEnabled, scope). """Cascade-inherit semantics for DataSource flags (neutralize, ragIndexEnabled).
Three-state flags allow tree elements to either set an explicit value or Three-state flags allow tree elements to either set an explicit value or
inherit the value from their nearest ancestor in the path hierarchy. inherit the value from their nearest ancestor in the path hierarchy.
@ -19,7 +19,8 @@ Path-traversal rules:
- Sub-elements have paths like `/folder1/sub`. Their parent path is the - Sub-elements have paths like `/folder1/sub`. Their parent path is the
longest prefix path that exists as a DataSource record (string-based). longest prefix path that exists as a DataSource record (string-based).
- If no ancestor with an explicit value exists, the default is `False` - If no ancestor with an explicit value exists, the default is `False`
(or `'personal'` for scope) matching the legacy behavior of NULL = inherit. matching the legacy behavior of NULL = inherit.
(scope was removed from DataSource in 2026-06 for privacy reasons.)
""" """
import logging import logging
@ -27,7 +28,7 @@ from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_INHERITABLE_FLAGS = ("neutralize", "ragIndexEnabled", "scope") _INHERITABLE_FLAGS = ("neutralize", "ragIndexEnabled")
_INHERITABLE_FDS_FLAGS = ("neutralize", "ragIndexEnabled") _INHERITABLE_FDS_FLAGS = ("neutralize", "ragIndexEnabled")
# Connection-root DataSources carry the authority as their sourceType # Connection-root DataSources carry the authority as their sourceType
@ -56,8 +57,6 @@ def _normalisePath(path: Optional[str]) -> str:
def _flagDefault(flag: str) -> Any: def _flagDefault(flag: str) -> Any:
if flag == "scope":
return "personal"
return False return False
@ -545,7 +544,8 @@ def resolveEffectiveForPath(
"""Resolve effective flags for ANY (connectionId, sourceType, path) tuple. """Resolve effective flags for ANY (connectionId, sourceType, path) tuple.
Works whether or not a DataSource record exists for this exact path. Works whether or not a DataSource record exists for this exact path.
Returns dict with effectiveNeutralize, effectiveScope, effectiveRagIndexEnabled. Returns dict with effectiveNeutralize, effectiveRagIndexEnabled.
(effectiveScope was removed in 2026-06: personal sources have no scope.)
""" """
normPath = _normalisePath(path) normPath = _normalisePath(path)
exactRecord = None exactRecord = None
@ -561,7 +561,6 @@ def resolveEffectiveForPath(
if exactRecord: if exactRecord:
return { return {
"effectiveNeutralize": getEffectiveFlag(exactRecord, "neutralize", allDs, mode=mode), "effectiveNeutralize": getEffectiveFlag(exactRecord, "neutralize", allDs, mode=mode),
"effectiveScope": getEffectiveFlag(exactRecord, "scope", allDs, mode=mode),
"effectiveRagIndexEnabled": getEffectiveFlag(exactRecord, "ragIndexEnabled", allDs, mode=mode), "effectiveRagIndexEnabled": getEffectiveFlag(exactRecord, "ragIndexEnabled", allDs, mode=mode),
} }
@ -571,12 +570,10 @@ def resolveEffectiveForPath(
"sourceType": sourceType, "sourceType": sourceType,
"path": normPath, "path": normPath,
"neutralize": None, "neutralize": None,
"scope": None,
"ragIndexEnabled": None, "ragIndexEnabled": None,
} }
return { return {
"effectiveNeutralize": getEffectiveFlag(virtualRec, "neutralize", allDs, mode=mode), "effectiveNeutralize": getEffectiveFlag(virtualRec, "neutralize", allDs, mode=mode),
"effectiveScope": getEffectiveFlag(virtualRec, "scope", allDs, mode=mode),
"effectiveRagIndexEnabled": getEffectiveFlag(virtualRec, "ragIndexEnabled", allDs, mode=mode), "effectiveRagIndexEnabled": getEffectiveFlag(virtualRec, "ragIndexEnabled", allDs, mode=mode),
} }

View file

@ -163,7 +163,7 @@ def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list]
over all DataSources whose effective value resolves to True, including over all DataSources whose effective value resolves to True, including
inherited ones. inherited ones.
Returned dicts carry **resolved** flags (`neutralize`, `scope`) so the Returned dicts carry **resolved** flags (`neutralize`) so the
downstream walkers can keep reading `ds.get("neutralize")` directly downstream walkers can keep reading `ds.get("neutralize")` directly
without having to know about the inheritance chain. without having to know about the inheritance chain.
@ -185,7 +185,6 @@ def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list]
continue continue
dsCopy = dict(ds) if isinstance(ds, dict) else {**ds.__dict__} dsCopy = dict(ds) if isinstance(ds, dict) else {**ds.__dict__}
dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs) dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs)
dsCopy["scope"] = getEffectiveFlag(ds, "scope", allDs)
dsCopy["ragIndexEnabled"] = True dsCopy["ragIndexEnabled"] = True
if connectionRootLimits: if connectionRootLimits:

View file

@ -152,7 +152,6 @@ async def _featureBootstrapHandler(
try: try:
dbConnector = _getFeatureDbConnector(featureCode) dbConnector = _getFeatureDbConnector(featureCode)
provider = FeatureDataProvider(dbConnector)
rootUser = getRootUser() rootUser = getRootUser()
ctx = ServiceCenterContext( ctx = ServiceCenterContext(
@ -162,6 +161,22 @@ async def _featureBootstrapHandler(
) )
knowledgeService = getService("knowledge", ctx) knowledgeService = getService("knowledge", ctx)
# A2: index the SAME neutralized text the query path returns (parity).
neutralizationService = getService("neutralization", ctx)
neutralizePolicy = None
if effectiveNeutralize or neutralizeFields:
neutralizePolicy = {
tableName: {
"tableActive": bool(effectiveNeutralize),
"explicitFields": set(neutralizeFields),
}
}
provider = FeatureDataProvider(
dbConnector,
neutralizePolicy=neutralizePolicy,
neutralizationService=neutralizationService,
)
extraFilters = [ extraFilters = [
{"field": k, "op": "=", "value": v} {"field": k, "op": "=", "value": v}
for k, v in recordFilter.items() for k, v in recordFilter.items()
@ -186,12 +201,16 @@ async def _featureBootstrapHandler(
if not rows: if not rows:
break break
# Apply the A2 field-neutralization policy + JSON-serialize (same as
# the sub-agent query path) before building the embedding text.
rows = await provider.finalizeRowsAsync(tableName, rows)
for row in rows: for row in rows:
rowId = row.get("id", "") rowId = row.get("id", "")
if not rowId: if not rowId:
continue continue
textContent = _serializeRowToText(row, neutralizeFields if effectiveNeutralize else None) textContent = _serializeRowToText(row)
if not textContent.strip(): if not textContent.strip():
fdsSkipped += 1 fdsSkipped += 1
continue continue

View file

@ -77,10 +77,11 @@ class UdbNode(ABC):
def supportsFlag(self, flag: str) -> bool: def supportsFlag(self, flag: str) -> bool:
"""Whether this node carries a value for `flag` at all. """Whether this node carries a value for `flag` at all.
Subclasses override to restrict (e.g. FDS has no scope; fdsField Subclasses override to restrict (e.g. fdsField only has neutralize).
only has neutralize). Scope was removed from DataSource nodes (privacy, 2026-06) and never
existed on FDS nodes. Only Files (folder-files) retain scope.
""" """
return flag in ("neutralize", "scope", "ragIndexEnabled") return flag in ("neutralize", "ragIndexEnabled")
@abstractmethod @abstractmethod
def canEdit(self, context: Any, rootIf: Any) -> bool: def canEdit(self, context: Any, rootIf: Any) -> bool:
@ -185,7 +186,7 @@ class SyntheticContainerNode(UdbNode):
"dataSourceId": None, "dataSourceId": None,
"modelType": None, "modelType": None,
"effectiveNeutralize": self.getEffectiveFlag("neutralize", allDs, allFds, "aggregate"), "effectiveNeutralize": self.getEffectiveFlag("neutralize", allDs, allFds, "aggregate"),
"effectiveScope": self.getEffectiveFlag("scope", allDs, allFds, "aggregate") or "personal", "effectiveScope": "personal",
"effectiveRagIndexEnabled": self.getEffectiveFlag("ragIndexEnabled", allDs, allFds, "aggregate"), "effectiveRagIndexEnabled": self.getEffectiveFlag("ragIndexEnabled", allDs, allFds, "aggregate"),
"supportsRag": False, "supportsRag": False,
"canBeAdded": False, "canBeAdded": False,
@ -248,12 +249,14 @@ class _DataSourceFamilyNode(UdbNode):
return _isConnectionOwner(rootIf, str(context.user.id), self.connectionId) return _isConnectionOwner(rootIf, str(context.user.id), self.connectionId)
def getEffectiveFlag(self, flag, allDs, allFds, mode="aggregate") -> Any: def getEffectiveFlag(self, flag, allDs, allFds, mode="aggregate") -> Any:
if not self.supportsFlag(flag):
return False
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import ( from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
resolveEffectiveForPath, resolveEffectiveForPath,
) )
out = resolveEffectiveForPath(self.connectionId, self.sourceType, self.path, allDs, mode=mode) out = resolveEffectiveForPath(self.connectionId, self.sourceType, self.path, allDs, mode=mode)
key = "effective" + flag[0].upper() + flag[1:] key = "effective" + flag[0].upper() + flag[1:]
return out.get(key, False if flag != "scope" else "personal") return out.get(key, False)
def setFlag(self, flag, value, rootIf) -> List[str]: def setFlag(self, flag, value, rootIf) -> List[str]:
from modules.datamodels.datamodelDataSource import DataSource from modules.datamodels.datamodelDataSource import DataSource
@ -371,7 +374,7 @@ def _dsDict(node: _DataSourceFamilyNode, allDs: List[Dict[str, Any]]) -> Dict[st
"dataSourceId": node.rec.get("id") if node.rec else None, "dataSourceId": node.rec.get("id") if node.rec else None,
"modelType": "DataSource" if node.rec else None, "modelType": "DataSource" if node.rec else None,
"effectiveNeutralize": node.getEffectiveFlag("neutralize", allDs, [], "aggregate"), "effectiveNeutralize": node.getEffectiveFlag("neutralize", allDs, [], "aggregate"),
"effectiveScope": node.getEffectiveFlag("scope", allDs, [], "aggregate"), "effectiveScope": "personal",
"effectiveRagIndexEnabled": node.getEffectiveFlag("ragIndexEnabled", allDs, [], "aggregate"), "effectiveRagIndexEnabled": node.getEffectiveFlag("ragIndexEnabled", allDs, [], "aggregate"),
"supportsRag": True, "supportsRag": True,
"canBeAdded": node.rec is None, "canBeAdded": node.rec is None,
@ -900,8 +903,6 @@ def _aggregateFromChildren(node: UdbNode, flag: str,
""" """
children = node.getLogicalChildren(allDs, allFds, None, None) children = node.getLogicalChildren(allDs, allFds, None, None)
if not children: if not children:
if flag == "scope":
return "personal"
return False return False
seen = set() seen = set()
last: Any = None last: Any = None
@ -917,7 +918,7 @@ def _aggregateFromChildren(node: UdbNode, flag: str,
if len(seen) > 1: if len(seen) > 1:
return "mixed" return "mixed"
if not seen: if not seen:
return "personal" if flag == "scope" else False return False
return last return last

View file

@ -575,9 +575,6 @@ def _deleteUserDataFromFeatureDatabases(userId: str, currentUser) -> Dict[str, A
elif featureCode == "realestate": elif featureCode == "realestate":
from modules.features.realestate.interfaceFeatureRealEstate import getInterface as getRealEstateInterface from modules.features.realestate.interfaceFeatureRealEstate import getInterface as getRealEstateInterface
featureInterface = getRealEstateInterface(currentUser) featureInterface = getRealEstateInterface(currentUser)
elif featureCode == "chatbot":
from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
featureInterface = getChatbotInterface(currentUser)
elif featureCode == "neutralization": elif featureCode == "neutralization":
from modules.features.neutralization.interfaceFeatureNeutralizer import getInterface as getNeutralizerInterface from modules.features.neutralization.interfaceFeatureNeutralizer import getInterface as getNeutralizerInterface
featureInterface = getNeutralizerInterface(currentUser) featureInterface = getNeutralizerInterface(currentUser)

View file

@ -440,7 +440,6 @@ def _registerFeatureUiLabels():
"modules.features.workspace.mainWorkspace", "modules.features.workspace.mainWorkspace",
"modules.features.realEstate.mainRealEstate", "modules.features.realEstate.mainRealEstate",
"modules.features.neutralization.mainNeutralization", "modules.features.neutralization.mainNeutralization",
"modules.features.chatbot.mainChatbot",
) )
added = 0 added = 0
for modPath in _featureModulePaths: for modPath in _featureModulePaths:
@ -481,7 +480,6 @@ def _registerRbacLabels():
"modules.features.workspace.mainWorkspace", "modules.features.workspace.mainWorkspace",
"modules.features.realEstate.mainRealEstate", "modules.features.realEstate.mainRealEstate",
"modules.features.neutralization.mainNeutralization", "modules.features.neutralization.mainNeutralization",
"modules.features.chatbot.mainChatbot",
) )
added = 0 added = 0

View file

@ -330,16 +330,6 @@ NAVIGATION_SECTIONS = [
"adminOnly": True, "adminOnly": True,
"sysAdminOnly": True, "sysAdminOnly": True,
}, },
{
"id": "admin-stt-benchmark",
"objectKey": "ui.admin.sttBenchmark",
"label": t("STT Benchmark"),
"icon": "FaMicrophone",
"path": "/admin/stt-benchmark",
"order": 92,
"adminOnly": True,
"sysAdminOnly": True,
},
{ {
"id": "admin-languages", "id": "admin-languages",
"objectKey": "ui.admin.languages", "objectKey": "ui.admin.languages",

View file

@ -22,6 +22,7 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
return ActionResult.isFailure(error="prompt is required") return ActionResult.isFailure(error="prompt is required")
documentType = parameters.get("documentType") documentType = parameters.get("documentType")
documentTheme = parameters.get("documentTheme") or None
# Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers. # Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers.
resultType = parameters.get("outputFormat") or parameters.get("resultType") resultType = parameters.get("outputFormat") or parameters.get("resultType")
if isinstance(resultType, str): if isinstance(resultType, str):
@ -82,7 +83,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
outputFormat=resultType, # Can be None - AI determines from prompt outputFormat=resultType, # Can be None - AI determines from prompt
title=title, title=title,
parentOperationId=parentOperationId, parentOperationId=parentOperationId,
generationIntent="document" # NEW: Explicit intent, skips detection generationIntent="document", # NEW: Explicit intent, skips detection
documentTheme=documentTheme # Named style preset for the renderer
) )
# Convert AiResponse to ActionResult # Convert AiResponse to ActionResult

View file

@ -89,10 +89,10 @@ class MethodAi(MethodBase):
name="documentTheme", name="documentTheme",
type="str", type="str",
frontendType=FrontendType.SELECT, frontendType=FrontendType.SELECT,
frontendOptions=["general", "finance", "legal", "technical", "hr"], frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"],
required=False, required=False,
default="general", default="general",
description="Style hint for the document renderer (e.g. finance, legal). Used by the AI agent to choose colors and layout." description="Named style preset for the document renderer (general/finance/legal/technical/hr/marketing). The agent forwards it to the renderDocument tool's documentTheme."
), ),
"resultType": WorkflowActionParameter( "resultType": WorkflowActionParameter(
name="resultType", name="resultType",
@ -118,7 +118,7 @@ class MethodAi(MethodBase):
frontendType=FrontendType.CHECKBOX, frontendType=FrontendType.CHECKBOX,
required=False, required=False,
default=False, default=False,
description="If true, uses fast simple AI call without document generation pipeline. Use for chatbot responses and simple text generation." description="If true, uses fast simple AI call without document generation pipeline. Use for conversational responses and simple text generation."
), ),
"contentParts": WorkflowActionParameter( "contentParts": WorkflowActionParameter(
name="contentParts", name="contentParts",
@ -385,6 +385,15 @@ class MethodAi(MethodBase):
required=False, required=False,
description="Type of document (content hint for the model); used as title fallback when title is empty." description="Type of document (content hint for the model); used as title fallback when title is empty."
), ),
"documentTheme": WorkflowActionParameter(
name="documentTheme",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"],
required=False,
default="general",
description="Named style preset applied by the renderer (colors, fonts, spacing): general, finance, legal, technical, hr, marketing."
),
"resultType": WorkflowActionParameter( "resultType": WorkflowActionParameter(
name="resultType", name="resultType",
type="str", type="str",

View file

@ -1,6 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from .methodChatbot import MethodChatbot
__all__ = ['MethodChatbot']

View file

@ -1,3 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

View file

@ -1,157 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Query Database action for Chatbot operations.
Executes SQL queries via the preprocessor connector.
"""
import logging
import json
import time
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.connectors.connectorPreprocessor import PreprocessorConnector
logger = logging.getLogger(__name__)
# SQL keywords that must never appear in a query handed to the preprocessor.
_FORBIDDEN_SQL_KEYWORDS = (
    "INSERT", "UPDATE", "DELETE", "DROP", "ALTER",
    "CREATE", "TRUNCATE", "EXEC", "EXECUTE",
)


def _findForbiddenKeyword(sqlQuery: str):
    """Return the first forbidden keyword contained in ``sqlQuery``, else ``None``.

    Matching is case-insensitive and uses word boundaries, so a keyword is
    detected whatever delimiter surrounds it (newline, tab, ``;``, ``(``),
    while identifiers that merely contain it (``UPDATE_DATE``) are not flagged.
    The guard deliberately fails closed: a keyword inside a string literal is
    rejected as well.
    """
    import re  # local import: the module's top-level import block stays untouched

    upperSql = sqlQuery.upper()
    for keyword in _FORBIDDEN_SQL_KEYWORDS:
        if re.search(rf"\b{keyword}\b", upperSql):
            return keyword
    return None


@action
async def queryDatabase(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Execute a SQL query via the preprocessor connector.

    Parameters:
    - sqlQuery (str, required): SQL SELECT query to execute. Can also be extracted from analysis_result document if provided in documentList.
    - documentList (optional): document reference(s) whose JSON payload carries a
      ``sqlQuery`` field; only consulted when ``sqlQuery`` is not given.
    - parentOperationId (optional): forwarded to the progress logger.

    Returns:
    - ActionResult success with one text/plain ActionDocument holding the query
      result, or an ActionResult failure carrying the error message. The
      function never raises; every exception is converted into a failure.
    """
    operationId = None  # stays None until progress tracking has an id to close
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"chatbot_query_db_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Database Query",
            "Executing SQL Query",
            "Preprocessing API",
            parentOperationId=parentOperationId
        )

        def _fail(message: str) -> ActionResult:
            # Validation failures must also close the progress entry, otherwise
            # the operation is left open after the action has returned.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=message)

        # Get SQL query from parameters or extract from documentList
        sqlQuery = parameters.get("sqlQuery")

        # If sqlQuery not provided, try to extract from documentList (analysis_result)
        documentListParam = parameters.get("documentList") if not sqlQuery else None
        if documentListParam:
            # Get documents from previous task
            from modules.datamodels.datamodelDocref import DocumentReferenceList

            if isinstance(documentListParam, str):
                docList = DocumentReferenceList.from_string_list([documentListParam])
            elif isinstance(documentListParam, list):
                docList = DocumentReferenceList.from_string_list(documentListParam)
            else:
                docList = documentListParam

            # Get documents from workflow
            documents = self.services.chat.getChatDocumentsFromDocumentList(docList)

            # Try to extract SQL query from JSON document
            for doc in documents:
                try:
                    # ChatDocument objects have fileId - get file data from database
                    if not getattr(doc, 'fileId', None):
                        continue
                    fileData = self.services.interfaceDbComponent.getFileData(doc.fileId)
                    if not fileData:
                        continue
                    # Decode bytes if needed
                    docData = fileData.decode('utf-8') if isinstance(fileData, bytes) else str(fileData)
                    # Try to parse as JSON
                    analysisData = json.loads(docData)
                    sqlQuery = analysisData.get("sqlQuery")
                    if sqlQuery:
                        logger.info(f"Extracted SQL query from analysis_result document: {sqlQuery[:100]}...")
                        break
                except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as e:
                    logger.debug(f"Could not parse document as JSON: {e}")
                    continue

        if not sqlQuery:
            return _fail("SQL query is required. Provide sqlQuery parameter or analysis_result document with sqlQuery field.")

        # Update progress
        self.services.chat.progressLogUpdate(operationId, 0.3, "Validating query")

        # Validate: only SELECT queries allowed
        if not sqlQuery.strip().upper().startswith("SELECT"):
            return _fail("Only SELECT queries are allowed")

        forbidden = _findForbiddenKeyword(sqlQuery)
        if forbidden:
            return _fail(f"Forbidden SQL keyword detected: {forbidden}")

        # Initialize connector
        connector = PreprocessorConnector()
        try:
            # Update progress
            self.services.chat.progressLogUpdate(operationId, 0.5, "Executing query")
            result = await connector.executeQuery(sqlQuery)

            # Update progress
            self.services.chat.progressLogUpdate(operationId, 0.8, "Formatting results")

            # Generate meaningful filename
            meaningful_name = self._generateMeaningfulFileName(
                base_name="database_query",
                extension="txt",
                action_name="queryDatabase"
            )

            # Create validation metadata
            validationMetadata = self._createValidationMetadata(
                "queryDatabase",
                sqlQuery=sqlQuery[:200] if len(sqlQuery) > 200 else sqlQuery,  # Truncate for metadata
                resultLength=len(result)
            )

            # Create action document
            document = ActionDocument(
                documentName=meaningful_name,
                documentData=result,
                mimeType="text/plain",
                validationMetadata=validationMetadata
            )
        finally:
            # Close the connector on every path, including failures that
            # happen after the query itself succeeded.
            await connector.close()

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        logger.error(f"Error executing database query: {str(e)}")
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as finishError:
                # Best effort only: the original failure is what gets reported
                logger.debug(f"Could not close progress log {operationId}: {finishError}")
        return ActionResult.isFailure(
            error=str(e)
        )

View file

@ -1,55 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
from modules.workflows.methods.methodBase import MethodBase
from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
from modules.shared.frontendTypes import FrontendType
# Import actions
from .actions.queryDatabase import queryDatabase
logger = logging.getLogger(__name__)
class MethodChatbot(MethodBase):
    """Workflow method group that exposes the chatbot database actions."""

    def __init__(self, services):
        """Register the ``queryDatabase`` action and bind it to this instance."""
        super().__init__(services)
        self.name = "chatbot"
        self.description = "Chatbot operations"

        # Parameter descriptors are built first so the action definition below stays short.
        sqlQueryParameter = WorkflowActionParameter(
            name="sqlQuery",
            type="str",
            uiHint="textarea",
            frontendType=FrontendType.TEXTAREA,
            required=False,
            description="SQL SELECT query to execute. If not provided, will attempt to extract from analysis_result document in documentList."
        )
        documentListParameter = WorkflowActionParameter(
            name="documentList",
            type="DocumentList",
            frontendType=FrontendType.DOCUMENT_REFERENCE,
            required=False,
            description="Document reference(s) containing analysis_result with sqlQuery field. Used if sqlQuery parameter is not provided."
        )

        # RBAC integration: every action definition carries its actionId
        queryDatabaseDefinition = WorkflowActionDefinition(
            actionId="chatbot.queryDatabase",
            description="Execute a SQL SELECT query via the preprocessor connector. Returns formatted query results.",
            dynamicMode=False,
            outputType="QueryResult",
            parameters={
                "sqlQuery": sqlQueryParameter,
                "documentList": documentListParameter,
            },
            execute=queryDatabase.__get__(self, self.__class__)
        )
        self._actions = {"queryDatabase": queryDatabaseDefinition}

        # Validate actions after definition
        self._validateActions()

        # Register actions as methods (optional, for direct access)
        self.queryDatabase = queryDatabase.__get__(self, self.__class__)

View file

@ -81,7 +81,7 @@ pytest>=8.0.0
pytest-asyncio>=0.21.0 pytest-asyncio>=0.21.0
## Configuration Validation ## Configuration Validation
jsonschema>=4.0.0 # Required for chatbot workflow config validation jsonschema>=4.0.0 # Required for feature/workflow config validation
## Streaming JSON parser (memory-safe import of large DB exports) ## Streaming JSON parser (memory-safe import of large DB exports)
ijson>=3.2.0 ijson>=3.2.0
@ -123,11 +123,4 @@ extract-msg>=0.55.0
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326) pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
shapely>=2.0.0 # For geometric operations (intersections, area calculations) shapely>=2.0.0 # For geometric operations (intersections, area calculations)
geopandas>=0.14.0 # For reading and querying GeoPackage files geopandas>=0.14.0 # For reading and querying GeoPackage files
fiona>=1.9.0 # Required by geopandas for reading GeoPackage files fiona>=1.9.0 # Required by geopandas for reading GeoPackage files
## LangChain & LangGraph for chatbot workflow
langchain==1.2.8
langchain-core==1.2.8
langgraph==1.0.7
langchain-tavily==0.2.17
nest-asyncio>=1.6.0 # For running async code in sync context (LangGraph compatibility)

View file

@ -11,7 +11,6 @@ erstellt: <dateiname>_structure.json
Datenbanken: Datenbanken:
- poweron_app (User, Mandate, RBAC, Features, etc.) - poweron_app (User, Mandate, RBAC, Features, etc.)
- poweron_chat (Chat-Konversationen und Nachrichten) - poweron_chat (Chat-Konversationen und Nachrichten)
- poweron_chatbot (Chatbot-Feature: Konversationen, Nachrichten, Logs)
- poweron_management (Workflows, Prompts, Connections, etc.) - poweron_management (Workflows, Prompts, Connections, etc.)
- poweron_realestate (Real Estate Daten) - poweron_realestate (Real Estate Daten)
- poweron_trustee (Trustee Daten) - poweron_trustee (Trustee Daten)
@ -104,7 +103,6 @@ ALL_DATABASES = [
"poweron_app", "poweron_app",
"poweron_billing", "poweron_billing",
"poweron_chat", "poweron_chat",
"poweron_chatbot",
"poweron_commcoach", "poweron_commcoach",
"poweron_knowledge", "poweron_knowledge",
"poweron_management", "poweron_management",
@ -122,7 +120,6 @@ ALL_DATABASES = [
DATABASE_CONFIG = { DATABASE_CONFIG = {
"poweron_app": "DB_APP", "poweron_app": "DB_APP",
"poweron_chat": "DB_CHAT", "poweron_chat": "DB_CHAT",
"poweron_chatbot": "DB_CHATBOT",
"poweron_management": "DB_MANAGEMENT", "poweron_management": "DB_MANAGEMENT",
"poweron_realestate": "DB_REALESTATE", "poweron_realestate": "DB_REALESTATE",
"poweron_trustee": "DB_TRUSTEE", "poweron_trustee": "DB_TRUSTEE",
@ -772,7 +769,6 @@ def main():
Datenbanken: Datenbanken:
poweron_app - User, Mandate, RBAC, Features poweron_app - User, Mandate, RBAC, Features
poweron_chat - Chat-Konversationen poweron_chat - Chat-Konversationen
poweron_chatbot - Chatbot-Feature
poweron_management - Workflows, Prompts, Connections poweron_management - Workflows, Prompts, Connections
poweron_realestate - Real Estate Daten poweron_realestate - Real Estate Daten
poweron_trustee - Trustee Daten poweron_trustee - Trustee Daten
@ -781,7 +777,7 @@ Beispiele:
python script_db_export_migration.py python script_db_export_migration.py
python script_db_export_migration.py --pretty python script_db_export_migration.py --pretty
python script_db_export_migration.py -o backup.json --pretty python script_db_export_migration.py -o backup.json --pretty
python script_db_export_migration.py --db poweron_app,poweron_chat,poweron_chatbot python script_db_export_migration.py --db poweron_app,poweron_chat,poweron_management
python script_db_export_migration.py --exclude Token,AuthEvent --include-meta python script_db_export_migration.py --exclude Token,AuthEvent --include-meta
python script_db_export_migration.py --summary python script_db_export_migration.py --summary
""" """

View file

@ -1,101 +0,0 @@
#!/usr/bin/env python3
"""
Initialize poweron_chatbot database for the Chatbot feature.
Creates the poweron_chatbot database if it does not exist.
Uses DB_CHATBOT_* config (falls back to DB_*).
Tables (ChatbotConversation, ChatbotMessage, ChatbotDocument, ChatbotLog) are
auto-created by the connector on first use.
Usage:
python script_db_init_chatbot.py [--dry-run]
"""
import os
import sys
import argparse
import logging
from pathlib import Path
scriptPath = Path(__file__).resolve()
gatewayPath = scriptPath.parent.parent
sys.path.insert(0, str(gatewayPath))
os.chdir(str(gatewayPath))
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
import psycopg2
from modules.shared.configuration import APP_CONFIG
DB_NAME = "poweron_chatbot"
CONFIG_PREFIX = "DB_CHATBOT"
def _get_config():
    """Collect the connection settings, preferring DB_CHATBOT_* over the generic DB_* keys.

    Returns a dict with ``host``, ``port`` (int), ``user`` and ``password``.
    """
    host = APP_CONFIG.get(f"{CONFIG_PREFIX}_HOST")
    if not host:
        host = APP_CONFIG.get("DB_HOST", "localhost")

    rawPort = APP_CONFIG.get(f"{CONFIG_PREFIX}_PORT")
    if not rawPort:
        rawPort = APP_CONFIG.get("DB_PORT", "5432")
    port = int(rawPort)

    user = APP_CONFIG.get(f"{CONFIG_PREFIX}_USER")
    if not user:
        user = APP_CONFIG.get("DB_USER")

    # First truthy candidate wins; if none is set, the last lookup's value is kept.
    password = None
    for key in (
        f"{CONFIG_PREFIX}_PASSWORD_SECRET",
        f"{CONFIG_PREFIX}_PASSWORD",
        "DB_PASSWORD_SECRET",
        "DB_PASSWORD",
    ):
        password = APP_CONFIG.get(key)
        if password:
            break

    return {"host": host, "port": port, "user": user, "password": password}
def init_chatbot_db(dry_run: bool = False) -> bool:
    """Create poweron_chatbot database if it does not exist.

    Args:
        dry_run: When True, only report whether the database would be created.

    Returns:
        True when the database exists (or would be / was created), False when
        credentials are missing or any database operation fails. Errors are
        logged, never raised.
    """
    config = _get_config()
    if not config["user"] or not config["password"]:
        logger.error("DB_USER and DB_PASSWORD (or DB_CHATBOT_*) required")
        return False

    conn = None
    try:
        # Connect to the maintenance database; the target one may not exist yet.
        conn = psycopg2.connect(
            host=config["host"],
            port=config["port"],
            database="postgres",
            user=config["user"],
            password=config["password"],
        )
        # CREATE DATABASE cannot run inside a transaction block.
        conn.autocommit = True
        with conn.cursor() as cur:
            cur.execute(
                "SELECT 1 FROM pg_database WHERE datname = %s",
                (DB_NAME,),
            )
            exists = cur.fetchone() is not None
            if exists:
                logger.info(f"Database {DB_NAME} already exists")
            elif dry_run:
                logger.info(f"[DRY-RUN] Would create database {DB_NAME}")
            else:
                # DB_NAME is a module constant, not user input, so the
                # identifier can be interpolated directly.
                cur.execute(f'CREATE DATABASE "{DB_NAME}"')
                logger.info(f"Created database {DB_NAME}")
        return True
    except Exception as e:
        logger.error(f"Failed to init {DB_NAME}: {e}")
        return False
    finally:
        # Release the connection on every path, including failed queries.
        if conn is not None:
            try:
                conn.close()
            except Exception as closeError:
                logger.warning(f"Closing connection for {DB_NAME} failed: {closeError}")
def main():
    """Parse the command line, run the initialisation and exit with 0 on success, 1 on failure."""
    cli = argparse.ArgumentParser(description="Initialize poweron_chatbot database")
    cli.add_argument("--dry-run", action="store_true", help="Do not create, only report")
    options = cli.parse_args()

    succeeded = init_chatbot_db(dry_run=options.dry_run)
    if succeeded:
        sys.exit(0)
    sys.exit(1)
if __name__ == "__main__":
main()

View file

@ -47,7 +47,6 @@ ALL_DATABASES = [
"poweron_billing", "poweron_billing",
"poweron_workspace", "poweron_workspace",
"poweron_graphicaleditor", "poweron_graphicaleditor",
"poweron_chatbot",
"poweron_trustee", "poweron_trustee",
"poweron_commcoach", "poweron_commcoach",
"poweron_neutralization", "poweron_neutralization",

View file

@ -30,6 +30,5 @@ pytest tests/demo/test_demo_uc1_trustee.py -v
| `test_demo_bootstrap.py` | Idempotent load/remove, mandates, user, features, RMA, neutralization | | `test_demo_bootstrap.py` | Idempotent load/remove, mandates, user, features, RMA, neutralization |
| `test_demo_uc1_trustee.py` | Trustee instances, RMA config, system workflow templates | | `test_demo_uc1_trustee.py` | Trustee instances, RMA config, system workflow templates |
| `test_demo_uc2_realestate.py` | Workspace instances for agent demo | | `test_demo_uc2_realestate.py` | Workspace instances for agent demo |
| `test_demo_uc3_chatbot.py` | Chatbot instance, knowledge-base files |
| `test_demo_uc4_i18n.py` | i18n readiness, Spanish not pre-installed | | `test_demo_uc4_i18n.py` | i18n readiness, Spanish not pre-installed |
| `test_demo_neutralization.py` | Neutralization config enabled, test PDF exists | | `test_demo_neutralization.py` | Neutralization config enabled, test PDF exists |

View file

@ -60,19 +60,6 @@ class TestDemoBootstrap:
instances = _getFeatureInstances(db, mid, featureCode) instances = _getFeatureInstances(db, mid, featureCode)
assert len(instances) >= 1, f"Feature '{featureCode}' missing in Alpina Treuhand AG" assert len(instances) >= 1, f"Feature '{featureCode}' missing in Alpina Treuhand AG"
def test_alpinaNoChatbot(self, db, mandateAlpina):
"""Alpina should NOT have a chatbot instance."""
mid = mandateAlpina.get("id")
instances = _getFeatureInstances(db, mid, "chatbot")
assert len(instances) == 0, "Alpina Treuhand should not have chatbot"
def test_happylifeNoChatbot(self, db, mandateHappylife):
"""HappyLife also should NOT have a chatbot instance — chatbot was
removed from the InvestorDemo on 2026-04-20 (see changelog)."""
mid = mandateHappylife.get("id")
instances = _getFeatureInstances(db, mid, "chatbot")
assert len(instances) == 0, "HappyLife should no longer have chatbot (removed 2026-04-20)"
class TestDemoBootstrapRma: class TestDemoBootstrapRma:

View file

@ -1,39 +0,0 @@
"""
T-UC3: Knowledge Chatbot.
The chatbot feature instance was removed from the InvestorDemo on
2026-04-20 (see changelog); neither HappyLife nor Alpina bootstraps a
chatbot today; the actual RAG demo runs via workspace. We still verify
the knowledge-base demo files are present and that the bootstrap does
NOT (re)create chatbot instances in either mandate.
"""
import pytest
from pathlib import Path
from tests.demo.conftest import _getFeatureInstances
class TestChatbotSetup:
    """Guards against the demo bootstrap re-creating chatbot feature instances."""

    def test_chatbotNotInHappylife(self, db, mandateHappylife):
        """HappyLife should NOT have a chatbot instance (removed 2026-04-20)."""
        chatbotInstances = _getFeatureInstances(db, mandateHappylife.get("id"), "chatbot")
        assert len(chatbotInstances) == 0, "HappyLife should no longer bootstrap a chatbot instance"

    def test_chatbotNotInAlpina(self, db, mandateAlpina):
        """Alpina should NOT have a chatbot instance."""
        chatbotInstances = _getFeatureInstances(db, mandateAlpina.get("id"), "chatbot")
        assert len(chatbotInstances) == 0, "Alpina should not have chatbot"
class TestKnowledgeBaseFiles:
    """Checks that the demo knowledge-base documents ship with the repository."""

    def test_knowledgeBaseFilesExist(self):
        """Knowledge-base documents must exist in demoData."""
        # Three levels up from this test file, then demoData/knowledge-base.
        repoRoot = Path(__file__).resolve().parents[2]
        kbDir = repoRoot / "demoData" / "knowledge-base"
        assert kbDir.exists(), f"knowledge-base dir not found at {kbDir}"

        acceptedSuffixes = (".md", ".html", ".pdf", ".docx", ".txt")
        docs = []
        for entry in list(kbDir.iterdir()):
            if entry.suffix in acceptedSuffixes:
                docs.append(entry)
        assert len(docs) >= 3, f"Expected at least 3 knowledge-base docs, found {len(docs)}: {[f.name for f in docs]}"

View file

@ -0,0 +1,154 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""A3 / AC15: lazy file-reference image resolution for large documents.
Block images may carry only a ``fileId`` (no embedded base64). The renderer
fetches the bytes on demand via an injected ``_imageResolver``, so the document
JSON never holds every image's bytes simultaneously. These tests exercise the
PDF and DOCX block-image paths directly (real reportlab / python-docx), without
the full GenerationService/services plumbing.
"""
import io
import pytest
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import RendererPdf
from modules.serviceCenter.services.serviceGeneration.renderers.rendererDocx import RendererDocx
def _tinyPng() -> bytes:
    """Return the bytes of an 8x8 single-colour RGB PNG generated with Pillow."""
    from PIL import Image as PILImage

    stream = io.BytesIO()
    picture = PILImage.new("RGB", (8, 8), (200, 30, 30))
    picture.save(stream, format="PNG")
    return stream.getvalue()
class _ResolverSpy:
"""Records the fileIds it was asked for and returns fixed PNG bytes."""
def __init__(self, data: bytes):
self._data = data
self.calls = []
def __call__(self, fileId):
self.calls.append(fileId)
return self._data
def _imageElement(fileId: str):
return {"type": "image", "content": {"fileId": fileId, "altText": "Image"}}
# ── Base helper ────────────────────────────────────────────────────
def test_base_helper_resolves_bytes_to_base64():
    """``_lazyResolveImageBase64`` returns base64 that decodes to the resolver's bytes."""
    import base64 as _b64

    expected = _tinyPng()
    renderer = RendererPdf()
    renderer._imageResolver = _ResolverSpy(expected)
    encoded = renderer._lazyResolveImageBase64({"fileId": "img-1"})
    assert _b64.b64decode(encoded) == expected
def test_base_helper_without_resolver_returns_empty():
    """When the test assigns no ``_imageResolver``, the helper returns ``""``."""
    renderer = RendererPdf()
    resolved = renderer._lazyResolveImageBase64({"fileId": "img-1"})
    assert resolved == ""
def test_base_helper_without_fileid_returns_empty():
    """Content without a ``fileId`` key yields ``""`` even with a resolver set."""
    renderer = RendererPdf()
    renderer._imageResolver = _ResolverSpy(_tinyPng())
    resolved = renderer._lazyResolveImageBase64({"altText": "no id"})
    assert resolved == ""
# ── DOCX block images ──────────────────────────────────────────────
def test_docx_lazy_resolves_block_images():
    """DOCX block images given by ``fileId`` are fetched through the resolver, one call each."""
    from docx import Document

    imageCount = 5
    expectedIds = [f"img-{i}" for i in range(imageCount)]
    elements = [_imageElement(fileId) for fileId in expectedIds]

    resolver = _ResolverSpy(_tinyPng())
    renderer = RendererDocx()
    renderer._imageResolver = resolver
    document = Document()
    for element in elements:
        renderer._renderJsonImage(document, element, {})

    # One inline shape per element; resolver asked once per image, in element order.
    assert len(document.inline_shapes) == imageCount
    assert resolver.calls == expectedIds
    # Stays lazy: the element dicts never gained embedded base64.
    for element in elements:
        assert "base64Data" not in element["content"]
def test_docx_without_resolver_embeds_no_image():
    """Without an assigned ``_imageResolver`` no inline shape is added to the document."""
    from docx import Document

    document = Document()
    renderer = RendererDocx()  # deliberately no _imageResolver
    renderer._renderJsonImage(document, _imageElement("img-1"), {})
    assert len(document.inline_shapes) == 0
# ── PDF block images ───────────────────────────────────────────────
def test_pdf_lazy_resolves_block_image():
    """A PDF block image given by ``fileId`` is fetched through the resolver.

    Checks that the resolver is called exactly once with the element's
    ``fileId``, that a reportlab ``Image`` flowable is produced, and that the
    element dict did not gain a ``base64Data`` key.
    """
    from reportlab.platypus import Image as ReportLabImage

    spy = _ResolverSpy(_tinyPng())
    r = RendererPdf()
    r._imageResolver = spy
    styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
    el = _imageElement("img-1")
    flowables = r._renderJsonImage(el, styles)
    try:
        assert spy.calls == ["img-1"]
        assert any(isinstance(f, ReportLabImage) for f in flowables)
        assert "base64Data" not in el["content"]
    finally:
        # Always clean up, so a failing assertion does not leave temp image
        # files behind (previously cleanup only ran when all asserts passed).
        r._cleanupTempImageFiles()
def test_pdf_without_resolver_returns_placeholder():
    """Without an assigned ``_imageResolver`` no reportlab ``Image`` flowable is produced."""
    from reportlab.platypus import Image as ReportLabImage

    renderer = RendererPdf()  # deliberately no _imageResolver
    styles = renderer._convertUnifiedStyleToInternal(resolveStyle(None))
    rendered = renderer._renderJsonImage(_imageElement("img-1"), styles)
    imageFlowables = [f for f in rendered if isinstance(f, ReportLabImage)]
    assert not imageFlowables
def test_pdf_images_are_file_backed_and_cleaned():
    """Step 2: PDF image flowables are backed by tracked temp files.

    Each rendered image's ``filename`` must be listed in ``_tempImageFiles``
    and exist on disk; after ``_cleanupTempImageFiles`` the files are gone and
    the tracking list is empty.
    """
    import os
    from reportlab.platypus import Image as ReportLabImage

    renderer = RendererPdf()
    renderer._imageResolver = _ResolverSpy(_tinyPng())
    renderer._tempImageFiles = []
    styles = renderer._convertUnifiedStyleToInternal(resolveStyle(None))

    imageCount = 4
    images = []
    for index in range(imageCount):
        rendered = renderer._renderJsonImage(_imageElement(f"img-{index}"), styles)
        images.extend(f for f in rendered if isinstance(f, ReportLabImage))
    assert len(images) == imageCount

    # Every image points at a tracked temp file that currently exists on disk.
    assert len(renderer._tempImageFiles) == imageCount
    for image in images:
        assert image.filename in renderer._tempImageFiles
        assert os.path.exists(image.filename)

    # Snapshot the paths first: cleanup is expected to empty the tracking list.
    trackedPaths = list(renderer._tempImageFiles)
    renderer._cleanupTempImageFiles()
    for path in trackedPaths:
        assert not os.path.exists(path)
    assert renderer._tempImageFiles == []

View file

@ -0,0 +1,146 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""A3: layout primitives (cover_page, image_grid).
Covers the markdown authoring conventions (fenced ```cover_page / ```image_grid
blocks) and the PDF/DOCX renderer handlers, using real reportlab / python-docx.
"""
import base64
import io
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import RendererPdf
from modules.serviceCenter.services.serviceGeneration.renderers.rendererDocx import RendererDocx
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
from modules.datamodels.datamodelJson import supportedSectionTypes
def _tinyPngB64() -> str:
    """Return an 8x8 single-colour PNG (generated with Pillow) as an ASCII base64 string."""
    from PIL import Image as PILImage

    stream = io.BytesIO()
    picture = PILImage.new("RGB", (8, 8), (30, 120, 200))
    picture.save(stream, format="PNG")
    rawPng = stream.getvalue()
    return base64.b64encode(rawPng).decode("ascii")
def _imgContent():
    """Return image content with embedded base64 PNG data and alt text ``"Pic"``."""
    encoded = _tinyPngB64()
    return {"base64Data": encoded, "altText": "Pic"}
# ── datamodel ───────────────────────────────────────────────────────
def test_layout_types_are_registered():
    """Both layout primitives must appear in ``supportedSectionTypes``."""
    for sectionType in ("cover_page", "image_grid"):
        assert sectionType in supportedSectionTypes
# ── markdown authoring ──────────────────────────────────────────────
def test_markdown_parses_cover_page_block():
    """A fenced ``cover_page`` block becomes one ``cover_page`` section with its fields."""
    lines = [
        "```cover_page",
        "title: Klageschrift",
        "subtitle: In Sachen A gegen B",
        "author: Dr. Muster",
        "date: 2026-06-02",
        "logo: file:logo-1",
        "```",
    ]
    # Every line, including the closing fence, is newline-terminated.
    md = "\n".join(lines) + "\n"
    parsed = markdownToDocumentJson(md, "T")
    sections = parsed["documents"][0]["sections"]
    assert len(sections) == 1
    section = sections[0]
    assert section["content_type"] == "cover_page"
    content = section["elements"][0]["content"]
    expected = {
        "title": "Klageschrift",
        "subtitle": "In Sachen A gegen B",
        "author": "Dr. Muster",
        "date": "2026-06-02",
        # The ``logo: file:<id>`` line is expected under ``_fileRef``.
        "_fileRef": "logo-1",
    }
    for key, value in expected.items():
        assert content[key] == value
def test_markdown_parses_image_grid_block():
    """A fenced ``image_grid`` block yields the column count and three image entries."""
    lines = [
        "```image_grid",
        "columns: 3",
        '![Foto1](file:img-1 "200pt")',
        "file:img-2",
        "https://example.com/x.png",
        "```",
    ]
    md = "\n".join(lines) + "\n"
    parsed = markdownToDocumentJson(md, "T")
    section = parsed["documents"][0]["sections"][0]
    assert section["content_type"] == "image_grid"
    content = section["elements"][0]["content"]
    assert content["columns"] == 3
    images = content["images"]
    assert len(images) == 3
    first, second, third = images[0], images[1], images[2]
    # Markdown image syntax with a "200pt" title: file reference plus width.
    assert first["_fileRef"] == "img-1"
    assert first["widthPt"] == 200
    # Bare ``file:`` reference.
    assert second["_fileRef"] == "img-2"
    # Bare URL.
    assert third["_srcUrl"] == "https://example.com/x.png"
def test_markdown_plain_code_block_still_works():
    """An ordinary fenced code block still parses as ``code_block`` with its language."""
    md = "```python\nprint('x')\n```\n"
    parsed = markdownToDocumentJson(md, "T")
    section = parsed["documents"][0]["sections"][0]
    assert section["content_type"] == "code_block"
    codeContent = section["elements"][0]["content"]
    assert codeContent["language"] == "python"
# ── PDF handlers ────────────────────────────────────────────────────
def test_pdf_cover_page_emits_page_break():
    """The PDF cover page yields at least two flowables, one of them a ``PageBreak``."""
    from reportlab.platypus import PageBreak

    renderer = RendererPdf()
    styles = renderer._convertUnifiedStyleToInternal(resolveStyle(None))
    coverElement = {"content": {"title": "Report", "subtitle": "2026", "author": "PM"}}
    flowables = renderer._renderCoverPage(coverElement, styles)
    pageBreaks = [f for f in flowables if isinstance(f, PageBreak)]
    assert pageBreaks
    assert len(flowables) >= 2
def test_pdf_image_grid_builds_table():
    """The PDF image grid renders as exactly one ``Table`` with the expected row count."""
    from reportlab.platypus import Table

    r = RendererPdf()
    r._tempImageFiles = []
    styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
    el = {"content": {"columns": 2, "images": [_imgContent(), _imgContent(), _imgContent()]}}
    flowables = r._renderImageGrid(el, styles)
    try:
        tables = [f for f in flowables if isinstance(f, Table)]
        assert len(tables) == 1
        # 3 images, 2 columns -> 2 rows (last cell padded).
        assert len(tables[0]._cellvalues) == 2
    finally:
        # Always clean up, so a failing assertion does not leave temp image
        # files behind (previously cleanup only ran when all asserts passed).
        r._cleanupTempImageFiles()
# ── DOCX handlers ───────────────────────────────────────────────────
def test_docx_cover_page_renders_and_breaks():
    """The DOCX cover page adds paragraphs and a page-break element to the document."""
    from docx import Document

    document = Document()
    paragraphsBefore = len(document.paragraphs)
    coverElement = {"content": {"title": "Klageschrift", "author": "Dr. M"}}
    RendererDocx()._renderCoverPage(document, coverElement, {})
    # Paragraph count must have grown.
    assert len(document.paragraphs) > paragraphsBefore
    # Page break is detected by substring match on the document XML.
    xml = document.element.xml
    assert "w:br" in xml
    assert 'type="page"' in xml
def test_docx_image_grid_builds_table_with_pictures():
    """The DOCX image grid yields one 2x2 table containing three inline pictures."""
    from docx import Document

    document = Document()
    gridContent = {"columns": 2, "images": [_imgContent(), _imgContent(), _imgContent()]}
    RendererDocx()._renderImageGrid(document, {"content": gridContent}, {})
    assert len(document.tables) == 1
    grid = document.tables[0]
    # 3 images over 2 columns -> 2 rows.
    assert len(grid.columns) == 2
    assert len(grid.rows) == 2
    assert len(document.inline_shapes) == 3

View file

@ -1,7 +1,12 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
import pytest import pytest
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE from modules.serviceCenter.services.serviceGeneration.styleDefaults import (
resolveStyle,
resolveTheme,
DEFAULT_STYLE,
THEME_PRESETS,
)
def test_resolve_none_returns_defaults(): def test_resolve_none_returns_defaults():
@ -44,3 +49,45 @@ def test_override_document_title_partial_merge():
assert result["documentTitle"]["sizePt"] == 32 assert result["documentTitle"]["sizePt"] == 32
assert result["documentTitle"]["align"] == "center" assert result["documentTitle"]["align"] == "center"
assert result["headings"]["h1"]["sizePt"] == DEFAULT_STYLE["headings"]["h1"]["sizePt"] assert result["headings"]["h1"]["sizePt"] == DEFAULT_STYLE["headings"]["h1"]["sizePt"]
# ── Theme presets (A3) ─────────────────────────────────────────────
def test_resolve_theme_unknown_is_empty():
    """``resolveTheme`` yields an empty dict for ``None`` and for an unknown name."""
    for themeName in (None, "does-not-exist"):
        assert resolveTheme(themeName) == {}
def test_resolve_theme_case_insensitive():
    """An upper-case theme name resolves to the lower-case preset entry."""
    resolved = resolveTheme("FINANCE")
    assert resolved == THEME_PRESETS["finance"]
def test_general_theme_equals_defaults():
    """Resolving with the ``general`` theme and no style equals ``DEFAULT_STYLE``."""
    resolved = resolveStyle(None, "general")
    assert resolved == DEFAULT_STYLE
def test_theme_applies_preset_over_defaults():
    """The ``legal`` theme overrides selected keys; other keys keep their defaults."""
    styled = resolveStyle(None, "legal")
    # Expected from the legal preset: serif primary font and justified body text.
    assert styled["fonts"]["primary"] == "Times New Roman"
    assert styled["paragraph"]["align"] == "justify"
    # Page format is expected to still match DEFAULT_STYLE.
    defaultFormat = DEFAULT_STYLE["page"]["format"]
    assert styled["page"]["format"] == defaultFormat
def test_explicit_style_overrides_theme():
    """An explicit style value wins over the theme; untouched theme keys still apply."""
    explicitStyle = {"colors": {"primary": "#FF0000"}}
    merged = resolveStyle(explicitStyle, "finance")
    # The explicit primary colour must win over the finance theme's value.
    assert merged["colors"]["primary"] == "#FF0000"
    # A theme key not overridden by the explicit style is taken from the preset.
    financeHeaderBg = THEME_PRESETS["finance"]["table"]["headerBg"]
    assert merged["table"]["headerBg"] == financeHeaderBg
def test_marketing_theme_enlarges_title_and_images():
    """The ``marketing`` theme sets title size 34 pt and default image width 540 pt."""
    styled = resolveStyle(None, "marketing")
    titleSize = styled["documentTitle"]["sizePt"]
    assert titleSize == 34
    imageWidth = styled["image"]["defaultWidthPt"]
    assert imageWidth == 540
def test_unknown_theme_falls_back_to_defaults():
    """An unknown theme name with no explicit style resolves to ``DEFAULT_STYLE``."""
    resolved = resolveStyle(None, "rainbow")
    assert resolved == DEFAULT_STYLE

View file

@ -264,7 +264,6 @@ def _instantiateMethod(methodCls):
("modules.workflows.methods.methodFile.methodFile", "MethodFile"), ("modules.workflows.methods.methodFile.methodFile", "MethodFile"),
("modules.workflows.methods.methodContext.methodContext", "MethodContext"), ("modules.workflows.methods.methodContext.methodContext", "MethodContext"),
("modules.workflows.methods.methodJira.methodJira", "MethodJira"), ("modules.workflows.methods.methodJira.methodJira", "MethodJira"),
("modules.workflows.methods.methodChatbot.methodChatbot", "MethodChatbot"),
]) ])
def test_methodSignaturesAreHealthy(modulePath, className): def test_methodSignaturesAreHealthy(modulePath, className):
"""Each shipping Method's _actions must validate against the catalog.""" """Each shipping Method's _actions must validate against the catalog."""

View file

@ -0,0 +1,149 @@
# Copyright (c) 2026 Patrick Motsch
# All rights reserved.
"""A2: type/inheritance-aware field neutralization for feature source data.
Tests the pure policy logic in
``modules.serviceCenter.services.serviceAgent.featureDataProvider._neutralizeAndSerializeRows``
with a fake NeutralizationService (no DB, no real LLM).
Rules under test:
1. strings -> substring-neutralized when effective (explicit OR inherited)
2. binary -> dropped when neutralization applies
3. other scalars -> neutralized ONLY when the field flag is explicit
"""
import asyncio
import datetime
from modules.serviceCenter.services.serviceAgent.featureDataProvider import (
_neutralizeAndSerializeRows,
)
class _FakeNeutralizer:
"""Deterministic stand-in: replaces the token 'SECRET' with a placeholder.
Records every text it was asked to process so tests can assert that the
field name was passed as a type hint (prefix ``"<field>: "``).
"""
def __init__(self):
self.seen = []
async def processTextAsync(self, text, fileId=None):
self.seen.append(text)
neutralized = text.replace("SECRET", "[name.00000000-0000-0000-0000-000000000000]")
return {"neutralized_text": neutralized}
def _run(rows, policy, neutralizer):
    """Run the async ``_neutralizeAndSerializeRows`` to completion and return its result."""
    coroutine = _neutralizeAndSerializeRows(rows, policy, neutralizer)
    return asyncio.run(coroutine)
def test_string_substring_neutralized_when_inherited():
    """AC7: with ``tableActive`` and no explicit fields, strings are substring-neutralized."""
    placeholder = "[name.00000000-0000-0000-0000-000000000000]"
    neutralizer = _FakeNeutralizer()
    result = _run(
        [{"id": "row-1", "note": "Patient SECRET has a fever"}],
        {"tableActive": True, "explicitFields": set()},
        neutralizer,
    )
    row = result[0]
    assert row["note"] == "Patient " + placeholder + " has a fever"
    # Surrounding text is preserved (the value is NOT replaced as a whole).
    assert row["note"].startswith("Patient ")
    # ``id`` is structural and must come back unchanged.
    assert row["id"] == "row-1"
def test_field_name_passed_as_type_hint():
    """AC8: the field name is prepended as a type hint and absent from the result."""
    neutralizer = _FakeNeutralizer()
    result = _run(
        [{"id": "row-1", "lastName": "SECRET"}],
        {"tableActive": True, "explicitFields": set()},
        neutralizer,
    )
    # The returned value carries only the placeholder, not the "lastName: " prefix.
    assert result[0]["lastName"] == "[name.00000000-0000-0000-0000-000000000000]"
    hinted = [text for text in neutralizer.seen if text.startswith("lastName: ")]
    assert hinted
def test_binary_dropped_when_active():
    """AC9: a bytes-valued column is removed from the row when neutralization applies."""
    neutralizer = _FakeNeutralizer()
    sourceRow = {"id": "row-1", "photo": b"\x89PNG\x00\x01", "note": "ok"}
    result = _run([sourceRow], {"tableActive": True, "explicitFields": set()}, neutralizer)
    row = result[0]
    assert "photo" not in row
    assert row["id"] == "row-1"
def test_numeric_unchanged_when_only_inherited():
    """AC10: numeric and date fields are not neutralized through inheritance alone."""
    neutralizer = _FakeNeutralizer()
    sourceRow = {"id": "row-1", "amount": 4200, "bookedAt": datetime.date(2026, 1, 2)}
    result = _run([sourceRow], {"tableActive": True, "explicitFields": set()}, neutralizer)
    row = result[0]
    assert row["amount"] == 4200
    # The date comes back as its ISO-format string.
    assert row["bookedAt"] == "2026-01-02"
def test_numeric_neutralized_when_explicit():
    """AC11: an explicitly flagged numeric field becomes a whole-value placeholder."""
    neutralizer = _FakeNeutralizer()
    result = _run(
        [{"id": "row-1", "amount": 4200}],
        {"tableActive": False, "explicitFields": {"amount"}},
        neutralizer,
    )
    maskedAmount = result[0]["amount"]
    assert maskedAmount.startswith("[NEUT.amount.")
    assert maskedAmount.endswith("]")
def test_failsafe_redacts_when_no_neutralizer():
    """With no neutralizer but neutralization required, the value fails closed to ``[REDACTED]``."""
    sourceRows = [{"id": "row-1", "note": "Patient SECRET"}]
    activePolicy = {"tableActive": True, "explicitFields": set()}
    result = _run(sourceRows, activePolicy, None)
    assert result[0]["note"] == "[REDACTED]"
def test_dedup_across_rows_single_call_per_unique_value():
    """Identical (field, value) pairs are sent to the neutralizer once and reused."""
    neutralizer = _FakeNeutralizer()
    sourceRows = [{"id": rowId, "note": "SECRET"} for rowId in ("a", "b")]
    result = _run(sourceRows, {"tableActive": True, "explicitFields": set()}, neutralizer)
    firstNote = result[0]["note"]
    secondNote = result[1]["note"]
    assert firstNote == secondNote
    # 'note: SECRET' must appear exactly once in the call log (deduplicated).
    assert neutralizer.seen.count("note: SECRET") == 1
def test_inactive_table_leaves_strings_untouched():
    """With ``tableActive`` off and no explicit fields, strings pass through unchanged."""
    neutralizer = _FakeNeutralizer()
    inactivePolicy = {"tableActive": False, "explicitFields": set()}
    result = _run([{"id": "row-1", "note": "Patient SECRET"}], inactivePolicy, neutralizer)
    assert result[0]["note"] == "Patient SECRET"
    # The neutralizer must not have been called at all.
    assert neutralizer.seen == []
def test_rag_bootstrap_parity_with_query_path():
    """AC12: the RAG bootstrap indexes the same neutralized rows the query path returns.

    Rows are first passed through ``_neutralizeAndSerializeRows`` (via ``_run``)
    and the finalized row is then turned into embedding text with
    ``_serializeRowToText``. The index text must contain the placeholder, must
    not contain the raw secret, and must omit the structural fields.
    """
    from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import _serializeRowToText

    neutralizer = _FakeNeutralizer()
    sourceRow = {"id": "row-1", "sysCreatedAt": "2026-01-01", "note": "Patient SECRET has a fever"}
    activePolicy = {"tableActive": True, "explicitFields": set()}
    # Same call the sub-agent query path makes.
    finalizedRows = _run([sourceRow], activePolicy, neutralizer)
    indexText = _serializeRowToText(finalizedRows[0])

    # The neutralized placeholder is indexed, the raw secret is not.
    assert "[name.00000000-0000-0000-0000-000000000000]" in indexText
    assert "SECRET" not in indexText
    # Structural fields are excluded from the embedding text.
    assert "id:" not in indexText
    assert "sysCreatedAt" not in indexText

View file

@ -27,7 +27,6 @@ def _ds(idVal: str, path: str, **flags) -> dict:
"path": path, "path": path,
"neutralize": None, "neutralize": None,
"ragIndexEnabled": None, "ragIndexEnabled": None,
"scope": None,
} }
base.update(flags) base.update(flags)
return base return base
@ -97,15 +96,6 @@ class TestEffectiveFlagWalk(unittest.TestCase):
leaf = _ds("l", "/anything/anywhere") leaf = _ds("l", "/anything/anywhere")
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf])) self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
def test_scope_inheritance_with_string_default(self):
root = _ds("r", "/", scope="mandate")
leaf = _ds("l", "/folder")
self.assertEqual(_inheritFlags.getEffectiveFlag(leaf, "scope", [root, leaf]), "mandate")
def test_scope_default_personal_when_empty(self):
leaf = _ds("l", "/folder")
self.assertEqual(_inheritFlags.getEffectiveFlag(leaf, "scope", [leaf]), "personal")
def test_unknown_flag_raises(self): def test_unknown_flag_raises(self):
leaf = _ds("l", "/") leaf = _ds("l", "/")
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
@ -158,20 +148,6 @@ class TestEffectiveFlagAggregate(unittest.TestCase):
allDs = [root, child1, child2] allDs = [root, child1, child2]
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"), "mixed") self.assertEqual(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"), "mixed")
def test_mixed_scope(self):
root = _ds("r", "/", scope="personal")
child1 = _ds("c1", "/a", scope="team")
child2 = _ds("c2", "/b") # inherits personal from root
allDs = [root, child1, child2]
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "mixed")
def test_all_scope_same_explicit_returns_concrete(self):
root = _ds("r", "/", scope="team")
child1 = _ds("c1", "/a", scope="team")
child2 = _ds("c2", "/b") # inherits team
allDs = [root, child1, child2]
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "team")
def test_connection_root_aggregate_cross_sourcetype(self): def test_connection_root_aggregate_cross_sourcetype(self):
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True) connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False) spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
@ -531,20 +507,20 @@ class TestBuildEffectiveByWorkspaceFds(unittest.TestCase):
class TestResolveEffectiveForPath(unittest.TestCase): class TestResolveEffectiveForPath(unittest.TestCase):
def test_with_exact_record(self): def test_with_exact_record(self):
root = _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=False) root = _ds("r", "/", neutralize=True, ragIndexEnabled=False)
leaf = _ds("l", "/folder/sub", neutralize=False) leaf = _ds("l", "/folder/sub", neutralize=False)
allDs = [root, leaf] allDs = [root, leaf]
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/folder/sub", allDs) result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/folder/sub", allDs)
self.assertEqual(result["effectiveNeutralize"], False) self.assertEqual(result["effectiveNeutralize"], False)
self.assertEqual(result["effectiveScope"], "mandate") self.assertNotIn("effectiveScope", result)
self.assertEqual(result["effectiveRagIndexEnabled"], False) self.assertEqual(result["effectiveRagIndexEnabled"], False)
def test_without_record_inherits_from_ancestor(self): def test_without_record_inherits_from_ancestor(self):
root = _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=True) root = _ds("r", "/", neutralize=True, ragIndexEnabled=True)
allDs = [root] allDs = [root]
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/deep/path/file.txt", allDs) result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/deep/path/file.txt", allDs)
self.assertEqual(result["effectiveNeutralize"], True) self.assertEqual(result["effectiveNeutralize"], True)
self.assertEqual(result["effectiveScope"], "mandate") self.assertNotIn("effectiveScope", result)
self.assertEqual(result["effectiveRagIndexEnabled"], True) self.assertEqual(result["effectiveRagIndexEnabled"], True)
def test_without_record_inherits_from_closest_ancestor(self): def test_without_record_inherits_from_closest_ancestor(self):
@ -559,7 +535,7 @@ class TestResolveEffectiveForPath(unittest.TestCase):
allDs: list = [] allDs: list = []
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/path", allDs) result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/path", allDs)
self.assertEqual(result["effectiveNeutralize"], False) self.assertEqual(result["effectiveNeutralize"], False)
self.assertEqual(result["effectiveScope"], "personal") self.assertNotIn("effectiveScope", result)
self.assertEqual(result["effectiveRagIndexEnabled"], False) self.assertEqual(result["effectiveRagIndexEnabled"], False)
def test_connection_root_covers_service_subtree(self): def test_connection_root_covers_service_subtree(self):
@ -620,14 +596,15 @@ class TestResolveEffectiveForFds(unittest.TestCase):
result = _inheritFlags.resolveEffectiveForFds("fi-1", "*", None, allFds, mode="aggregate") result = _inheritFlags.resolveEffectiveForFds("fi-1", "*", None, allFds, mode="aggregate")
self.assertEqual(result["effectiveRagIndexEnabled"], "mixed") self.assertEqual(result["effectiveRagIndexEnabled"], "mixed")
def test_inheritable_fds_flags_excludes_scope(self): def test_inheritable_flags_and_fds_flags(self):
self.assertIn("ragIndexEnabled", _inheritFlags._INHERITABLE_FDS_FLAGS) self.assertIn("ragIndexEnabled", _inheritFlags._INHERITABLE_FDS_FLAGS)
self.assertIn("neutralize", _inheritFlags._INHERITABLE_FDS_FLAGS) self.assertIn("neutralize", _inheritFlags._INHERITABLE_FDS_FLAGS)
self.assertNotIn("scope", _inheritFlags._INHERITABLE_FDS_FLAGS) self.assertNotIn("scope", _inheritFlags._INHERITABLE_FDS_FLAGS)
self.assertNotIn("scope", _inheritFlags._INHERITABLE_FLAGS)
# =========================================================================== # ===========================================================================
# FDS cascade resets RAG (in addition to neutralize and scope) # FDS cascade resets RAG (in addition to neutralize)
# =========================================================================== # ===========================================================================
class TestCascadeResetFdsRag(unittest.TestCase): class TestCascadeResetFdsRag(unittest.TestCase):
@ -678,15 +655,6 @@ class TestVirtualCoordAggregate(unittest.TestCase):
) )
self.assertEqual(result["effectiveNeutralize"], "mixed") self.assertEqual(result["effectiveNeutralize"], "mixed")
def test_virtual_folder_mixed_scope(self):
child1 = _ds("c1", "/virtual/a", scope="mandate")
child2 = _ds("c2", "/virtual/b", scope="personal")
allDs = [child1, child2]
result = _inheritFlags.resolveEffectiveForPath(
"conn-1", "sharepointFolder", "/virtual", allDs, mode="aggregate",
)
self.assertEqual(result["effectiveScope"], "mixed")
def test_virtual_folder_mixed_rag(self): def test_virtual_folder_mixed_rag(self):
child1 = _ds("c1", "/virtual/a", ragIndexEnabled=True) child1 = _ds("c1", "/virtual/a", ragIndexEnabled=True)
child2 = _ds("c2", "/virtual/b", ragIndexEnabled=False) child2 = _ds("c2", "/virtual/b", ragIndexEnabled=False)

View file

@ -46,10 +46,10 @@ class TestSupportsFlag(unittest.TestCase):
self.assertFalse(n.supportsFlag("scope")) self.assertFalse(n.supportsFlag("scope"))
self.assertFalse(n.supportsFlag("ragIndexEnabled")) self.assertFalse(n.supportsFlag("ragIndexEnabled"))
def test_connection_supports_all_three(self): def test_connection_supports_neutralize_and_rag(self):
n = ConnectionNode("c1", "msft", label="m", parentKey="personalRoot", rec=None) n = ConnectionNode("c1", "msft", label="m", parentKey="personalRoot", rec=None)
self.assertTrue(n.supportsFlag("neutralize")) self.assertTrue(n.supportsFlag("neutralize"))
self.assertTrue(n.supportsFlag("scope")) self.assertFalse(n.supportsFlag("scope"))
self.assertTrue(n.supportsFlag("ragIndexEnabled")) self.assertTrue(n.supportsFlag("ragIndexEnabled"))
def test_fds_table_supports_neutralize_and_rag_but_not_scope(self): def test_fds_table_supports_neutralize_and_rag_but_not_scope(self):
@ -137,7 +137,7 @@ class TestGetEffectiveFlag(unittest.TestCase):
def test_ds_walk_inherits_from_authority_root(self): def test_ds_walk_inherits_from_authority_root(self):
root = { root = {
"id": "r", "connectionId": "c", "sourceType": "msft", "path": "/", "id": "r", "connectionId": "c", "sourceType": "msft", "path": "/",
"userId": "user-1", "neutralize": True, "scope": None, "ragIndexEnabled": None, "userId": "user-1", "neutralize": True, "ragIndexEnabled": None,
} }
node = FolderNode( node = FolderNode(
connectionId="c", service="sharepoint", sourceType="sharepointFolder", connectionId="c", service="sharepoint", sourceType="sharepointFolder",