fixes private model and udb scoping sources
This commit is contained in:
parent
24899b0cf2
commit
d61e29bcac
90 changed files with 2323 additions and 9697 deletions
29
app.py
29
app.py
|
|
@ -282,7 +282,7 @@ initLogging()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
|
instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
|
||||||
|
|
||||||
# Pre-warm AI connectors on process load (before lifespan). Critical for chatbot latency.
|
# Pre-warm AI connectors on process load (before lifespan). Critical for AI/agent latency.
|
||||||
try:
|
try:
|
||||||
import modules.aicore.aicoreModelRegistry # noqa: F401
|
import modules.aicore.aicoreModelRegistry # noqa: F401
|
||||||
logger.info("AI connectors pre-warm (app load) triggered")
|
logger.info("AI connectors pre-warm (app load) triggered")
|
||||||
|
|
@ -428,10 +428,20 @@ async def lifespan(app: FastAPI):
|
||||||
|
|
||||||
# --- Shutdown sequence (protected against CancelledError) ---
|
# --- Shutdown sequence (protected against CancelledError) ---
|
||||||
try:
|
try:
|
||||||
# 1. Stop scheduler first (removes all pending cron/interval jobs)
|
# 1. Signal DB layer to abort in-flight borrow waits immediately.
|
||||||
|
# This MUST happen first so that sync worker threads stuck in
|
||||||
|
# _acquireConn (30 s poll loop) bail out within one backoff tick
|
||||||
|
# instead of blocking process exit for the full borrow timeout.
|
||||||
|
try:
|
||||||
|
from modules.connectors.connectorDbPostgre import closeAllPools
|
||||||
|
closeAllPools()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Closing DB connection pools failed: {e}")
|
||||||
|
|
||||||
|
# 2. Stop scheduler (removes all pending cron/interval jobs)
|
||||||
eventManager.stop()
|
eventManager.stop()
|
||||||
|
|
||||||
# 2. Stop Feature Containers (Plug&Play)
|
# 3. Stop Feature Containers (Plug&Play)
|
||||||
try:
|
try:
|
||||||
mainModules = loadFeatureMainModules()
|
mainModules = loadFeatureMainModules()
|
||||||
for featureName, module in mainModules.items():
|
for featureName, module in mainModules.items():
|
||||||
|
|
@ -444,14 +454,6 @@ async def lifespan(app: FastAPI):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not shutdown feature containers: {e}")
|
logger.warning(f"Could not shutdown feature containers: {e}")
|
||||||
|
|
||||||
# 3. Close all PostgreSQL connection pools (LAST -- features may still
|
|
||||||
# issue DB calls during their onStop hooks)
|
|
||||||
try:
|
|
||||||
from modules.connectors.connectorDbPostgre import closeAllPools
|
|
||||||
closeAllPools()
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Closing DB connection pools failed: {e}")
|
|
||||||
|
|
||||||
logger.info("Application has been shut down")
|
logger.info("Application has been shut down")
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
|
|
@ -628,8 +630,7 @@ app.include_router(connectionsRouter)
|
||||||
from modules.routes.routeRagInventory import router as ragInventoryRouter
|
from modules.routes.routeRagInventory import router as ragInventoryRouter
|
||||||
app.include_router(ragInventoryRouter)
|
app.include_router(ragInventoryRouter)
|
||||||
|
|
||||||
from modules.routes.routeAdminSttBenchmark import router as sttBenchmarkRouter
|
|
||||||
app.include_router(sttBenchmarkRouter)
|
|
||||||
|
|
||||||
from modules.routes.routeTableViews import router as tableViewsRouter
|
from modules.routes.routeTableViews import router as tableViewsRouter
|
||||||
app.include_router(tableViewsRouter)
|
app.include_router(tableViewsRouter)
|
||||||
|
|
@ -745,4 +746,4 @@ if __name__ == "__main__":
|
||||||
], check=True)
|
], check=True)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import uvicorn
|
import uvicorn
|
||||||
uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, timeout_graceful_shutdown=5)
|
uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, timeout_graceful_shutdown=2)
|
||||||
|
|
@ -1,309 +0,0 @@
|
||||||
# Aufwandsschätzung Althaus Bot v2 -- Unabhängige Analyse
|
|
||||||
|
|
||||||
**Projekt:** Althaus Bot v2 -- Weiterentwicklung & neue Use Cases
|
|
||||||
**Kunde:** W. Althaus AG, Aarwangen
|
|
||||||
**Erstellt:** 13. April 2026
|
|
||||||
**Basis:** Code-Analyse Gateway-Repository + Offerte v2 vom 14.04.2026
|
|
||||||
**Methodik:** Bottom-Up-Schätzung auf Basis der bestehenden Implementierung, Dreipunktschätzung (Min / Mitte / Max)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. Ist-Zustand der Implementierung
|
|
||||||
|
|
||||||
### 1.1 Architekturübersicht
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
|
||||||
│ React Frontend (SSE-Streaming, Chat-UI) │
|
|
||||||
└──────────────────────────┬──────────────────────────────────────┘
|
|
||||||
│ /api/chatbot/*
|
|
||||||
┌──────────────────────────▼──────────────────────────────────────┐
|
|
||||||
│ Gateway (Python/FastAPI) │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Chatbot Feature (modules/features/chatbot/) │ │
|
|
||||||
│ │ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐ │ │
|
|
||||||
│ │ │ Planner │→ │ SQL Plan │→ │ Parse & │→ │Formul. │ │ │
|
|
||||||
│ │ │ Node │ │ Node │ │ Execute │ │ Node │ │ │
|
|
||||||
│ │ └────┬────┘ └──────────┘ └────┬─────┘ └────────┘ │ │
|
|
||||||
│ │ │ │ │ │
|
|
||||||
│ │ ├→ Tavily (Web Search) │ │ │
|
|
||||||
│ │ └→ Direct Answer │ │ │
|
|
||||||
│ └──────────────────────────────────┼──────────────────────┘ │
|
|
||||||
│ │ │
|
|
||||||
│ ┌──────────────────────────────────▼──────────────────────┐ │
|
|
||||||
│ │ PreprocessorConnector (HTTP POST → Azure SQL API) │ │
|
|
||||||
│ └─────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ KnowledgeService (pgvector/RAG) -- NICHT IM CHATBOT │ │
|
|
||||||
│ │ Produktiv im AgentService + CommCoach │ │
|
|
||||||
│ └─────────────────────────────────────────────────────────┘ │
|
|
||||||
└──────────────────────────────────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
┌──────────────────────────▼──────────────────────────────────────┐
|
|
||||||
│ Azure Preprocessing Server (deployed, ERP-Daten deaktiviert) │
|
|
||||||
│ Tabellen: Artikel, Einkaufspreis, Lagerplatz, Lagerplatz_Art. │
|
|
||||||
│ Repo: github.com/valueonag/gateway_preprocessing │
|
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### 1.2 Vorhandene Komponenten (Wiederverwendung)
|
|
||||||
|
|
||||||
| Komponente | Datei / Modul | Status | Wiederverwendbar für |
|
|
||||||
|---|---|---|---|
|
|
||||||
| LangGraph-Workflow | `chatbot/chatbot.py` | Produktiv (deaktiviert) | Alle Positionen -- Grundgerüst |
|
|
||||||
| PreprocessorConnector | `connectors/connectorPreprocessor.py` | Produktiv (deaktiviert) | Pos. 1, 2, 3, 4 -- SQL-Abfragen |
|
|
||||||
| ChatbotConfig | `chatbot/config.py` | Produktiv | Alle -- Konfiguration pro Instanz |
|
|
||||||
| Streaming-Bridge | `chatbot/service.py` | Produktiv | Alle -- SSE ans Frontend |
|
|
||||||
| ChatbotDocument | `chatbot/interfaceFeatureChatbot.py` | Implementiert | Pos. 1.4, 2.1, 2.5 -- File-Handling |
|
|
||||||
| KnowledgeService/RAG | `serviceCenter/services/serviceKnowledge/` | Produktiv (AgentService) | Pos. 5 -- Wiki-Integration |
|
|
||||||
| Automation-Template | `automation/subAutomationTemplates.py` | Produktiv | Pos. 6 -- Preprocessor-Updates |
|
|
||||||
| SQL-Sanitize | `chatbot.py` → `_sanitize_sql_typos` | Produktiv | Pos. 1.1 -- Gesperrte Artikel |
|
|
||||||
| Markdown-Tabellen | `chatbot.py` → `_tool_output_to_markdown_table` | Produktiv | Pos. 1.3, 3.3 -- Darstellung |
|
|
||||||
| File-Upload Backend | `service.py` → `_convert_file_ids_to_document_references` | Implementiert | Pos. 1.4 -- Upload-Pipeline |
|
|
||||||
| Excel-Export | `service.py` → `_create_chat_document_from_action_document` | Implementiert | Pos. 2.5 -- Kalktool-Export |
|
|
||||||
|
|
||||||
### 1.3 Fehlende Komponenten (Neuentwicklung)
|
|
||||||
|
|
||||||
| Komponente | Benötigt für | Komplexität |
|
|
||||||
|---|---|---|
|
|
||||||
| Matching-Engine (exakt → fuzzy → KI) | Pos. 2.2 | Hoch |
|
|
||||||
| Neuer Planner-Pfad "WIKI" | Pos. 5.2 | Mittel |
|
|
||||||
| KnowledgeService → Chatbot Integration | Pos. 5.2 | Mittel |
|
|
||||||
| Wiki-Connector (API/Crawling) | Pos. 5.1 | Unbekannt (Wiki-abhängig) |
|
|
||||||
| Delta-Sync-Mechanismus | Pos. 5.3 | Mittel |
|
|
||||||
| Preprocessor: 8-10 neue Tabellen/Views | Pos. 1.5, 3.1, 4.1 | Mittel (Code-Änderung) |
|
|
||||||
| Frontend: File-Picker, Drag&Drop | Pos. 1.4 | Mittel |
|
|
||||||
| Frontend: Thread-Liste, Suchfunktion | Pos. 1.2 | Mittel |
|
|
||||||
| Kalktool-Excel-Format-Export | Pos. 2.5 | Mittel |
|
|
||||||
| Schwellenwert-Insights | Pos. 4.5 | Mittel |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 2. Detaillierte Aufwandsschätzung
|
|
||||||
|
|
||||||
### Position 1: Basics (Plattform-Verbesserungen)
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 1.1 | Gesperrte Artikel filtern | 4 | 3 | 4 | 4 | System-Prompt + SQL-Sanitize-Regel. Kleine Änderung. |
|
|
||||||
| 1.2 | Chat-Verlauf speichern | 12 | 12 | 14 | 16 | Backend existiert. Frontend-Aufwand (Thread-Liste, Suche). |
|
|
||||||
| 1.3 | Längere Antworten | 6 | 4 | 5 | 6 | Streaming-Config + Frontend-Rendering. |
|
|
||||||
| 1.4 | Datei-Upload | 16 | 16 | 18 | 20 | Full-Stack: Drag&Drop + LangGraph-Integration + Extraktion. |
|
|
||||||
| 1.5 | Kundenartikelnummern | 8 | 10 | 12 | 14 | Preprocessor-Code + Prompt + Cross-Ref-Queries. ERP-abhängig. |
|
|
||||||
| 1.6 | Abklärungen & Testing | 8 | 8 | 8 | 8 | Standard. |
|
|
||||||
| | **Subtotal** | **54** | **53** | **61** | **68** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: +7h (Mitte) / +14h (Max)**
|
|
||||||
**Haupttreiber:** Preprocessor-Erweiterung für Kundenartikelnummern (Pos. 1.5) erfordert Code-Änderung, nicht nur Config. Frontend-Aufwand bei Upload (Pos. 1.4) eher am oberen Ende.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Position 2: Use Case Kalktool
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 2.1 | Stücklisten-Upload & Extraktion | 12 | 10 | 12 | 14 | Nutzt Pos. 1.4. serviceExtraction vorhanden. |
|
|
||||||
| 2.2 | Artikelidentifikation & Matching | 20 | 24 | 28 | 32 | **KRITISCH**: Neue Matching-Engine, 3 Stufen, ERP-abhängig. |
|
|
||||||
| 2.3 | Automatische Feldergänzung | 16 | 14 | 16 | 18 | Preprocessor + Enrichment-Logik. |
|
|
||||||
| 2.4 | Alternativartikel-Vorschläge | 12 | 12 | 14 | 16 | KI-Vorschläge + Bestätigungs-Workflow im Chat. |
|
|
||||||
| 2.5 | Excel-Export (Kalktool-Format) | 12 | 10 | 12 | 14 | Basis existiert. Kalktool-Vorlage-Anpassung. |
|
|
||||||
| 2.6 | Erweiterbarkeit neue Felder | 8 | 6 | 8 | 10 | Config-gesteuertes Feld-Mapping. |
|
|
||||||
| 2.7 | Abklärungen & Testing | 12 | 12 | 12 | 12 | Kalktool-Vorlage, Testdaten, UAT. |
|
|
||||||
| | **Subtotal** | **92** | **88** | **102** | **116** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: +10h (Mitte) / +24h (Max)**
|
|
||||||
**Haupttreiber:** Die Matching-Engine (Pos. 2.2) ist die komplexeste Neuentwicklung im gesamten Projekt. Mehrstufiges Matching (exakt → fuzzy → KI-gestützt) ohne bestehende Basis. Die Qualität hängt stark von der ERP-Datenqualität und der Vielfalt der Kunden-Stücklisten-Formate ab.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Position 3: Use Case Materialmanagement 1
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 3.1 | ERP-Daten erweitern | 16 | 16 | 19 | 22 | Preprocessor: Bestellungen, Wareneingänge, Aufträge. Code nötig. |
|
|
||||||
| 3.2 | System-Prompt Materialmanagement | 8 | 6 | 8 | 10 | Prompt-Engineering + SQL-Templates. |
|
|
||||||
| 3.3 | Transparente Statusübersicht | 8 | 6 | 7 | 8 | Markdown-Rendering existiert, Erweiterung nötig. |
|
|
||||||
| 3.4 | Auswirkungsanalyse & Empfehlungen | 12 | 14 | 16 | 18 | Cross-Table-Queries + KI-Analyse. Komplex. |
|
|
||||||
| 3.5 | Abklärungen & Testing | 8 | 8 | 8 | 8 | Standard. |
|
|
||||||
| | **Subtotal** | **52** | **50** | **58** | **66** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: +6h (Mitte) / +14h (Max)**
|
|
||||||
**Haupttreiber:** Auswirkungsanalyse (Pos. 3.4) erfordert Multi-Table-Joins und KI-gestützte Bewertung, was über einfache SQL-Abfragen hinausgeht.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Position 4: Use Case Materialmanagement 2 (KPIs)
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 4.1 | ERP-Daten erweitern | 16 | 16 | 19 | 22 | Lagerjournal, Preishistorie. Aggregierte Views. |
|
|
||||||
| 4.2 | System-Prompt KPI-Analyse | 8 | 6 | 8 | 10 | Prompt-Engineering. |
|
|
||||||
| 4.3 | Liefertermintreue-Analyse | 10 | 10 | 12 | 14 | Zeitreihen, Lieferantenvergleich, komplexe SQL. |
|
|
||||||
| 4.4 | Preisentwicklungs-Analyse | 10 | 10 | 11 | 12 | Preishistorie, Abweichungsberechnung. |
|
|
||||||
| 4.5 | Automatisierte Insights | 8 | 10 | 12 | 14 | Schwellenwert-Warnungen, proaktive Erkennung. Neues Konzept. |
|
|
||||||
| 4.6 | Abklärungen & Testing | 8 | 8 | 8 | 8 | Standard. |
|
|
||||||
| | **Subtotal** | **60** | **60** | **70** | **80** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: +10h (Mitte) / +20h (Max)**
|
|
||||||
**Haupttreiber:** Automatisierte Insights (Pos. 4.5) erfordern eine neue Logikschicht, die proaktiv Schwellenwerte überwacht und Empfehlungen generiert. Das ist im aktuellen Chat-Flow nicht vorgesehen.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Position 5: Use Case Wiki-Anbindung
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 5.1 | Wiki-Anbindung & Indexierung | 16 | 16 | 20 | 24 | KnowledgeService existiert. Wiki-Zugang UNBEKANNT. |
|
|
||||||
| 5.2 | RAG-Integration im Chatbot | 12 | 12 | 14 | 16 | Pattern existiert (AgentService), muss portiert werden. |
|
|
||||||
| 5.3 | Inkrementelle Aktualisierung | 8 | 8 | 11 | 14 | Delta-Sync stark Wiki-abhängig. |
|
|
||||||
| 5.4 | Abklärungen & Testing | 8 | 8 | 9 | 10 | Relevanz-Tuning ist iterativ. |
|
|
||||||
| | **Subtotal** | **44** | **44** | **54** | **64** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: +10h (Mitte) / +20h (Max)**
|
|
||||||
**Haupttreiber:** Wiki-System ist unbekannt. Bei einem Wiki mit guter API (z.B. Confluence, SharePoint) sind 44h erreichbar. Bei proprietärem System ohne API steigt der Aufwand erheblich.
|
|
||||||
|
|
||||||
**Synergie:** KnowledgeService mit pgvector, Chunking, Embedding und semanticSearch ist bereits produktiv. Die RAG-Pipeline (Ingestion → Embedding → Retrieval) muss nicht neu gebaut werden. Das spart geschätzt 20-30h gegenüber einer Neuentwicklung.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Position 6: Azure-Migration
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 6.1 | Migration Preprocessor | 6 | 4 | 6 | 8 | Config-Änderungen, Env-Files, Netzwerk. |
|
|
||||||
| 6.2 | Validierung & Smoke-Tests | 4 | 4 | 4 | 4 | End-to-End-Tests. |
|
|
||||||
| | **Subtotal** | **10** | **8** | **10** | **12** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: 0h (Mitte)**
|
|
||||||
**Bewertung:** Realistisch. Einfachste Position.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Position 7: Projektmanagement
|
|
||||||
|
|
||||||
| # | Anforderung | Offerte | Min | Mitte | Max | Begründung |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|---|
|
|
||||||
| 7.1 | Kick-off & Workshop | 4 | 4 | 4 | 4 | Standard. |
|
|
||||||
| 7.2 | Projektmanagement | 8 | 10 | 12 | 14 | 10-14 Wochen, 3 Ansprechpartner, 7 Positionen. |
|
|
||||||
| 7.3 | Deployment & Go-Live | 6 | 6 | 7 | 8 | Staging + Prod + erste Betriebswoche. |
|
|
||||||
| | **Subtotal** | **18** | **20** | **23** | **26** | |
|
|
||||||
|
|
||||||
**Delta zur Offerte: +5h (Mitte) / +8h (Max)**
|
|
||||||
**Haupttreiber:** PM-Aufwand bei 3-Monats-Projekt mit mehreren Stakeholdern ist erfahrungsgemäss höher.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 3. Gesamtübersicht
|
|
||||||
|
|
||||||
| Pos. | Beschreibung | Offerte (h) | Min (h) | Mitte (h) | Max (h) | Offerte CHF | Mitte CHF |
|
|
||||||
|---|---|:-:|:-:|:-:|:-:|:-:|:-:|
|
|
||||||
| 1 | Basics | 54 | 53 | 61 | 68 | 8'100 | 9'150 |
|
|
||||||
| 2 | Kalktool | 92 | 88 | 102 | 116 | 13'800 | 15'300 |
|
|
||||||
| 3 | Materialmanagement 1 | 52 | 50 | 58 | 66 | 7'800 | 8'700 |
|
|
||||||
| 4 | Materialmanagement 2 | 60 | 60 | 70 | 80 | 9'000 | 10'500 |
|
|
||||||
| 5 | Wiki-Anbindung | 44 | 44 | 54 | 64 | 6'600 | 8'100 |
|
|
||||||
| 6 | Azure-Migration | 10 | 8 | 10 | 12 | 1'500 | 1'500 |
|
|
||||||
| 7 | Projektmanagement | 18 | 20 | 23 | 26 | 2'700 | 3'450 |
|
|
||||||
| | **Gesamt** | **330** | **323** | **378** | **432** | **49'500** | **56'700** |
|
|
||||||
|
|
||||||
### Zusammenfassung
|
|
||||||
|
|
||||||
| Szenario | Stunden | CHF (à 150/h) | Differenz zur Offerte |
|
|
||||||
|---|:-:|:-:|:-:|
|
|
||||||
| Offerte (Kostendach) | 330 | 49'500 | -- |
|
|
||||||
| Eigene Schätzung (Minimum) | 323 | 48'450 | -2% |
|
|
||||||
| **Eigene Schätzung (Mitte)** | **378** | **56'700** | **+15%** |
|
|
||||||
| Eigene Schätzung (Maximum) | 432 | 64'800 | +31% |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 4. Risikobewertung
|
|
||||||
|
|
||||||
### Risikomatrix
|
|
||||||
|
|
||||||
| # | Risiko | Wahrscheinlichkeit | Auswirkung | Betroffene Pos. | Möglicher Mehraufwand |
|
|
||||||
|---|---|:-:|:-:|---|:-:|
|
|
||||||
| R1 | Matching-Engine komplexer als erwartet | Hoch | Hoch | 2.2 | +10-15h |
|
|
||||||
| R2 | Wiki-System ohne API | Mittel | Hoch | 5.1, 5.3 | +10-20h |
|
|
||||||
| R3 | ERP-Datenqualität mangelhaft | Mittel | Mittel | 1.5, 2.2, 3.1, 4.1 | +8-16h |
|
|
||||||
| R4 | Preprocessor-Erweiterung aufwändiger | Mittel | Mittel | 1.5, 3.1, 4.1 | +8-12h |
|
|
||||||
| R5 | Frontend-Aufwand unterschätzt | Mittel | Gering | 1.2, 1.4 | +4-8h |
|
|
||||||
| R6 | KI-Modell-Qualität für SQL-Generierung | Gering | Mittel | 3, 4 | +4-8h |
|
|
||||||
|
|
||||||
### Synergien (Aufwandsreduktion durch bestehende Komponenten)
|
|
||||||
|
|
||||||
| Synergie | Geschätzte Einsparung | Betroffene Pos. |
|
|
||||||
|---|:-:|---|
|
|
||||||
| KnowledgeService/RAG existiert produktiv | 20-30h | Pos. 5 |
|
|
||||||
| ChatbotDocument-Modell existiert | 4-6h | Pos. 1.4, 2.1 |
|
|
||||||
| LangGraph modular erweiterbar | 6-10h | Pos. 3, 4, 5 |
|
|
||||||
| Prompt-Engineering über DB-Config | 2-4h | Pos. 1.1, 3.2, 4.2 |
|
|
||||||
| Excel-Export-Pattern existiert | 2-4h | Pos. 2.5 |
|
|
||||||
| **Gesamt Einsparung** | **34-54h** | |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 5. Empfehlungen
|
|
||||||
|
|
||||||
### 5.1 Zur Offerte
|
|
||||||
|
|
||||||
Die Offerte mit 330h als Kostendach ist **ambitioniert, aber bei idealem Verlauf erreichbar**. Die grössten Risiken liegen in:
|
|
||||||
- Position 2 (Kalktool): Die Matching-Engine ist die komplexeste Neuentwicklung
|
|
||||||
- Position 5 (Wiki): Komplett abhängig vom Wiki-System, das noch ungeklärt ist
|
|
||||||
|
|
||||||
**Empfehlung:** Offerte bei 330h als Kostendach belassen, aber intern mit 370-380h planen. Die Differenz (~40-50h) als interne Reserve einkalkulieren.
|
|
||||||
|
|
||||||
### 5.2 Priorisierung
|
|
||||||
|
|
||||||
1. **Must-Have (Prio 1):** Pos. 1 (Basics) + Pos. 6 (Azure-Migration) -- Voraussetzung für alles
|
|
||||||
2. **High-Value (Prio 2):** Pos. 2 (Kalktool) -- Höchster Kundennutzen, aber auch höchstes Risiko
|
|
||||||
3. **Quick-Win (Prio 3):** Pos. 3+4 (Materialmanagement) -- Nutzen vorhandene Architektur
|
|
||||||
4. **Abhängig (Prio 4):** Pos. 5 (Wiki) -- Erst nach Wiki-Klärung starten
|
|
||||||
|
|
||||||
### 5.3 Offene Punkte (vor Projektstart zu klären)
|
|
||||||
|
|
||||||
| # | Offener Punkt | Verantwortlich | Kritisch für |
|
|
||||||
|---|---|---|---|
|
|
||||||
| O1 | Wiki-System und Zugangsart klären | Althaus (Samuel) | Pos. 5 |
|
|
||||||
| O2 | ERP-System identifizieren und Datenstrukturen dokumentieren | Althaus (Stefan) | Pos. 1.5, 3.1, 4.1 |
|
|
||||||
| O3 | Preprocessor-Code-Review für Erweiterbarkeit | PowerOn (Entwicklung) | Pos. 1.5, 3.1, 4.1 |
|
|
||||||
| O4 | Kalktool-Vorlage erhalten und analysieren | Althaus (Reto) | Pos. 2.5 |
|
|
||||||
| O5 | Muster-Stücklisten für Matching-Test | Althaus (Reto) | Pos. 2.2 |
|
|
||||||
| O6 | Azure-Subscription-Details | Althaus | Pos. 6 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 6. Zeitplan (2 Entwickler)
|
|
||||||
|
|
||||||
```
|
|
||||||
Woche 1-2: Kick-off + Azure-Migration (Pos. 6) + Basics 1.1-1.3
|
|
||||||
Entwickler A: Azure-Migration + 1.1 (Gesperrte Artikel)
|
|
||||||
Entwickler B: 1.2 (Chat-Verlauf Frontend) + 1.3 (Lange Antworten)
|
|
||||||
|
|
||||||
Woche 2-5: Basics 1.4-1.6 (Grundlage für Use Cases)
|
|
||||||
Entwickler A: 1.4 (File-Upload Full-Stack)
|
|
||||||
Entwickler B: 1.5 (Kundenartikelnummern + Preprocessor)
|
|
||||||
|
|
||||||
Woche 4-9: Kalktool (Pos. 2) -- längster Block, früh starten
|
|
||||||
Entwickler A: 2.1-2.2 (Upload + Matching-Engine)
|
|
||||||
Entwickler B: 2.3-2.5 (Feldergänzung + Export)
|
|
||||||
|
|
||||||
Woche 6-9: Materialmanagement 1+2 (Pos. 3+4) -- parallel zum Kalktool
|
|
||||||
Entwickler B: 3.1-3.4 + 4.1-4.5 (Preprocessor + Prompts)
|
|
||||||
(Entwickler A bleibt auf Kalktool)
|
|
||||||
|
|
||||||
Woche 9-12: Wiki-Anbindung (Pos. 5) -- nach Klärung des Wiki-Systems
|
|
||||||
Entwickler A: 5.1-5.2 (Connector + RAG-Integration)
|
|
||||||
Entwickler B: 5.3 (Delta-Sync) + Integrationstests
|
|
||||||
|
|
||||||
Woche 12-13: Integrationstests, UAT, Go-Live (Pos. 7.3)
|
|
||||||
Beide Entwickler: E2E-Tests + Deployment + Monitoring
|
|
||||||
```
|
|
||||||
|
|
||||||
**Gesamtdauer:** 12-14 Wochen
|
|
||||||
**Kritischer Pfad:** Pos. 1 → Pos. 2 (Kalktool braucht Upload + Kundenartikelnummern)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Dokument erstellt auf Basis der Code-Analyse des Gateway-Repository (Stand 13.04.2026)*
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
# Fragenkatalog Althaus Bot v2 -- Kick-off-Vorbereitung
|
|
||||||
|
|
||||||
**Zweck:** Strukturierte Fragen für den Anforderungsworkshop mit W. Althaus AG
|
|
||||||
**Erstellt:** 13. April 2026
|
|
||||||
**Zielgruppe:** Projektleitung PowerOn + Ansprechpartner Althaus (Reto, Stefan, Samuel)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## A. Wiki-System (Ansprechpartner: Samuel)
|
|
||||||
|
|
||||||
> **Kritisch für:** Position 5 (Wiki-Anbindung) -- Aufwandsschätzung schwankt zwischen 44h und 64h je nach Wiki-System.
|
|
||||||
|
|
||||||
### A.1 Wiki-Identifikation
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| A1.1 | Welches Wiki-System wird eingesetzt? (z.B. Confluence, SharePoint Wiki, MediaWiki, DokuWiki, Notion, anderes) | Bestimmt die Anbindungsstrategie (API vs. Export vs. Crawling) |
|
|
||||||
| A1.2 | Wo wird das Wiki gehostet? (Cloud-SaaS, On-Premise, Azure) | Netzwerk-Zugang und Firewall-Konfiguration |
|
|
||||||
| A1.3 | Wie viele Seiten/Artikel enthält das Wiki ungefähr? | Dimensionierung der Erstindexierung und Embedding-Kosten |
|
|
||||||
| A1.4 | In welchen Formaten liegen die Inhalte vor? (reiner Text, HTML, Markdown, eingebettete PDFs/Bilder) | Bestimmt die Extraktions-Komplexität |
|
|
||||||
|
|
||||||
### A.2 Technischer Zugang
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| A2.1 | Gibt es eine REST-API oder ähnliche Schnittstelle zum Lesen der Wiki-Inhalte? | API-Zugang = deutlich weniger Aufwand als Crawling |
|
|
||||||
| A2.2 | Gibt es eine Export-Funktion? (z.B. XML-Export, PDF-Export, Datenbank-Dump) | Fallback wenn keine API vorhanden |
|
|
||||||
| A2.3 | Gibt es Authentifizierung (API-Key, OAuth, LDAP)? Welche Credentials werden benötigt? | Konfiguration des Connectors |
|
|
||||||
| A2.4 | Gibt es eine Change-API oder Webhooks, die bei Änderungen notifizieren? | Bestimmt den Aufwand für inkrementelle Updates (Pos. 5.3) |
|
|
||||||
| A2.5 | Gibt es Zugriffsbeschränkungen auf bestimmte Wiki-Bereiche? | RBAC-Überlegungen bei der Indexierung |
|
|
||||||
|
|
||||||
### A.3 Inhaltliche Abgrenzung
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| A3.1 | Soll das gesamte Wiki indexiert werden oder nur bestimmte Bereiche? | Scope-Begrenzung für Erstindexierung |
|
|
||||||
| A3.2 | Gibt es vertrauliche Inhalte, die nicht in den Chatbot einfliessen dürfen? | Datenschutz-/Compliance-Anforderung |
|
|
||||||
| A3.3 | Wie oft werden Wiki-Inhalte aktualisiert? (täglich, wöchentlich, selten) | Bestimmt die Sync-Frequenz |
|
|
||||||
| A3.4 | Welche Sprache(n) haben die Wiki-Inhalte? (Deutsch, Englisch, gemischt) | Embedding-Modell-Auswahl |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## B. ERP-System & Datenstrukturen (Ansprechpartner: Stefan)
|
|
||||||
|
|
||||||
> **Kritisch für:** Positionen 1.5, 2.2-2.3, 3.1, 4.1 -- Preprocessor-Erweiterungen und Matching-Engine.
|
|
||||||
|
|
||||||
### B.1 ERP-Identifikation
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| B1.1 | Welches ERP-System wird eingesetzt? (z.B. Abacus, SAP, Microsoft Dynamics, bexio, Sage) | Bestimmt Datenstruktur und Zugriffsmöglichkeiten |
|
|
||||||
| B1.2 | Wie werden die Daten aktuell an den Preprocessor geliefert? (direkter DB-Zugriff, API, Export-Datei) | Verständnis der bestehenden Datenpipeline |
|
|
||||||
| B1.3 | In welchem Rhythmus werden die Daten aktualisiert? (Echtzeit, täglich, wöchentlich) | Aktualität der Chatbot-Antworten |
|
|
||||||
|
|
||||||
### B.2 Kundenartikelnummern (Position 1.5)
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| B2.1 | Gibt es im ERP eine dedizierte Tabelle für Kundenartikelnummern? Wenn ja, wie heisst sie? | Preprocessor-Schema-Erweiterung |
|
|
||||||
| B2.2 | Wie ist die Zuordnung: 1 Kundenartikel → 1 ERP-Artikel, oder n:m? | Bestimmt die Mapping-Komplexität |
|
|
||||||
| B2.3 | Wie viele Kundenartikelnummern gibt es ungefähr? | Dimensionierung |
|
|
||||||
| B2.4 | Welche Felder hat die Kundenartikelnummern-Tabelle? (z.B. KundenNr, KundenArtikelNr, InterneArtikelNr, Bezeichnung) | Schema-Definition für Preprocessor |
|
|
||||||
|
|
||||||
### B.3 Bestellwesen & Materialmanagement (Positionen 3 + 4)
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| B3.1 | Welche ERP-Tabellen/Views gibt es für Bestellungen? (Bestellkopf, Bestellpositionen, Status) | Preprocessor-Erweiterung Pos. 3.1 |
|
|
||||||
| B3.2 | Gibt es eine Tabelle für Wareneingänge mit Datum und Menge? | Liefertermin-Treue-Berechnung Pos. 4.3 |
|
|
||||||
| B3.3 | Gibt es eine Preishistorie-Tabelle? Welche Felder enthält sie? (Datum, Preis, Lieferant, Währung) | Preisentwicklungs-Analyse Pos. 4.4 |
|
|
||||||
| B3.4 | Gibt es ein Lagerjournal mit Buchungsdaten? | KPI-Analyse Pos. 4.1 |
|
|
||||||
| B3.5 | Gibt es eine Bestandesbedarfsliste oder Dispositions-View? | Material-Analyse Pos. 3.4 |
|
|
||||||
| B3.6 | Gibt es Felder für "bestätigter Liefertermin" vs. "gewünschter Liefertermin"? | Termintreue-KPI Pos. 4.3 |
|
|
||||||
| B3.7 | Wie viele offene Bestellungen gibt es typischerweise gleichzeitig? | Performance-Dimensionierung |
|
|
||||||
|
|
||||||
### B.4 Datenqualität
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| B4.1 | Wie konsistent sind Lieferanten-Namen im ERP? (exakt gleich oder Varianten wie "Siemens AG" vs. "Siemens") | Matching-Qualität Pos. 2.2 |
|
|
||||||
| B4.2 | Gibt es Pflichtfelder die häufig leer sind? | Feldergänzungs-Logik Pos. 2.3 |
|
|
||||||
| B4.3 | Wie sind Preise gespeichert? (Netto, Brutto, mit/ohne MwSt., Währung) | SQL-Query-Generierung |
|
|
||||||
| B4.4 | Werden gelöschte/gesperrte Datensätze physisch oder nur logisch gelöscht? | Filter-Logik Pos. 1.1 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## C. Kalktool (Ansprechpartner: Reto)
|
|
||||||
|
|
||||||
> **Kritisch für:** Position 2 (Kalktool) -- Höchstes Risiko in der Offerte.
|
|
||||||
|
|
||||||
### C.1 Kalktool-Vorlage
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| C1.1 | Können wir die aktuelle Kalktool-Vorlage (Kalktool_Aktuell_2026_V1.4.xlsx) erhalten? | Zielformat für Excel-Export Pos. 2.5 |
|
|
||||||
| C1.2 | Welche Spalten/Felder sind Pflicht in der Kalktool-Vorlage? | Feldergänzungs-Priorität Pos. 2.3 |
|
|
||||||
| C1.3 | Gibt es Formeln in der Vorlage, die erhalten bleiben müssen? | Komplexität des Excel-Exports |
|
|
||||||
| C1.4 | Welches Format haben die Kunden-Stücklisten typischerweise? (PDF, Excel, CSV) | Extraktions-Strategie Pos. 2.1 |
|
|
||||||
|
|
||||||
### C.2 Matching-Anforderungen
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| C2.1 | Können wir 3-5 Muster-Stücklisten von verschiedenen Kunden erhalten? | Testdaten für Matching-Engine Pos. 2.2 |
|
|
||||||
| C2.2 | Welche Identifikationsmerkmale haben Kunden-Stücklisten? (Kundenartikelnr., Hersteller-Typ, Beschreibung) | Matching-Stufen definieren |
|
|
||||||
| C2.3 | Wie hoch ist die erwartete Trefferquote beim exakten Match? (10%? 50%? 90%?) | Gewichtung exakt vs. fuzzy vs. KI |
|
|
||||||
| C2.4 | Welche Felder sollen bei nicht-eindeutigem Match als "Alternative durch KI" markiert werden? | Bestätigungs-Workflow Pos. 2.4 |
|
|
||||||
| C2.5 | Gibt es Produktgruppen, die besonders schwierig zu matchen sind? | Risikobewertung |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## D. Infrastruktur & Azure (Ansprechpartner: Stefan / IT)
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| D1 | Details zur neuen Azure-Subscription (Subscription-ID, Region, Resource Group) | Pos. 6 -- Migration |
|
|
||||||
| D2 | Gibt es Netzwerk-Einschränkungen (VPN, Private Endpoints, Firewall)? | Zugang Preprocessor ↔ ERP |
|
|
||||||
| D3 | Wer hat Admin-Zugang zur neuen Subscription? | Deployment-Planung |
|
|
||||||
| D4 | Gibt es Budget-Limits auf der Azure-Subscription? | Betriebskosten-Planung |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## E. Priorisierung & Vorgehensweise
|
|
||||||
|
|
||||||
| # | Frage | Hintergrund |
|
|
||||||
|---|---|---|
|
|
||||||
| E1 | Sollen alle 7 Positionen umgesetzt werden, oder gibt es eine Priorisierung? | Scope-Bestätigung |
|
|
||||||
| E2 | Gibt es einen gewünschten Go-Live-Termin? | Zeitplanung |
|
|
||||||
| E3 | Wie soll die UAT organisiert werden? (dedizierte Testphase, laufend, Key-User) | Testplanung |
|
|
||||||
| E4 | Wer sind die Pilot-User für den reaktivierten Bot? | UAT-Teilnehmer |
|
|
||||||
| E5 | Sollen Schulungen für Endanwender durchgeführt werden? (nicht in Offerte enthalten) | Ggf. Nachtragsofferte |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Nächste Schritte
|
|
||||||
|
|
||||||
1. **Vor dem Kick-off:** Fragenkatalog an Althaus senden, damit Antworten vorbereitet werden können
|
|
||||||
2. **Im Kick-off:** Fragen durchgehen, fehlende Antworten als Action Items festhalten
|
|
||||||
3. **Nach dem Kick-off:** Aufwandsschätzung anhand der Antworten finalisieren, insbesondere Pos. 2.2 (Matching) und Pos. 5 (Wiki)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*PowerOn AG -- Vorbereitung Anforderungsworkshop Althaus Bot v2*
|
|
||||||
|
|
@ -1,223 +0,0 @@
|
||||||
# Preprocessor Assessment -- Althaus Bot v2
|
|
||||||
|
|
||||||
**Zweck:** Technische Analyse des Preprocessing-Servers für die Aufwandsschätzung der Erweiterungen
|
|
||||||
**Erstellt:** 13. April 2026
|
|
||||||
**Quellen:** Gateway-Code-Analyse (das Preprocessor-Repo `github.com/valueonag/gateway_preprocessing` war lokal nicht verfügbar)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. Ist-Zustand (abgeleitet aus Gateway-Code)
|
|
||||||
|
|
||||||
### 1.1 Infrastruktur
|
|
||||||
|
|
||||||
| Eigenschaft | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Host** | Azure App Service (Switzerland North) |
|
|
||||||
| **URL (Datenverarbeitung)** | `poweron-althaus-preprocess-prod-*.azurewebsites.net/api/v1/dataprocessor/update-db-with-config` |
|
|
||||||
| **URL (Abfragen)** | `poweron-althaus-preprocess-prod-*.azurewebsites.net/api/v1/dataquery/query` |
|
|
||||||
| **Authentifizierung** | `X-PP-API-Key` (Datenverarbeitung) / `X-DB-API-Key` (Abfragen) |
|
|
||||||
| **Status** | Deployed, ERP-Datenanbindung deaktiviert |
|
|
||||||
| **Quellcode** | `github.com/valueonag/gateway_preprocessing` (separates Repo) |
|
|
||||||
|
|
||||||
### 1.2 Aktuelle Tabellen-Konfiguration
|
|
||||||
|
|
||||||
Aus dem Automation-Template (`subAutomationTemplates.py`) extrahiert:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"tables": [
|
|
||||||
{
|
|
||||||
"name": "Artikel",
|
|
||||||
"powerbi_table_name": "Artikel",
|
|
||||||
"steps": [
|
|
||||||
{
|
|
||||||
"keep": {
|
|
||||||
"columns": [
|
|
||||||
"I_ID", "Artikelbeschrieb", "Artikelbezeichnung",
|
|
||||||
"Artikelgruppe", "Artikelkategorie", "Artikelkürzel",
|
|
||||||
"Artikelnummer", "Einheit", "Gesperrt",
|
|
||||||
"Keywords", "Lieferant", "Warengruppe"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"fillna": {
|
|
||||||
"column": "Lieferant",
|
|
||||||
"value": "Unbekannt"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Einkaufspreis",
|
|
||||||
"powerbi_table_name": "Einkaufspreis",
|
|
||||||
"steps": [
|
|
||||||
{
|
|
||||||
"to_numeric": {
|
|
||||||
"column": "EP_CHF",
|
|
||||||
"errors": "coerce"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"dropna": {
|
|
||||||
"subset": ["EP_CHF"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 1.3 Zusätzliche Tabellen (im Chatbot referenziert, aber nicht in der Config)
|
|
||||||
|
|
||||||
Aus den SQL-Beispielen in `bridges/tools.py` und `chatbot.py`:
|
|
||||||
|
|
||||||
| Tabelle | Spalten (referenziert im Code) | Joins |
|
|
||||||
|---|---|---|
|
|
||||||
| `Lagerplatz_Artikel` | `R_ARTIKEL`, `R_LAGERPLATZ`, `S_IST_BESTAND`, `S_RESERVIERTER__BESTAND` | ON `Artikel.I_ID = Lagerplatz_Artikel.R_ARTIKEL` |
|
|
||||||
| `Lagerplatz` | `I_ID`, `Lagerplatz` (Name) | ON `Lagerplatz_Artikel.R_LAGERPLATZ = Lagerplatz.I_ID` |
|
|
||||||
|
|
||||||
Diese Tabellen sind vermutlich in einer älteren Config-Version oder direkt im Preprocessor konfiguriert.
|
|
||||||
|
|
||||||
### 1.4 API-Schnittstellen
|
|
||||||
|
|
||||||
**Abfrage-API** (genutzt vom `PreprocessorConnector`):
|
|
||||||
- Methode: `POST`
|
|
||||||
- Payload: `{"query": "SELECT ..."}`
|
|
||||||
- Header: `X-DB-API-Key: <api_key>`
|
|
||||||
- Response: `{"success": true/false, "data": [...], "row_count": N, "message": "..."}`
|
|
||||||
- Einschränkung: Nur SELECT-Queries (validiert im Gateway)
|
|
||||||
|
|
||||||
**Update-API** (genutzt vom Automation-Template):
|
|
||||||
- Methode: `POST`
|
|
||||||
- Payload: `configJson` (Tabellendefinitionen + Transformationsschritte)
|
|
||||||
- Header: `X-PP-API-Key: <secret>`
|
|
||||||
- Zweck: Datenbank mit neuer Konfiguration aktualisieren
|
|
||||||
|
|
||||||
### 1.5 Transformation-Steps (bekannte Operationen)
|
|
||||||
|
|
||||||
Aus der Config-JSON abgeleitet:
|
|
||||||
|
|
||||||
| Operation | Parameter | Beschreibung |
|
|
||||||
|---|---|---|
|
|
||||||
| `keep` | `columns: [...]` | Nur angegebene Spalten behalten |
|
|
||||||
| `fillna` | `column`, `value` | NULL-Werte ersetzen |
|
|
||||||
| `to_numeric` | `column`, `errors` | Spalte in numerischen Typ konvertieren |
|
|
||||||
| `dropna` | `subset: [...]` | Zeilen mit NULL in angegebenen Spalten entfernen |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 2. Benötigte Erweiterungen (nach Position)
|
|
||||||
|
|
||||||
### 2.1 Position 1.5: Kundenartikelnummern
|
|
||||||
|
|
||||||
**Neue Tabelle: `Kundenartikelnummer`**
|
|
||||||
|
|
||||||
| Spalte (geschätzt) | Typ | Beschreibung |
|
|
||||||
|---|---|---|
|
|
||||||
| `I_ID` | INT | Primary Key |
|
|
||||||
| `R_ARTIKEL` | INT | FK auf Artikel.I_ID |
|
|
||||||
| `Kundenummer` | VARCHAR | Kundennummer |
|
|
||||||
| `Kundenartikelnummer` | VARCHAR | Kunden-eigene Artikelnummer |
|
|
||||||
| `Bezeichnung` | VARCHAR | Kundenbezeichnung (optional) |
|
|
||||||
|
|
||||||
**Config-Erweiterung:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"name": "Kundenartikelnummer",
|
|
||||||
"powerbi_table_name": "Kundenartikelnummer",
|
|
||||||
"steps": [
|
|
||||||
{"keep": {"columns": ["I_ID", "R_ARTIKEL", "Kundenummer", "Kundenartikelnummer", "Bezeichnung"]}}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Aufwand-Bewertung:** Falls der Preprocessor neue Tabellen per Config akzeptiert: ~2-3h Config + Test. Falls neuer Code nötig: ~6-8h.
|
|
||||||
|
|
||||||
### 2.2 Position 3.1: Bestellwesen (Materialmanagement 1)
|
|
||||||
|
|
||||||
**Neue Tabellen (geschätzt 3-4 Tabellen):**
|
|
||||||
|
|
||||||
| Tabelle | Wichtige Spalten | Zweck |
|
|
||||||
|---|---|---|
|
|
||||||
| `Bestellkopf` | ID, Bestellnummer, Lieferant, Bestelldatum, Status, Wunschtermin | Bestellübersicht |
|
|
||||||
| `Bestellposition` | ID, R_Bestellung, R_Artikel, Menge, Preis, Status, Bestätigter_Termin | Positionsdetails |
|
|
||||||
| `Wareneingang` | ID, R_Bestellung, R_Position, Eingangsdatum, Menge, Qualität | Lieferverfolgung |
|
|
||||||
| `Auftrag` | ID, Auftragsnummer, Kunde, R_Artikel, Menge, Termin | Betroffene Aufträge |
|
|
||||||
|
|
||||||
**Aufwand-Bewertung:** 4 Tabellen × ~4h pro Tabelle (Config + Code + Transformationen + Test) = ~16h. Bei komplexen Transformationen (Joins, Aggregationen): +4-6h.
|
|
||||||
|
|
||||||
### 2.3 Position 4.1: KPI-Daten (Materialmanagement 2)
|
|
||||||
|
|
||||||
**Neue Tabellen/Views (geschätzt 3-4):**
|
|
||||||
|
|
||||||
| Tabelle/View | Wichtige Spalten | Zweck |
|
|
||||||
|---|---|---|
|
|
||||||
| `Lagerjournal` | ID, R_Artikel, Buchungsdatum, Menge, Typ | Lagerbewegungen |
|
|
||||||
| `Preishistorie` | ID, R_Artikel, R_Lieferant, Datum, Preis, Währung | Preisentwicklung |
|
|
||||||
| `Bestandesbedarfsliste` | R_Artikel, Bedarf, Bestand, Fehlmenge, Datum | Dispositionsplanung |
|
|
||||||
| `View_Termintreue` | R_Lieferant, Wunschtermin, Bestätigt, Geliefert, Abweichung_Tage | Aggregierte KPIs |
|
|
||||||
|
|
||||||
**Aufwand-Bewertung:** 4 Tabellen/Views × ~4h = ~16h. Aggregierte Views (Termintreue): +4-6h für Berechnungslogik im Preprocessor.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 3. Gesamtbewertung Preprocessor-Erweiterungen
|
|
||||||
|
|
||||||
### 3.1 Zusammenfassung
|
|
||||||
|
|
||||||
| Position | Neue Tabellen | Config-Aufwand | Code-Aufwand | Test | Gesamt |
|
|
||||||
|---|:-:|:-:|:-:|:-:|:-:|
|
|
||||||
| 1.5 (Kundenartikelnummern) | 1 | 1h | 3-5h | 2h | **6-8h** |
|
|
||||||
| 3.1 (Bestellwesen) | 3-4 | 2h | 8-12h | 4h | **14-18h** |
|
|
||||||
| 4.1 (KPIs) | 3-4 | 2h | 8-12h | 4h | **14-18h** |
|
|
||||||
| **Gesamt** | **7-9** | **5h** | **19-29h** | **10h** | **34-44h** |
|
|
||||||
|
|
||||||
### 3.2 Offene Fragen (Code-Review des Preprocessor-Repos erforderlich)
|
|
||||||
|
|
||||||
| # | Frage | Auswirkung |
|
|
||||||
|---|---|---|
|
|
||||||
| P1 | Unterstützt der Preprocessor neue Tabellen per Config-Erweiterung, oder muss für jede Tabelle Code geschrieben werden? | Bestimmt ob Config-only (~2h/Tabelle) oder Code (~4h/Tabelle) |
|
|
||||||
| P2 | Können aggregierte Views/Berechnungen im Preprocessor definiert werden? | Termintreue-KPI, Bestandsreichweite |
|
|
||||||
| P3 | Wie werden Joins zwischen Tabellen gehandhabt? (SQLite-seitig oder Preprocessor-seitig) | Komplexität der Cross-Table-Queries |
|
|
||||||
| P4 | Gibt es Rate-Limits oder Grössen-Limits bei der Query-API? | Performance bei komplexen KPI-Abfragen |
|
|
||||||
| P5 | Wie gross ist die aktuelle SQLite-Datenbank? Wie viele Artikel? | Dimensionierung für 7-9 neue Tabellen |
|
|
||||||
|
|
||||||
### 3.3 Empfehlung
|
|
||||||
|
|
||||||
**Vor Projektstart sollte ein Code-Review des Preprocessor-Repos durchgeführt werden** (geschätzter Aufwand: 2-4h). Dabei klären:
|
|
||||||
|
|
||||||
1. Erweiterbarkeit: Kann der Preprocessor neue Tabellen per Config akzeptieren?
|
|
||||||
2. Transformationen: Welche Operationen sind neben `keep`, `fillna`, `to_numeric`, `dropna` verfügbar?
|
|
||||||
3. Performance: Wie skaliert die SQLite-DB mit 7-9 zusätzlichen Tabellen?
|
|
||||||
4. Deployment: Wie wird der Preprocessor deployed? (CI/CD, manuell, Azure DevOps)
|
|
||||||
|
|
||||||
Das Ergebnis dieses Reviews kann die Aufwandsschätzung für Pos. 1.5, 3.1 und 4.1 um jeweils 4-6h nach oben oder unten korrigieren.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 4. Aktueller Datenfluss (zur Referenz)
|
|
||||||
|
|
||||||
```
|
|
||||||
ERP (Althaus)
|
|
||||||
│
|
|
||||||
▼ (Power BI Export / API / DB-Zugriff -- Mechanismus unklar)
|
|
||||||
Preprocessor Server (Azure)
|
|
||||||
│
|
|
||||||
├── /api/v1/dataprocessor/update-db-with-config ← Automation-Template
|
|
||||||
│ (Tabellen laden, transformieren, in SQLite schreiben)
|
|
||||||
│
|
|
||||||
└── /api/v1/dataquery/query ← PreprocessorConnector (Gateway)
|
|
||||||
(SQL SELECT auf SQLite ausführen)
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
Gateway (Chatbot LangGraph)
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
React Frontend (Chat-UI)
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Assessment erstellt auf Basis der Gateway-Code-Analyse. Für eine genauere Schätzung ist ein Code-Review des Preprocessor-Repos erforderlich.*
|
|
||||||
|
|
@ -71,7 +71,7 @@ Connector_AiMistral_API_SECRET = DEV_ENC:Z0FBQUFBQnFCdlFlelh2T2hqNGcxV0hMV1FKbmF
|
||||||
Service_MSFT_TENANT_ID = common
|
Service_MSFT_TENANT_ID = common
|
||||||
|
|
||||||
# Google Cloud Speech Services configuration
|
# Google Cloud Speech Services configuration
|
||||||
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3
b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pO
MmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
|
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQnFIc3YtU0x4LTlHbTY1NUVGY2V2bUdmck85dDh1ZWVKa2ktR0N6NjdlTGFrUHMybVQ2bVRLN01XNFRZR2lyN0ZNSHhzWVVGNnVtZjRjV2hhR0ViTDYwT25lSmxJY0pSTkl3OUEyT0JxMFVYRndfUFJudExMajdTYUNXS01JU2lhQzZmNWFYdXA4aVZ5Zkh4Zko1Z00tcEE5ZFEwQkFVa1oyR296YXozRFI2WUdXN0ZSREFFclFNaTd6OUVlSmFxS1BTSlNJbnlWNHNfbkk4QzVOUGlkMzdfQUZxUlJOVEZzUlN1aWRWY01JZmlRM0JNZE1EZ3BmbW10c3BDdERpa2FMakstQUlqVEVlRC1hUmZoeFVoQ3pYNXRlRFVSTlI3ekJrU0QwSHBSaWxiSGU0akFGMXUtY2Q0RnUzS0tPOEQtcTdVdWhQeHFDM1hRRVVMcUxCeklvWHNWRUN2bjVHZUUwLTVtaGpUbWdPUnJabWlIcHZ5UjNtN0NMTUNRN29ZRGVXU28xQmhJTVg2eEZnaUdrcW9UVklHMHJycm1nT0JkdGJReVVHeV8tYm12UDlOU0lpNHFidXBQbUFSSVVmWUl1M1BVMFFncm0xSldkVzBrb2poRFMyaVUwcUZvMHl0QlZIZ1h1MjZwR3AtZWhqdzN4UVhtT2hUa1lQU3VudzNXdW1FcVY3VnQ3RmpkQnFQemlrQlF3WGhBNWxOZXJ6Zm9KVFlEZExUXzlqODhYaFNNMzVWTzFNMmVTcWdodDZoRmZTUzlhLVlOSU5fYW1vNXctaFpFMC1pUllRZW11d1JQN25sbldHVjI1anc2UC1ycndjTGtxWk55WmpJeU1wOVR0RnlTdFpad1dkRmlUNDE0d240TDlKc3JFUXdOYzd5UTFYSXUzLTQ2Y1ZGcWE3R2RyQ0I1WDMtMHBScEFzZDV4UEkyanh4ckJZUjdTYnJGZjAxQkU3MEJ6OXdybGRaWHNod1hZZEhVOXRpMWRLbVJsRGd0UDRDN3JsRzF4T0RpcnczRU5TM0RKVjVkWTRqNTl6bmhQdmdvaEg1U2kya0QtQ0l4ZHVUcGxkNi1vNVVVOEcyWXhxZWc5N1lKMk4tT0o3ZFVzYjJtT3NVZFJiSTFNUnpaSmFOeDZaLWVpZlc0VUhZRHdXOUMyQ3cwaXBQUDRJN1g1YkwzaTFiRVRxRFY5UTdZU1dSaGR6NUw3aEtac2RENXF3WEpVN0dXVTlQR0F6MFlpWl83MU44NVR1ZUtPVUNlZ205YUIwOFoxUDBvTlI0SU52emVvQ3VZXy1jTlFXRWZXQ0d5RHJ0eV9JeE5wMHl0b3FVSjNoVzg2d21hYVNYY3Q0dkFaVEZwa09tRnFBbEtoOUlGY2xkeVJoZGYzQUxYNFZfb0ZiaU5VRjJPbGhieXYtWTFKckZwenVCUGFva1IwVVFORVQ4SDMxWHVuRWhBRGd0cVlsc3kyQ0RyY2ZIVDlwcGh5ampySV9uOVpsVmlWbGoxMEg3SXh6NzRJbmZXRlhMMWc0RXhzeWtnQlJ0VnZSdENkbEpOdENwUzItUjZhZWFYRFhzbDM1WDBxaGFPX19CSG1KZjRTTU5JemcxZzJRSFY5bkx4TTlIZFNHOW1USWxBYWhEZ1FSNVdSSDJETUZwMi1Hd0RESkF2cVA1TVJGTEtPUl9oN3gzVEIwSzZOVzlOWXhNa2I1Vzc1SV9tdENfRy1rQTNzRlZGSTYwQmJIaGswZUNWSnRDVXFfdWFCckZZcnJOT2Rfb3FrcWI4S1lVRTMyRnZJQTRZV1VsU0xobGRjekhtbG9LamR2d1hfVklsM3JBeW9SRzJnWVdiWDRzN1ltcXdSVGoxRVBvczViVXNjMUxBazZUdS1WbkRQX0h1MzdNd3ltVDUzd2FGdi1XeUMybV9ia1YxQVBPdnUxY1dfT2M5eEpZR2JH
MkdZbWdDZTRERXRYOWxodndkTXltVW40c0t0bVA5YWxuRzM3LWlCdmJiYmF5dkNBY3ozbUw1Zm5zRmpBdk5ORmFZRWJKM3Q2UDdKNl9zaUV5eVVGbkF0QmZSZzk5dGo3UjNIQWxwcjRlVTdUT2s1VGFjdndvX2c3d1VmaHRMZU10M1ZKVk9Ma3dZb1kwYVV5Z2NlTjUxdUYtZXRnRTRzQlp1aFp0OUF5TVBwN1gzU21kRmJ6OUlOeUFOOEhEOU5WSENNZndvLXdoVUFJYVFDTWEyakJEcTVSVDhJOWJscU8taThqNUZkdThCOUlXcldndFBTZk9QVnlMaUphUU5sUktpb1plZDZOQnFzNFNMUzRWbWFVQWhUWmJfem96X0cxWXVTcUxCeDhOc3E2OEpFa2lzWHFIV0p3eGdBZmN1aXBhYjExZTZqaUY4S0ZudTNhcUx2WlpuTU9lNUk2ZmNyN0JCODdYMGNEU2JsZkZXYlRFaTJQUTI5RU5SMmtkV1NHQTVTTjEyZGZLYnhTNTg2Nl9aaWJqX2Q1U1NwQ3pRTGRBSUw0N3FNQ0ItMks1QVZmbURYVWdHMWFZTWhGNURVOUg0bGVuMUozanlxTnRwbVlGX2RnN2FBVTZlZjhDaXVzZEtVR1Z5azhzWHRrS1dYSG9rYkowTjQ1N0hyRWdNVWMya1ZmWmZvSnVTdHNiMHFDODNLckpjQ081SFlieGxuM0picGhKMnNQRURwY2hpQzF3dHRnNEFWcUlPYjVxZEhod0JDbWZhU01Ob21UWmRwd0NQRlpjOE5CUFBOT004U2JKNkFSUlFzRklYZGJobUoxQzZzT2wzZ3J1Z05aYThRVVNzcFktMGJDcXFfSkxVS2hhajI3dTdrR2poa21ZM3Z4UzFRblFsOFlOZVVUM0YxaFRuNjFWQ2E4ZlhvZjZpMWFtOGRuaGx0MTZxZE9TY1dsTTMyMHhsNXJ2MkduaGRkZXpYUWJ3cEt1U3YwMC1IRzM5eWRCb0lvaUhTQ2R4XzhEZl9zRk5GeHhCSWx2X3BkUkJ4NFZLVzdVRFZkbnpNNkpjUTFHY1pDV0ZOMFBaNTVpLUlmSnFrX1N5X05MTjRUeTVERUs5MG9kMFJ3di03U3BpMUM4YXNwaG1fangwYURIVjBpSVdCUkt4UW5HbWtGOUh3TUdPZjMxYXpVZDcwTmlDcTR6WldZb3VzbHRpRUgyN2lFTjlpUV85T0M4blJxMWx0cC1iU0FDOHhueDBLYjdLZGhNbjFPbE1RdmhhNlEzX3ZpT2ZsYllwNkU5TE9fZWFabDE4RWRoRWxiMk5aVFZrWmxjaW5MX1VrUGhUN29vbU1tWldESnczYTNBQ1RPd1VTNGNJdjdJU3p3QXZQLVlDNkQ1cTh4Rk1WNnRMUi1DT3VGREFPa28xejc2NUl1dzJSa2hCTlJublBRNGkydlJVRjlFbFotOWtraWFqQkNNTXBpT1hZM0NXNEpObGMxQUNuS29rOExMSnMxT3NLbjNfLTdpQW1BcDMxR1RZdVRvbElGbENWbHJqRlVrTXhYbFdiMmItUzlxR2ZxT2FCWXpMVVJYZXBfSFVwNTczU3JHUVhET3hSWm80Ry1KcE9mV3FYejVHSEVSS0pxOUtCc3V2VHNFVkRqYk5Od20tM0ttdFQ1eGdsc091WGFYNFgybzNVd3ZvbzEwUDJ0T0hvTVd3YnlHNnpNWC0wbkJOQTIwQ3VYdlUzaXY5NFhDNlNOOW9UdGZNUk4zZ0VJakpwS21SZlJtQjVWLUxfejFYZFc1cjRwR3ZUOGdZb2VJaTdJUS1MYlRJb0ZFYW9uYzM3MDd4b09BR1pnTEh3RFpnaGhxZURQamllNUhqTHg0cHJfN08wMkdGSVQwQUlqWDhLVGViY3J5NlVFTzY3RGhGQ0R6aXNsb2w4dnBVYndTd1Jhd3IwS1BxY0h1X05RcGsySzVNbXR5YlBVQi1IOGFUNkh5QjhRZk5BQmZvcGF6
ZTNXenZkdy1GRjFGdE1saGdMSnotUkIyX1VqTlZFWnJER1YyNGQtMFZHU3hmRVNPUWFCdXV3QUxzOGVSbF9EdEZGUFNxbTdiYm5oWHdYak5qa3Zoem5WY1ZUdDREVUxGX0VQeS1jckhqS2lRLXQ1Y2tyOFRjYnVhajNUZmZOUE9kbU9PYXdqdk5DYUtEOVFiMW9yZTYxMFNUaDdvUTExUFZ1bklYSkRKTnJ1RURvOTR3ODREcWdWeHpRS2RETjZqeXpvbUpxMW5lWl84RzVocmJFQ3JfZlpMd3RCZEo5RWZ0MzIxNWV6bHlwdWJJWXhoaWxlM2FHSjBhWG14Sk94ZV96cXFvU1JwWDdKZldmZWdvdWVKdXVfaS1jZjdENXQzSzNyb1d3eWhUMU53QzgxemRiTTlkdFRxZU1OdEN5c1kxOEd2MTJMcnBJWEE0eXdJdFpOYVNMQTNLR292UFlGb0Ztdz0=
|
||||||
|
|
||||||
# Teamsbot Browser Bot Service
|
# Teamsbot Browser Bot Service
|
||||||
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
|
# For local testing: run the bot locally with `npm run dev` in service-teams-browser-bot
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ Connector_AiMistral_API_SECRET = INT_ENC:Z0FBQUFBQnFCdlFlU2tMLTFnQWhET2Nia2pTcVp
|
||||||
Service_MSFT_TENANT_ID = common
|
Service_MSFT_TENANT_ID = common
|
||||||
|
|
||||||
# Google Cloud Speech Services configuration
|
# Google Cloud Speech Services configuration
|
||||||
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBH
WUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhh
MVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
|
Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQnFIc3YtSjhlcklrU2JCOW5mdHFHd0dLTUZZZk9PT3o5RWt5RjAxX2s3ekJRLUUzU0dNSnNseTE4bUpNTnZSTWg0QV9mWm5iX19aWjV4YnRXU1JBSm1INVB5dXNRT2JiYk1tLWRSS29pdTRMdS1lMDZxMkx4VTh3bU5aVWh3cEwyOE1QcXVockgtZWh5bzdNVXQyemFuSmZqRzZZYmNGN21JdjNwNWpPRXB6WU1qSU5rZUVSb3JBS0lhcThvakkwbTRUUHhBdjRZdWNsZ1Z1RmFaNGZLcEpaNVNLdFAxYzFXdTJydU9COWJ0bkNyYUF2X2FNc1BfT05teEs1SE9PeGhPd3VJSFY2VFJ5VEl6V3R3bzd6OTVKTEVRcmt5ZzdBMXBFY1A5dUFJRFJONFBlaDlJcjNBQnBraC0wMTBhNW8wYWZaeHNWclVTOVotLTdWSmVuYzJKcUZSUkdrdXB3VEVESzd4UTI0bGd6SzdCajdoazZXVTVCaGRiaWJaOHg5Z2thSWItcS05U25DbUdrT2M1QV81WEg2dlJfMlBtZU9Bc3V5bmtBWHRoRUVLR2lWNHY3M3hHcU1raFRFOWQwSEtUU1RDWDFRNFlkNHVnTkZDbk5zS3RZeGR2Z015RnRGc3NndFVEQjc4bVpNeE81bXc1MnQ2QjNZeHZCbUJJZVJ2TE5xWEd4M3hHT2hJWW5DOWMxQlNmZE9uMVRGVnRwTUlXZjZCRUZBLU9GWVZGWFpZbUE3WVlpZU1DX1Z0bWQ0bjlaRThHOE9WR3VOVzlYWS1JampTNmxkNmFxWG54WDJjallIT3UyT0tGSzJpeG1tX0JoQjZxbEpESHBhMWZFa205bjdvTVFwSVVidnVzdURZVDAzVVpkekJ2SVZTZmhxQVJ2OWpuRGR2WFE3elMtb3B2ZzhpQVNvRmkzbzRrY1BuamVzM0E2eVM0bXBHTHgtYmhsVG5jNlB1Q1JHZU9HUlNfaTJSQkcwS2FSZnZSOW9oZzdXa1RUVTVTZTgwY01GYXQyQ0xWX1Fnb0xaOTRQY3hTclgweVJ5clc5OVpRWWlDb0JQVXoxVDA0bW8zUE55aGowb1ZZNEpBN2UtSTZTY2llRGhISFFkYWFYVlVBQ0IzbGxzVTQ2V2dsUGV1Y2I5bEZLRnlwdXRHMWZVcnBaTXNzNzNkUVFqR2xnSEQ1VlpTdXpwMFVVYjQ0enFlUnk0d3dDQUtSS1dUVnNyYnBKQW9TRjJxN2JNY2NhRWNONWRpWU5RbzNNZVJBS3EzN2ZMZ1E5VXQtMDFTZklLY1JiSDNYRlFuOF9VYUktS0xoY2IyR0xkT19qTEpIV1p6RFExUWNCQTdqN1kyS0Jaa2lyMDluenc1MS1vdmhPVlE5OUphWEY2dXFYNE04Z3lBUG5DNGZjTUVnYzEzYWhzTHpMdVBzT0dzRGJaT2x5b0pVbWJtUzJxdEd2VGtrc01kTlNPNURoVHhwZzU1d3pTZGJiTUZIME5tQ0xqNWJ2QS1QSEJHV2FEOExHWDByV19rVnc2R2pibnNENEo1cTh4bGNMX2ZpSTBMcjRvQWRhbW5xYVBiZkZzWTRERlVESEU2aHpvdzNMTjlCazRYeEJhMmZwdXY5T25IYkFTaUM3SmdIV1FCX2xxRXctWHZQOHgxLXI1c1JkWmcydkFTUmxFSU03cGtnallnTXplOElQbEJRSEE2aW5KREU0YUxwX25wOFhuS2RIbms1dXNIRHBtNjFtb3B3UGVGb0hwOENKM1hMclBwa3NBa2pFYnZYbEtFbUF0Y3pmeFRmMDNMaTZrR1BZWnBrNUQ1WlU1NVZQSWUxN3dwcXhhcjdXNTl4LVVpYVF3Y0wtRmFyNXZRNTE3UUc2cHVaVVNpaVdHbXRqQVJNZWZmNjdQQ2lwTGd6RFFZN2tSY2NEdmxvaXk4MTZMcmg0VGo3
MTN2R2V6cmV3YjdQVlNEZTQySUpaY2pkTHZzUzdJLVJ2WnlOQ3Vmem5FZXRaWjBMWjF4ZEF3ZHJ4VF8tMVNsRnljejVsaEpGOU5JbnhydjNVdzNMOENrWUVsbXp0ZEhuVE1Vd0RJcnp2N0RXUGFuNDM2OXBPbV9LRDUwTWk1NHYwaDhlVEhKUmtEa09INURwNjV5ZE1VWmpRSGdjeXJNc3FqcjZDdmx5WXluNWZ2VlpsWmR2TXVXVnBubEFmQlRfaGRwRndCVXVkMjkyLWVhaDQtZDN1cmFZLUoybGRwbGQ5MTExU2NnZ2lueVNfSjFDQ2NkWGtNX2M1T2I4YnVJOUFueGIxbG1EYlZOcFYtQlE3cm90SE40X0ZjalhLdXM5S2l5aW84ZUJPMlR4MU9EVkhZcHdrX1Zqc0NhWEJacDZHMzQwSzdkdi1Rd2s4Y1dfLS1ES0NfYTNxYl84UTN1S0lIM0pVTTNEYlJ0YW55Tk4yVjBONXNTQWtVZTJ2V3B5eHBJcG9IWGRMMklob0hMbVVZZzJKbTFMUExOQm5HSEZzWHU0VGVIWlJMVzFLeFB0NkkyWFkwWk0wdjdHRmxSWFFoSkJ2Vm5NUWNQQlp6YWlIc2NKLUdhOVVycHd5N3NFMDNVWlAxZGQ1NzRGbm9LcWxEb2tKR1RnVEtvRUc1d3l4aU1IOUQ5RldUT3Z0a3lpRHpVSWJ4MjU4RWY5MEpCQ0VFdHNMbnkxOGswcE44QzJwNXFCVGpIa0VGc2VNXy1qdzVNRU9DaXg2MW9VX3FjUk41QVFVLURwVGFLRTkyNWlENy1IcGZjNW9wY0Y5Q3d5eFg5emVUUF9hV3ZTQWNaNEN0VzdJRlFBR0picXJoUERacWNLbDZhTE8wdWlfZ3kxd2QzOXBOZV9uaUNGMkNJbGhNd3k0S2t3dTRGWVVxTTFRRlg3Ui1zLW1FLU1Mai1yaURjb2Fob2c4MDUyRHN5aldUVWMxLTVNbm5VQTdrYy0zLVFyOHRkNzZ3dGdhbXZXN3JHNkdfZ2RuRXFDM3R2TVB1cDNOdWZGTmpFNnNFTmMxTmFuZDdJUld5bERyQkJ0TGZXRk54NEdqN09hSmVMYV91NXUwNXFvMl9KV0hBNlB4bklNQ2U5WGZLUTdlX2dJenVGcDYwWHBsdTNpbE5mWGhWeXFuUkFPV0puR2h0RkhrR2MwTzJGUmp4bUR6UFlUWTlNbTJLa19hTUZZR0dscVpBbFBReTBRMDNseXo4SXNnZWt4VFdpOERqLV9ZczRkR0QwRFJQM0pqdHluWktDUlp6WU9XSjVNZi1tYnNzcVlGTDRFMzNlSmRTazFfTkNxSjAwM0wxNk9Sd2h1SWpfOW5MVWMtVXYyYlVZR0VuaHRpN1pnNnpHME5raVBMd2h2dDRyMV8yZGFJNnlkcmhtSWdmNlpLN19NcjNkc002dXFxQzhTaDZzRlgzNUJ1SzVpVnp6NVU1Y2luUlM4UEJoajNTOUJadnE1MlhzV0kxSzBObXkteVhNM3RKYW9heDVWWFJ1NGlDM0l0elRPbThwUU9oYkVkbC1PZFNLSHY3WHJiZWpEamNIVC00MlNNWV9qcHdjNDRjRlVhZXlrLTlicVBNaDlDeXdRb0Fwc3RmUGFvbURQZ29yckliaS1VUDNxcXVlYTJJRUhXNUVobk1KUDhHZE16UzBLeDViYVRwZWY3d2w0d253eEZYcExKRGpsaGlBUElaTzB3eUVadnROX1dabENGb3R4ZF9aS05KY0dHTVZaYzRFc1Z4TlZGbFd2NjdYRzJMTzVwU2NaN1Y3MzQ2Z2pzV2RSMzJBbjg0MEhaZmhoREloY0oxOFdjNDZNdVZfYlRKU1Q1M2hYdHgwUjVsTV9USjZCZXlQTTdNRWc3bUxOcXRDVkpTdnJxR0hkWWpaRUdrOEFyNHk4MENwVzdob0hUSkJvam4zZW1kcGxZUjg0RXFRNnBxSUg1MDVHdHRwVlFkWWhHM0Zy
ZVFvMF96R2V5YjBuMnVZTU5CQ3pVci16SGJlQTQtbnFLa1E2eHFncUg3UmYyYlZvOF82a3d2ZE4tbmxIUlNYYjlrck9QYk5CcV9faXludS1yem1JNjFBdVYyb21RQWFMMFkxX0s1TjQ4czZ2WXI3X0FzRWdNTlZndHl4bnVOTHl2YlZfaURQV053dHl4N1czRFdzaVFnRHB0MWRDV2ZuU2lzX1NZZkRQYzhsT3ItZWw0dVJlVmtFWUM5cEppOGxuYVdpQkN5dV9hQ2dodTJvV3REVkw2dVVDaGtvc0Zqd0V2dldLZEVNRVRRNVRUVmw5aHZmZEpHdk1wS0xwRFc5Vmx4dTdfdGZDRUtCU29qdEVIOW5VdjBmeGpFMFZHSUthamtVN1E2bDZqaEFackVSQnZMN0tyaUhIcUs1ZHMzMzl2TnhadGIwZW5QNS1BM3pSODY3WVFsLU1jeUpCMG1PWmhPVT0=
|
||||||
|
|
||||||
# Teamsbot Browser Bot Service (service-main-teams-browser-bot on Infomaniak)
|
# Teamsbot Browser Bot Service (service-main-teams-browser-bot on Infomaniak)
|
||||||
TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100
|
TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,7 @@ Connector_AiMistral_API_SECRET = PROD_ENC:Z0FBQUFBQnFGdnVLRHplbzNheDhIdndsU0xUeG
|
||||||
Service_MSFT_TENANT_ID = common
|
Service_MSFT_TENANT_ID = common
|
||||||
|
|
||||||
# Google Cloud Speech Services configuration
|
# Google Cloud Speech Services configuration
|
||||||
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnBDM1Z4NFQxaF9uN3h1cVB6dnZid1c1R1VfNDlSQ1NHMEVDZWtKanpMQ29CLXc1MXBqRm1hQ0YtWVhaejBMY1ZTOEFEVlpWQ3hrYkFza1E2RDNsYkdMMndNR0VGNTMwVDRGdURJY3hyaVFxVjEtSEYwNHJzeWM3WmlpZW9jU2E3NTgycEV2allqQ3dJRTNyRFAzaDJ6dklKeXpNRkJhYjFzUkptN2dpbkNpMklrcGxuZl9vTkt3T0JvNm1YTXd5UlkwZWptUXdWVFpnV2J4X3J2WUhIUlFkSElFVnlqMnlJRnNHTnlpMWs2R1dZc2ROWjNYZG85cndmd1E5cUZnVmZRYnVjTG43dXFmSWd2bGFfVWFWSmtpWkpndWNlSUNwcnFNU2NqZXFaV0xsY3l3SElLRkVHcHZGZERKV1ltcGhTS0dhTko1VTJLYzNoZjRkSGVEX3dTMWVVTmdDczV5cE1JQUdSbUJGUm11eFhTVjJHbkt0SzB4UG1Dc2xmbnp1Y041Y2RTeWRuWGdmQy1sTGx0MGtnM2VJQ3EyLXViRlNhTU9ybzZkR1N1bXE5SXhlZENWRFpWSGlYOWx4SUQ3UlR0ZEVxQkxNakRUVFRiUmFnbklOalphLUZkRFVVaXBRUk5NZW5PaUZydTFmQkNPSTdTVTNZd0plWXllNVFJdmN4MVcyTGlwMGFtVjBzOGRxR1FjbzhfYW5zdTB0ZEZBTTJhakltazh1dktNMUZsOUItdFdTb1pIaUxySllXNkdlY20zUS0wTnpFNTB2SU5acG1VcXhyaHBmME8takw3RDh5T043T2VGOV92TzNya2pWSlpYVjZDdXlZcjM3a0hPTlhkaW9oQmxqQlpGRFYyTTY4WmZmT3k4Tk1tdXRuSGdTUVpNT2NKenhXb05PdXBfSEdhMTNxNjdpNXlKUUI2YUgydFFPX1VvXzVJb0UxWTU2YVNiNDQ0QndZanhMMHR1cGdHWGhvcEg1QXEtSXZJdTdZUE12ZEVVWkF4QmtsQS1GYnY3SFIxSHlsOGVfcEpGS1A4QUVEQWNEOFZYYlljQ3ByTU03YU16Y0UzUnJQZEprSWNjT1ZXVEtDWi03Y3ZzRVdYUTlabXJISEo5THRHVXVuM0xqbzA4bGVlZVpOMk1QMmptb21tV0pTMlVoOXdWVU95UW1iQmttc2w1RG9mMWwxXzg1T2IxYUVmTUJEZkpUdTFDTzZ3RlBFeUFiX01iRTZNWkNaSG45TkFOM2pzbUJRZ2N0VFpoejJUTG1RODY3TzZpSzVkYUQzaEpfY2pSTkRzU0VpanlkdXVQQmJ2WU5peno4QWNLTDVxZTlhSHI3NnNiM0k0Y3JkQ0xaOU05bGtsQl8zQklvaktWSDZ4aVp2MHlYelJuUDJyTU9CZC1OZjJxNFc1dDcwSUlxaVh1LTMyWWFwU0IwUU9kOUFpMWpnOERtLTh1VmJiNGVwcXBMbU5fMjVZc0hFbmxQT2puSFd1ZGpyTkphLU5sVlBZWWxrWEZrWGJQWmVkN19tZFZfZ1l1V3pSWlA0V0ZxM2lrWnl2NU9WeTdCbDROSmhfeENKTFhMVXk1d195S2JMUFJoRXZjcVo4V2g0MTNKRnZhUE1wRkNPM3FZOGdVazJPeW5PSGpuZnFGTTdJMkRnam5rUlV6NFlqODlIelRYaEN5VjdJNnVwbllNODNCTFRHMWlXbmM1VlRxbXB3Wm9LRjVrQUpjYzRNMThUMWwwSVhBMUlyamtPZnE4R0o4bEdHay1zMjR5RDJkZ1lYRHZaNHVHU2otR3ZpN25LZlEySEU0UmdTNzJGVHNWQXMyb0dVMV9WUE13ODhZWUFaakxGOWZieGNXZkNYRnV5djEyWTZLcmdrajRBLU1rS1Z0VVRkOWlDMU9fMGVmYXFhZXJGMUhpNkdmb2hkbzZ1OWV6VlNmVzN
ISjVYTFh6SjJNdWR5MWZidE8yVEo2dnRrZXhMRXBPczUwTG13OGhNUVpIQm0zQmRKRnJ0Nl8wNW1Ob0dHRDVpU0NWREV3TkY2SjktdVBkMFU1ZXBmSFpHQ3FHNTRZdTJvaExpZVEtLTU4YTVyeFBpNDdEajZtWUc4c1dBeUJqQ3NIY1NLS0FIMUxGZzZxNFNkOG9ORGNHWWJCVnZuNnJVTEtoQi1mRTZyUl81ZWJJMi1KOGdERzBhNVRZeHRYUUlqY2JvMFlaNHhWMU9pWFFiZjdaLUhkaG15TTBPZVlkS2R5UVdENTI4QVFiY1RJV0ZNZnlpVWxfZmlnN1BXbGdrbjFGUkhzYl9qeHBxVVJacUE4bjZETENHVFpSamh0NVpOM2hMYTZjYzBuS3J0a3hhZGxSM1V5UHd2OTU3ZHY0Yy1xWDBkWUk0Ymp0MWVrS3YzSktKODhQZnY3QTZ1Wm1VZkZJbS1jamdreks1ZlhpQjFOUDFiOHJ2Nm9NcmdTdU5LQXV2RkZWZEFNZnVKUjVwcVY3dDdhQnpmRVJ6SmlvVXpDM0ZiYXh5bGE2X04tTE9qZ3BiTnN3TF9ZaFRxSUpjNjB1dXZBcy1TZHRHTjFjSUR3WUl4cE9VNzB5Rkk4U3Z1SVZYTl9sYXlZVk83UnFrMlVmcnBpam9lRUlCY19DdVJwOXl2TVVDV1pMRFZTZk9MY3Z1eXA0MnhGazc5YllQaWtOeTc4NjlOa2lGY05RRzY1cG9nbGpYelc4c3FicWxWRkg0YzRSamFlQ19zOU14YWJreU9pNDREZVJ3a0REMUxGTzF1XzI1bEF3VXVZRjlBeWFiLXJsOXgza3VZem1WckhWSnVNbDBNcldadU8xQ3RwOTl5NGgtVlR0QklCLWl5WkE4V1FlQTBCOVU1RE9sQlRrYUNZOGdfUmEwbEZvUTFGUEFWVmQ4V1FhOU9VNjZqemRpZm1sUDhZQTJ0YVBRbWZldkF5THV4QXpfdUtNZ0tlcGdSRFM3c0lDOTNQbnBxdmxYYWNpTmI3MW9BMlZIdTQ5RldudHpNQWQ5NDNPLVVTLXVVNzdHZXh4UXpZa3dVa2J4dTFDV1RkYjRnWXU2M3lJekRYWGNMcWU5OVh6U2xZWDh6MmpqcnpiOHlnMjA5S3RFQm1NZjNSM21adkVnTUpSYVhkTzNkNnJCTmljY0x1cl9kMkx3UHhySjZEdHREanZERzNEUTFlTkR0NWlBczAtdmFGTjdZNVpTMlkxV2czYW5RN2lqemg4eUViZDV6RjdKNXdFcUlvcVhoNkJ6eVJkR1pua1hnNzQwOEs2TXJYSlpGcW9qRDU2QjBOWFFtdXBJRkRKbmdZUF9ZSmRPVEtvUjVhLTV1NjdXQjRhS0duaEtJb2FrQnNjUTRvdFMxdkdTNk1NYlFHUFhhYTJ1eUN3WHN4UlJ4UjdrZjY0SzFGYWVFN1k0cGJnc1RjNmFUenR4NHljbVhablZSWHZmUVN3cXRHNjhsX1BSZWEzdTJUZFA0S2pTaU9YMnZIQ1ZPcGhWMFJqZkVEMWRMR1h3SnU0Z2FzZ3VGM3puNzdhVjhaQXNIWHFsbjB0TDVYSFdSNV9rdWhUUUhSZHBGYkJIVDB5SDdlMC13QTVnS0g5Qkg5RGNxSGJlelVndUhPcEQ0QkRKMTJTZUM1OXJhVm0zYjU0OVY2dk9MQVBheklIQXpVNW9Yc0ROVjEzaFZTWmVxYlBWMlNlSzladzJ6TmNuMG5FVVZkN1VZN1pfS2ZHa0lQcE80S24wSnQtVlJVV09OVWJ3M09YMkZpV2ktVF9ENHhKU2dfYUQ2aUVyamk0VHJHQmVfVHU4clpUTFoteW5aSWRPV1M0RDRMTms4NGRoYmJfVE82aUl2X3VieVJOdDhBQmRwdzdnRTVBNzZwaW93dUlZb3ZRYUtOeG9ULWxvNVp5a0haSjdkcUhRb3d6UGIxRUpCVkVYX2d6TkRqQVozUWxkNGFoc1FXYVd
2YWNkME9Qclo0bjYxMFRWTy1nbnI5NTBJNzRMMDluUXRKYTFqQUN4d0d5aHVlamN3Tkk3NWJXeXR0TW9BeUg5Vnp4Q2RnZUY3b3AtMDlrNmlrSGR0eGRtbUdUd2lFRWg4MklEeWJHN2wwZEpVSXMxNDNOWjRFS0tPdWxhMmFCckhfRENIY184aEFDZXNrRDl2dHQtQW12UnRuQXJjaDJoTUpiYkNWQUtfRG9GMUZoNWM4UnBYZ29RWWs2NHcyUm5kdTF3Vk1GeFpiRUJLaVZ2UGFjbi1jV3lMV0N2ZDl4VERPN295X01NNG56ZjZkRzZoYUtmY1E5NlVXemx2SnVfb19iSXg0R2M3Mjd1a2JRPT0=
|
Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQnFIc3YtNDZzenJuZEZiQnVMOWRmZjl3R29QOWZRaGlPdk56WG1DR0FSZU5DM3dENWdoMmRpaks1U1VDNDJkZ3d3UXhSbXlkZ2h3SGZfdk54WXVidF82VkdJQXZiRTk0UlhZaUY1b2kwNzNPSm52VFdsdkwtaHJBb2dpRDBVLXRwd19Bb0dUZDkyV1VWZDJ1TG5mZ0ktYXpuS3U1U0JkZUk5TXpMdnhOaUtMN3BIb0pEZ1N0SlpFN3NNby15VTRfWWtxaF9DYjlJcnVKb0ZualVMTUx2aVNGY0JJdE1oZy1xSVBUZDF1aDM0TGVlTzVrNkFHcjlhcEk0SmRIMTFGdDFTMVUxX1dERk9NTXZMb0tVTFRoc20xME1uRkdVV0Z5N200ZTQzSjVsVExoa2VRZmFBU21ZczF0Vm9Ib3BZM2ZneDkwak12UmFyWWd0eng3ZVVFTUFLVzNOazcxeUhLVWUxcEFIZWtNRi1mT29kM1pqNGJJUUh3UVBlNGY3SlotOWZFUk5aQXFXcUFVdnUzc0Z5bERXYUNPbG14VnBNenFvb2tiQ3lZeHNHUVBlQTdTdVdXOEkxaGxCX016WWktWmN2WFcwM0VmVHdvMHVnY212VFE2cjJwUjdENkFCZF9GcUktWWpmWlNXNWVTMHBPdzVxRi15d3FSRDFra2k0NEFmTmpUeVh3SHRuZWE3WGJ4eUNIcE5tdnRqX2NCZnJoMEI2emU4U0ZYN1Nmdlhva1NacFo3UFh3WnpSdGw5ZmNpSGhicFo0ZThReXl3LW9vUzZaMkFHX2lJalFEMWtjZVdqbVpIZGk0cEdEU01TMl9xQkdSNDllTS1GV3lXS0xROTJvSlhaTjlXenJhQ3lOd2p0VjR5ZjEyektUZGJ3UThJOVJuMzhsTTVBVW9BcDFtcjk5Y0pVeW0zX3R0Nk81R3VDRWEzZnRqSXhFUW5ONHFTSWlwQU4yazlDb01KYlFQRjBFVTljdEJIY29WdF9hUkRJOThVTVFfWlJQUXI0Z3RzWFlzR1ZxUWFBd2I1SW1EMWlKdVprT3dKYTlaREp6TkZEZmVsZGEyalZGc3dHaUkyamdmQWtUT2czNzBCZEg0Vk1HSHFpRnhRYzBRNnN3TFkyaE9uMTVXN1VJTmJwbTNUMTdZbVRyc2d6Yl9aaVBXNmFvanROQVhfbWpXTDRlR1RfbklnYnJUQTZPX2JfNnlrWDVDUWJ4Z3YwNXVsTkJFQlRhTG5DVHpwejdsMGl1bzRfRXRTU2dmb3BVMUo4VkQwa0hsTmFBZnVjVzRrQmNzS2R0ZHNGV24yQnktWENtMUp6eG1MQW1ENE1vWFpFUF9PMEpWZVlxX05hSW1QUGlVT1l3MFp4bDBDZVVldHlEUlVCY1VvVlBNTlBhWFlmcVRobDNqRHo0QjZvNDBqVUVKN3JOb2dtYXQxSWw5NERSeEVRdHNUWndzUkY5RjdBOG1FZFRiVTNVSzl5bDNwdTl2SVd5aW5Ub2Q1YlBDRnpBUDkteU44YnV5X05ONmNndm9teUpqaFZVcVlHdGVRcXRpZkJLVnRuMTJSUFhGWndibExqRW03YUJTWXZXUXJ5WXlvd01ISDFuUFpaMFJzNFVQbWRUb2h1Zi1rcXJXMkRQSUFPeWFJN3lzOFc1d3BjWG1kbWlQWGUwelNiSnJXbUpnajdlQTlQR19XNTF0Q3JYcUMzaGp3eU0yZGhKa3FtX0tleHBfekZaWlRJRlZlSzNDVU56cml0TnFJeUc3b09uYVlwbGxFVFR6WFJVMzRmak5yWjBhcjl5ZmJpQ3hpajRXV1dwbDF5N25tNnI2bWtFem1TS08yV3JybUF0enYxRXpkUVdTNVp4WVB0aldJUUN3TnhHcHdMczh5MTFETzNWLXZFSktsdU1vM1JSNXhraDlJRDl0MEhvR1NOQWRaQW1Ndzh
pZnFVa1hvdXNwY2FvaThHQjVMOXdySnNIcWJlWERfLXVOcHhpN2ZZOW4yVzB3VTI2a3hvVmFkc29aX2ZUZkY5bi04WEV4MTlxNXQ4cTcwaHE4X3hDWkQxelRwSUl2amZOQ0JXRlJjRFhJNVhjNjRmaXp5eG15LTN1MFRvN3BHTFRZQ1ZFVFYyNUxleFpKTHlIVzRnVHk1Y3ZUbV9RUDdqN1Z2M2ZqVG8wa2RoVHJPeENFRDNHV0wwdi1DbEdOVDFJZnRiZGEydlZyM2tQVExOVlo3LXhIUnhZUnB6a2UzZXNtTjR0S2NzUmFNOWNiSHhHTnJDWHowWk1tbVFKUC14M25aQ1hyYjhJM2pxOEtZY0J1WTZrU3l6cDJOdk5iSXpBUk41MFFVellVZFU4UWVDZXFkQnJFbGxQX2J0S3pReU8zZUdsZUgtTnJuSlpfTjdxR3UxWTBEV0JaRV93eE9qa2dNa2tVTHRxMWNyeUh2VWNrYkdKM3BZOURkUlBxUDA3R2M4NnlMTVR2dmNMZi1lZlhzalRJWlFocGRleVRJYXBBY2hCXzFGZEU4ZVFxbHNic3RDV2FYN1dNaWpkaGdwYTEzRkZYRlEtRXR1cERHdnJKX1Zzb1Q0MnVYZkVhb0VYU1JPdFhoV29TMlhTaEppR1lTTURLYmZnNS1pSzl4T1k5MXJ0YV9qX0ZyQ1R6RFFzRndrTW9IUVlxcG5jcTEyYVU3dkpIR0tZZTZiOXNIRFpIalRtUDFBLVNyd1NfNUMtLW52NVpFZGpQenJCOGw0UlJZNlZVT1ZXTm92R3k4c3hTQXFoNFE3TUFHcjRWc01zT082anJZT0laakl5VUk1WDdDaWlubjIwS3RNcjBjTTdpbUNxSmxNR05JaWtEQURlS1h6N2h0NE9CcW5rQ3NXWkwyNXVBUU5mLTU5MG8xX29xZ0t6Z2pKWmhMNG1BNXBhYWkzY0loSmluUXNKdURwQWRIV2laM2dHQTFxV19lbkZXWmdfWEdiWEZsMGVIWDdoMnJ5dzM0ZGtBM3BSRVp2QzFNbFJSWXBManN5WmFVMlp6aUpWMF9jMTRPbWptM1lsTE41NG1kUW4tT0ZqTzNaZnZ5ZzBLZzNNc1N1X2FMMVJ0N3o4a25LMkxKVUE0dTNhU3hZX3RFMUtKcEgtX1B0cTdEMmYyMzdPaEhoeWhaUGRITC11NzRWYTJnZldiUkFvdG95a1RwWnNKaERkT0kxN1RJMzZQZzFiSjl1SlJieTJjaHBMYmZDUlhTT2hvQnRPaTNhS3NzaVc1Tms0X0FyUHRsSXdCLW1OUWk1RkRKc3pqSjVQTFFROEN5M3pxUGVjZHI4SVM3Qmx1S1A2bEEzNWlVWkFndGpUSm4wcV9jRjQ5T0l1c3ZqN0w3Z1dMV2ZtbU9MbTVSOXphX3VLMko2ZEs3U0NIaFFIMVFIcnN0OGIxSjdxNGlHUHRnOEJDaGwzcXJYNFBnOGdFSVFuSGUyOWJ3WmtlVGhGQWk0THdZd1hUbGRydk83SWVzWUJrb21tSlNvVkJjdWYtcWo0aEc1Ri1XNTZoSENaRWJISmp3UlJNMU9vSnNzZ0VudXpxMDA3aGdfSDBNZlA0Y1gybkF4dGl6SzFOc1VMN0dzVkQxVllkSDhyby12SWNxTFRYdThJUm13S3p3cGFYc05TbVc2YVNtZEdCOFBCUXhadkIzNmdkbXpnc1pLYUhzOEtsY2kxVmNYZm9wOS1LOERLRHJhY2VhanNjaThUZW1rS01wUW05SFJxOGd1VF9STlJZWDRiTV92dXlQTkdxN3BYYTN1SUhRSjRNTy1PZWpGd0xhUlVES0hiWE5LUkM5dHNvenR3TVMySC1ueUZXUkxFY2VyRmhISGc2U2ZxeXY2VkJULV9pOTU1QkI5VUNndnVQcVItTW96VTBqRTdzem1IQ1UxVWtWdjhvTERFeGJ6M3dJNERUV1BTeUlRcG1fbUVjQ0lNREF5QkpLeHJHRkF
xQS1kZEE4bXJ2aVVSckVoTkZwNGtoRElIcUktQjA1bkNRclM4dWlqUVRXXzdlQ0VjQWZGSTZlR01NQmU5bHQ3bGNtZWU1eHVvRVdQRVU4Rmx0OFRTaWF3cGgyeFJoM25sRk1GNXJtdEpfcEJmYVFrZXd4eXl0c0ZKVjQ3MkFNRjh5bDBTbFZNd256dmxpQlo5Z1FRM1ZmVTJSb3VrZTk3cXVQYmZ6SnNUWGhlSUhrUjVWUHFwemNmbW1scWVxTkcxT1p5dVlvUjhCSVJaSnBjU0dpc3YzVkt1WUtrd2xoQlVNQXh1eDhmTXNISWMyUnBUMmIwamxlS0tjMVRiWDlBcE03b1BHR1FmdmlsX2ZlMTNCaFNvNG1TeTNiQXRNZ2Y1eE1IaFAxTUZGZ1YyZjEzTG9PaGRCdHJzVlB5Mm12T1NiX2RyT2d2RERCRWFHT0dadW5DZjNtdXE4cHhEQlpub2l3bz0=
|
||||||
|
|
||||||
# Teamsbot Browser Bot Service
|
# Teamsbot Browser Bot Service
|
||||||
TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100
|
TEAMSBOT_BROWSER_BOT_URL = http://teamsbot.poweron.swiss:4100
|
||||||
|
|
|
||||||
|
|
@ -341,8 +341,8 @@ class ModelRegistry:
|
||||||
modelRegistry = ModelRegistry()
|
modelRegistry = ModelRegistry()
|
||||||
|
|
||||||
# Eager pre-warm on first import: ensures connectors are ready in this process.
|
# Eager pre-warm on first import: ensures connectors are ready in this process.
|
||||||
# Critical for chatbot performance — avoids 4–8 s latency on first request.
|
# Critical for AI/agent performance — avoids 4–8 s latency on first request.
|
||||||
# Runs when this module is first imported (lifespan or first chatbot request).
|
# Runs when this module is first imported (lifespan or first AI request).
|
||||||
def _eager_prewarm() -> None:
|
def _eager_prewarm() -> None:
|
||||||
try:
|
try:
|
||||||
modelRegistry.ensureConnectorsRegistered()
|
modelRegistry.ensureConnectorsRegistered()
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,7 @@ Models (next-gen — RTX PRO 6000 96 GB, auto-activated when pulled in Ollama):
|
||||||
- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
|
- poweron-vision-general: Vision (llama4:scout); multimodal, long-context documents
|
||||||
- poweron-embed: Embedding (nomic-embed-text); local RAG embedding
|
- poweron-embed: Embedding (nomic-embed-text); local RAG embedding
|
||||||
|
|
||||||
Pricing (CHF per call):
|
Pricing: byte-based (~per-token via bytes/4), configured via the PRICE_* constants below.
|
||||||
- Text models: CHF 0.010
|
|
||||||
- Vision models: CHF 0.100
|
|
||||||
- Reasoning models: CHF 0.050
|
|
||||||
- Embedding: CHF 0.000 (flat rate)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -43,11 +39,20 @@ from modules.datamodels.datamodelAi import (
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Pricing constants (CHF)
|
# Pricing constants (CHF per 1k tokens; billed byte-based via bytes/4 ~ 1 token)
|
||||||
PRICE_TEXT_PER_CALL = 0.01 # CHF 0.010 per text model call
|
PRICE_INPUT_PER_1K = 0.0075
|
||||||
PRICE_VISION_PER_CALL = 0.10 # CHF 0.100 per vision model call
|
PRICE_OUTPUT_PER_1K = 0.0375
|
||||||
PRICE_REASONING_PER_CALL = 0.05 # CHF 0.050 per reasoning call (longer runtime)
|
PRICE_EMBED_PER_1K = 0.0005
|
||||||
PRICE_EMBED_PER_CALL = 0.00 # CHF 0.000 flat rate (local embedding)
|
|
||||||
|
|
||||||
|
def _calcPrivatePriceCHF(processingTime, bytesSent, bytesReceived):
    """Return the byte-based CHF price of one private text/vision/reasoning call.

    Bytes are converted to an approximate token count (4 bytes per token) and
    billed per 1000 tokens, with separate rates for input and output.

    Args:
        processingTime: Unused; present so the signature matches the
            ``calculatepriceCHF`` callback it is assigned to.
        bytesSent: Size of the request payload in bytes (input side).
        bytesReceived: Size of the response payload in bytes (output side).

    Returns:
        The combined input + output cost.
    """
    inputCost = (bytesSent / 4 / 1000) * PRICE_INPUT_PER_1K
    outputCost = (bytesReceived / 4 / 1000) * PRICE_OUTPUT_PER_1K
    return inputCost + outputCost
|
||||||
|
|
||||||
|
|
||||||
|
def _calcPrivateEmbedPriceCHF(processingTime, bytesSent, bytesReceived):
    """Return the byte-based CHF price of one private embedding call.

    Only the input side is billed: sent bytes are converted to an approximate
    token count (4 bytes per token) and charged per 1000 tokens.

    Args:
        processingTime: Unused; kept for the pricing-callback signature.
        bytesSent: Size of the request payload in bytes.
        bytesReceived: Unused; embedding output is not billed.

    Returns:
        The input cost.
    """
    kiloTokensSent = bytesSent / 4 / 1000
    return kiloTokensSent * PRICE_EMBED_PER_1K
|
||||||
|
|
||||||
|
|
||||||
# Private-LLM service URL (fixed; not configurable via env)
|
# Private-LLM service URL (fixed; not configurable via env)
|
||||||
|
|
@ -242,8 +247,8 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
maxTokens=4096,
|
maxTokens=4096,
|
||||||
contextLength=8192, # Reduced for RAM constraints
|
contextLength=8192, # Reduced for RAM constraints
|
||||||
costPer1kTokensInput=0.0, # Flat rate pricing
|
costPer1kTokensInput=PRICE_INPUT_PER_1K,
|
||||||
costPer1kTokensOutput=0.0, # Flat rate pricing
|
costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
|
||||||
speedRating=8, # Fast and efficient
|
speedRating=8, # Fast and efficient
|
||||||
qualityRating=9, # High quality text model
|
qualityRating=9, # High quality text model
|
||||||
functionCall=self.callAiText,
|
functionCall=self.callAiText,
|
||||||
|
|
@ -259,7 +264,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
(OperationTypeEnum.AGENT, 8),
|
(OperationTypeEnum.AGENT, 8),
|
||||||
),
|
),
|
||||||
version="qwen2.5:7b",
|
version="qwen2.5:7b",
|
||||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_TEXT_PER_CALL
|
calculatepriceCHF=_calcPrivatePriceCHF
|
||||||
),
|
),
|
||||||
"ollamaModel": "qwen2.5:7b"
|
"ollamaModel": "qwen2.5:7b"
|
||||||
},
|
},
|
||||||
|
|
@ -273,8 +278,8 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
maxTokens=2048,
|
maxTokens=2048,
|
||||||
contextLength=4096, # Reduced for RAM constraints (vision needs more)
|
contextLength=4096, # Reduced for RAM constraints (vision needs more)
|
||||||
costPer1kTokensInput=0.0, # Flat rate pricing
|
costPer1kTokensInput=PRICE_INPUT_PER_1K,
|
||||||
costPer1kTokensOutput=0.0, # Flat rate pricing
|
costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
|
||||||
speedRating=7,
|
speedRating=7,
|
||||||
qualityRating=9,
|
qualityRating=9,
|
||||||
functionCall=self.callAiVision,
|
functionCall=self.callAiVision,
|
||||||
|
|
@ -285,7 +290,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
|
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
|
||||||
),
|
),
|
||||||
version="qwen2.5vl:7b",
|
version="qwen2.5vl:7b",
|
||||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL
|
calculatepriceCHF=_calcPrivatePriceCHF
|
||||||
),
|
),
|
||||||
"ollamaModel": "qwen2.5vl:7b"
|
"ollamaModel": "qwen2.5vl:7b"
|
||||||
},
|
},
|
||||||
|
|
@ -299,8 +304,8 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
maxTokens=2048,
|
maxTokens=2048,
|
||||||
contextLength=4096, # Reduced for RAM constraints
|
contextLength=4096, # Reduced for RAM constraints
|
||||||
costPer1kTokensInput=0.0, # Flat rate pricing
|
costPer1kTokensInput=PRICE_INPUT_PER_1K,
|
||||||
costPer1kTokensOutput=0.0, # Flat rate pricing
|
costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
|
||||||
speedRating=9, # Fast due to small 2B model
|
speedRating=9, # Fast due to small 2B model
|
||||||
qualityRating=8, # Good for document understanding
|
qualityRating=8, # Good for document understanding
|
||||||
functionCall=self.callAiVision,
|
functionCall=self.callAiVision,
|
||||||
|
|
@ -311,7 +316,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
|
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 9),
|
||||||
),
|
),
|
||||||
version="granite3.2-vision",
|
version="granite3.2-vision",
|
||||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL
|
calculatepriceCHF=_calcPrivatePriceCHF
|
||||||
),
|
),
|
||||||
"ollamaModel": "granite3.2-vision"
|
"ollamaModel": "granite3.2-vision"
|
||||||
},
|
},
|
||||||
|
|
@ -326,8 +331,8 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
maxTokens=8192,
|
maxTokens=8192,
|
||||||
contextLength=65536,
|
contextLength=65536,
|
||||||
costPer1kTokensInput=0.0,
|
costPer1kTokensInput=PRICE_INPUT_PER_1K,
|
||||||
costPer1kTokensOutput=0.0,
|
costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
|
||||||
speedRating=5,
|
speedRating=5,
|
||||||
qualityRating=10,
|
qualityRating=10,
|
||||||
functionCall=self.callAiText,
|
functionCall=self.callAiText,
|
||||||
|
|
@ -342,7 +347,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
(OperationTypeEnum.AGENT, 9),
|
(OperationTypeEnum.AGENT, 9),
|
||||||
),
|
),
|
||||||
version="deepseek-r1:70b",
|
version="deepseek-r1:70b",
|
||||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_REASONING_PER_CALL
|
calculatepriceCHF=_calcPrivatePriceCHF
|
||||||
),
|
),
|
||||||
"ollamaModel": "deepseek-r1:70b"
|
"ollamaModel": "deepseek-r1:70b"
|
||||||
},
|
},
|
||||||
|
|
@ -356,8 +361,8 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
maxTokens=4096,
|
maxTokens=4096,
|
||||||
contextLength=131072,
|
contextLength=131072,
|
||||||
costPer1kTokensInput=0.0,
|
costPer1kTokensInput=PRICE_INPUT_PER_1K,
|
||||||
costPer1kTokensOutput=0.0,
|
costPer1kTokensOutput=PRICE_OUTPUT_PER_1K,
|
||||||
speedRating=7,
|
speedRating=7,
|
||||||
qualityRating=10,
|
qualityRating=10,
|
||||||
functionCall=self.callAiVision,
|
functionCall=self.callAiVision,
|
||||||
|
|
@ -368,7 +373,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 10),
|
(OperationTypeEnum.NEUTRALIZATION_IMAGE, 10),
|
||||||
),
|
),
|
||||||
version="llama4:scout",
|
version="llama4:scout",
|
||||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_VISION_PER_CALL
|
calculatepriceCHF=_calcPrivatePriceCHF
|
||||||
),
|
),
|
||||||
"ollamaModel": "llama4:scout"
|
"ollamaModel": "llama4:scout"
|
||||||
},
|
},
|
||||||
|
|
@ -382,7 +387,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
maxTokens=0,
|
maxTokens=0,
|
||||||
contextLength=8192,
|
contextLength=8192,
|
||||||
costPer1kTokensInput=0.0,
|
costPer1kTokensInput=PRICE_EMBED_PER_1K,
|
||||||
costPer1kTokensOutput=0.0,
|
costPer1kTokensOutput=0.0,
|
||||||
speedRating=10,
|
speedRating=10,
|
||||||
qualityRating=8,
|
qualityRating=8,
|
||||||
|
|
@ -393,7 +398,7 @@ class AiPrivateLlm(BaseConnectorAi):
|
||||||
(OperationTypeEnum.EMBEDDING, 9),
|
(OperationTypeEnum.EMBEDDING, 9),
|
||||||
),
|
),
|
||||||
version="nomic-embed-text",
|
version="nomic-embed-text",
|
||||||
calculatepriceCHF=lambda processingTime, bytesSent, bytesReceived: PRICE_EMBED_PER_CALL
|
calculatepriceCHF=_calcPrivateEmbedPriceCHF
|
||||||
),
|
),
|
||||||
"ollamaModel": "nomic-embed-text"
|
"ollamaModel": "nomic-embed-text"
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -230,6 +230,7 @@ _CONNECT_TIMEOUT_S = 10
|
||||||
# `_BORROW_WAIT_TIMEOUT_S` seconds before giving up.
|
# `_BORROW_WAIT_TIMEOUT_S` seconds before giving up.
|
||||||
_BORROW_WAIT_TIMEOUT_S = 30.0
|
_BORROW_WAIT_TIMEOUT_S = 30.0
|
||||||
_BORROW_WAIT_BACKOFF_S = 0.05
|
_BORROW_WAIT_BACKOFF_S = 0.05
|
||||||
|
_shuttingDown = False
|
||||||
|
|
||||||
|
|
||||||
def _resolvePoolMax() -> int:
|
def _resolvePoolMax() -> int:
|
||||||
|
|
@ -315,7 +316,13 @@ class _PoolRegistry:
|
||||||
|
|
||||||
|
|
||||||
def closeAllPools() -> None:
|
def closeAllPools() -> None:
|
||||||
"""Public entry point for FastAPI lifespan shutdown hook."""
|
"""Public entry point for FastAPI lifespan shutdown hook.
|
||||||
|
|
||||||
|
Sets the shutdown flag first so that any in-flight ``_acquireConn`` loops
|
||||||
|
abort immediately instead of polling for up to 30 s.
|
||||||
|
"""
|
||||||
|
global _shuttingDown
|
||||||
|
_shuttingDown = True
|
||||||
_PoolRegistry.closeAll()
|
_PoolRegistry.closeAll()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -590,7 +597,10 @@ class DatabaseConnector:
|
||||||
|
|
||||||
psycopg2's pool throws on exhaustion instead of queueing — this helper
|
psycopg2's pool throws on exhaustion instead of queueing — this helper
|
||||||
polls with a short backoff so callers see queue semantics.
|
polls with a short backoff so callers see queue semantics.
|
||||||
|
Aborts immediately when the application is shutting down.
|
||||||
"""
|
"""
|
||||||
|
if _shuttingDown:
|
||||||
|
raise psycopg2.pool.PoolError("Application is shutting down")
|
||||||
deadline = time.monotonic() + _BORROW_WAIT_TIMEOUT_S
|
deadline = time.monotonic() + _BORROW_WAIT_TIMEOUT_S
|
||||||
attempt = 0
|
attempt = 0
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -598,6 +608,8 @@ class DatabaseConnector:
|
||||||
return pool.getconn()
|
return pool.getconn()
|
||||||
except psycopg2.pool.PoolError as e:
|
except psycopg2.pool.PoolError as e:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
|
if _shuttingDown:
|
||||||
|
raise psycopg2.pool.PoolError("Application is shutting down")
|
||||||
if time.monotonic() >= deadline:
|
if time.monotonic() >= deadline:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Connection pool exhausted after %.1fs wait (%d retries)",
|
"Connection pool exhausted after %.1fs wait (%d retries)",
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,9 @@
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
"""Google ProviderConnector -- Drive and Gmail via Google OAuth."""
|
"""Google ProviderConnector -- Drive and Gmail via Google OAuth."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import urllib.parse
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
@ -18,6 +20,41 @@ _CALENDAR_BASE = "https://www.googleapis.com/calendar/v3"
|
||||||
_PEOPLE_BASE = "https://people.googleapis.com/v1"
|
_PEOPLE_BASE = "https://people.googleapis.com/v1"
|
||||||
|
|
||||||
|
|
||||||
|
def _parseGoogleDateRange(text: Optional[str]) -> tuple:
|
||||||
|
"""Parse a date range from a filter/query string for Calendar timeMin/timeMax.
|
||||||
|
|
||||||
|
Supports two ISO dates, a single ISO date (~31 day window) or a YYYY-MM
|
||||||
|
month pattern. Returns RFC3339 UTC strings (timeMin, timeMax) or (None, None).
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
if not text:
|
||||||
|
return (None, None)
|
||||||
|
|
||||||
|
def _toRfc3339(value: str) -> str:
|
||||||
|
value = value.strip().rstrip("Z")
|
||||||
|
if "T" not in value:
|
||||||
|
value = f"{value}T00:00:00"
|
||||||
|
return f"{value}Z"
|
||||||
|
|
||||||
|
isoMatch = re.findall(r'\d{4}-\d{2}-\d{2}(?:T[\d:]+)?', text)
|
||||||
|
if len(isoMatch) >= 2:
|
||||||
|
return (_toRfc3339(isoMatch[0]), _toRfc3339(isoMatch[1]))
|
||||||
|
if len(isoMatch) == 1:
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(isoMatch[0])
|
||||||
|
return (_toRfc3339(isoMatch[0]), _toRfc3339((dt + timedelta(days=31)).strftime('%Y-%m-%dT00:00:00')))
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
monthMatch = re.match(r'^(\d{4})-(\d{2})$', text.strip())
|
||||||
|
if monthMatch:
|
||||||
|
year, month = int(monthMatch.group(1)), int(monthMatch.group(2))
|
||||||
|
start = f"{year}-{month:02d}-01T00:00:00"
|
||||||
|
end = f"{year + 1}-01-01T00:00:00" if month == 12 else f"{year}-{month + 1:02d}-01T00:00:00"
|
||||||
|
return (_toRfc3339(start), _toRfc3339(end))
|
||||||
|
return (None, None)
|
||||||
|
|
||||||
|
|
||||||
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
|
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
|
||||||
headers = {"Authorization": f"Bearer {token}"}
|
headers = {"Authorization": f"Bearer {token}"}
|
||||||
timeout = aiohttp.ClientTimeout(total=20)
|
timeout = aiohttp.ClientTimeout(total=20)
|
||||||
|
|
@ -33,6 +70,17 @@ async def _googleGet(token: str, url: str) -> Dict[str, Any]:
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def _raiseGoogleError(result: Dict[str, Any], ctx: str) -> None:
    """Log a failed Google API response and raise it as a RuntimeError.

    Browse/search must not turn an API failure into an empty result list,
    because that makes a real error look like "nothing found".

    Args:
        result: The response object; when it is a dict, its ``"error"`` entry
            is used as the error detail.
        ctx: Short label of the failing operation, included in the message.

    Raises:
        RuntimeError: Always.
    """
    err = None
    if isinstance(result, dict):
        err = result.get("error")
    # Fall back to the whole response when there is no (truthy) error entry.
    detail = err or result
    logger.warning("Google error (%s): %s", ctx, detail)
    raise RuntimeError(f"Google error ({ctx}): {detail}")
|
||||||
|
|
||||||
|
|
||||||
class DriveAdapter(ServiceAdapter):
|
class DriveAdapter(ServiceAdapter):
|
||||||
"""Google Drive ServiceAdapter -- browse files and folders."""
|
"""Google Drive ServiceAdapter -- browse files and folders."""
|
||||||
|
|
||||||
|
|
@ -53,8 +101,7 @@ class DriveAdapter(ServiceAdapter):
|
||||||
|
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Google Drive browse failed: {result['error']}")
|
_raiseGoogleError(result, "Google Drive browse")
|
||||||
return []
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for f in result.get("files", []):
|
for f in result.get("files", []):
|
||||||
|
|
@ -125,27 +172,51 @@ class DriveAdapter(ServiceAdapter):
|
||||||
path: Optional[str] = None,
|
path: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
safeQuery = query.replace("'", "\\'")
|
safeQuery = query.replace("\\", "\\\\").replace("'", "\\'")
|
||||||
folderId = (path or "").strip("/")
|
folderId = (path or "").strip("/")
|
||||||
qParts = [f"name contains '{safeQuery}'", "trashed=false"]
|
# `fullText contains` matches file name AND content (and some metadata),
|
||||||
|
# which is what users expect from a search -- not just the file name.
|
||||||
|
qParts = [f"fullText contains '{safeQuery}'", "trashed=false"]
|
||||||
if folderId:
|
if folderId:
|
||||||
qParts.append(f"'{folderId}' in parents")
|
qParts.append(f"'{folderId}' in parents")
|
||||||
qStr = " and ".join(qParts)
|
qStr = " and ".join(qParts)
|
||||||
pageSize = max(1, min(int(limit or 100), 1000))
|
effectiveLimit = max(1, int(limit)) if limit is not None else None
|
||||||
url = f"{_DRIVE_BASE}/files?q={qStr}&fields=files(id,name,mimeType,size)&pageSize={pageSize}"
|
pageSize = min(effectiveLimit or 100, 1000)
|
||||||
logger.debug(f"Google Drive search: q={qStr}")
|
logger.debug(f"Google Drive search: q={qStr}")
|
||||||
result = await _googleGet(self._token, url)
|
entries: List[ExternalEntry] = []
|
||||||
if "error" in result:
|
pageToken: Optional[str] = None
|
||||||
return []
|
hardCap = effectiveLimit or 1000
|
||||||
return [
|
while len(entries) < hardCap:
|
||||||
ExternalEntry(
|
params = {
|
||||||
name=f.get("name", ""),
|
"q": qStr,
|
||||||
path=f"/{f.get('id', '')}",
|
"fields": "nextPageToken,files(id,name,mimeType,size,modifiedTime)",
|
||||||
isFolder=f.get("mimeType") == "application/vnd.google-apps.folder",
|
"pageSize": str(pageSize),
|
||||||
size=int(f.get("size", 0)) if f.get("size") else None,
|
}
|
||||||
)
|
if pageToken:
|
||||||
for f in result.get("files", [])
|
params["pageToken"] = pageToken
|
||||||
]
|
url = f"{_DRIVE_BASE}/files?{urllib.parse.urlencode(params)}"
|
||||||
|
result = await _googleGet(self._token, url)
|
||||||
|
if "error" in result:
|
||||||
|
if not entries:
|
||||||
|
_raiseGoogleError(result, "Google Drive search")
|
||||||
|
break
|
||||||
|
for f in result.get("files", []):
|
||||||
|
entries.append(ExternalEntry(
|
||||||
|
name=f.get("name", ""),
|
||||||
|
path=f"/{f.get('id', '')}",
|
||||||
|
isFolder=f.get("mimeType") == "application/vnd.google-apps.folder",
|
||||||
|
size=int(f.get("size", 0)) if f.get("size") else None,
|
||||||
|
mimeType=f.get("mimeType"),
|
||||||
|
metadata={"id": f.get("id"), "modifiedTime": f.get("modifiedTime")},
|
||||||
|
))
|
||||||
|
if len(entries) >= hardCap:
|
||||||
|
break
|
||||||
|
pageToken = result.get("nextPageToken")
|
||||||
|
if not pageToken:
|
||||||
|
break
|
||||||
|
if effectiveLimit is not None:
|
||||||
|
entries = entries[:effectiveLimit]
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
class GmailAdapter(ServiceAdapter):
|
class GmailAdapter(ServiceAdapter):
|
||||||
|
|
@ -155,7 +226,8 @@ class GmailAdapter(ServiceAdapter):
|
||||||
self._token = accessToken
|
self._token = accessToken
|
||||||
|
|
||||||
_DEFAULT_MESSAGE_LIMIT = 100
|
_DEFAULT_MESSAGE_LIMIT = 100
|
||||||
_MAX_MESSAGE_LIMIT = 500
|
_MAX_MESSAGE_LIMIT = 1000
|
||||||
|
_METADATA_FETCH_CAP = 200
|
||||||
|
|
||||||
async def browse(
|
async def browse(
|
||||||
self,
|
self,
|
||||||
|
|
@ -169,8 +241,7 @@ class GmailAdapter(ServiceAdapter):
|
||||||
url = f"{_GMAIL_BASE}/users/me/labels"
|
url = f"{_GMAIL_BASE}/users/me/labels"
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Gmail labels failed: {result['error']}")
|
_raiseGoogleError(result, "Gmail labels")
|
||||||
return []
|
|
||||||
_SYSTEM_LABELS = {"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"}
|
_SYSTEM_LABELS = {"INBOX", "SENT", "DRAFT", "TRASH", "SPAM", "STARRED", "IMPORTANT"}
|
||||||
labels = []
|
labels = []
|
||||||
for lbl in result.get("labels", []):
|
for lbl in result.get("labels", []):
|
||||||
|
|
@ -188,23 +259,116 @@ class GmailAdapter(ServiceAdapter):
|
||||||
return labels
|
return labels
|
||||||
|
|
||||||
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
||||||
url = f"{_GMAIL_BASE}/users/me/messages?labelIds={cleanPath}&maxResults={effectiveLimit}"
|
labelId = await self._resolveLabelId(cleanPath)
|
||||||
result = await _googleGet(self._token, url)
|
if not labelId:
|
||||||
if "error" in result:
|
raise ValueError(
|
||||||
return []
|
f"Gmail label not found: '{cleanPath}'. Browse the mailbox root ('/') "
|
||||||
|
f"to list available labels."
|
||||||
|
)
|
||||||
|
msgIds, totalEstimate = await self._listMessageIds(
|
||||||
|
params={"labelIds": labelId}, limit=effectiveLimit,
|
||||||
|
)
|
||||||
|
entries = await self._fetchMessageEntries(
|
||||||
|
msgIds[:self._METADATA_FETCH_CAP], labelPath=labelId,
|
||||||
|
)
|
||||||
|
if totalEstimate and totalEstimate > len(msgIds):
|
||||||
|
entries.append(ExternalEntry(
|
||||||
|
name=f"(~{totalEstimate} total messages estimated, {len(msgIds)} listed)",
|
||||||
|
path=f"/{labelId}/_count", isFolder=False,
|
||||||
|
metadata={"totalEstimate": totalEstimate, "listed": len(msgIds)},
|
||||||
|
))
|
||||||
|
elif len(msgIds) > self._METADATA_FETCH_CAP:
|
||||||
|
entries.append(ExternalEntry(
|
||||||
|
name=f"({len(msgIds)} messages listed, metadata shown for first {self._METADATA_FETCH_CAP})",
|
||||||
|
path=f"/{labelId}/_count", isFolder=False,
|
||||||
|
metadata={"listed": len(msgIds), "metadataShown": self._METADATA_FETCH_CAP},
|
||||||
|
))
|
||||||
|
return entries
|
||||||
|
|
||||||
entries = []
|
async def _resolveLabelId(self, ref: str) -> Optional[str]:
|
||||||
for msg in result.get("messages", [])[:effectiveLimit]:
|
"""Resolve a Gmail label reference (display name / system name / id) to a
|
||||||
msgId = msg.get("id", "")
|
label id. Returns None if nothing matches so the caller can raise a clear
|
||||||
detailUrl = f"{_GMAIL_BASE}/users/me/messages/{msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date"
|
error instead of querying with an invalid label."""
|
||||||
|
if not ref:
|
||||||
|
return None
|
||||||
|
r = ref.strip()
|
||||||
|
result = await _googleGet(self._token, f"{_GMAIL_BASE}/users/me/labels")
|
||||||
|
if "error" in result:
|
||||||
|
_raiseGoogleError(result, "Gmail labels")
|
||||||
|
labels = result.get("labels", [])
|
||||||
|
# 1) exact id match (already-resolved id passes through)
|
||||||
|
for lbl in labels:
|
||||||
|
if lbl.get("id") == r:
|
||||||
|
return r
|
||||||
|
# 2) case-insensitive display-name match
|
||||||
|
for lbl in labels:
|
||||||
|
if (lbl.get("name") or "").strip().lower() == r.lower():
|
||||||
|
return lbl.get("id")
|
||||||
|
# 3) system label by uppercased name (INBOX, SENT, ...)
|
||||||
|
up = r.upper()
|
||||||
|
for lbl in labels:
|
||||||
|
if lbl.get("id") == up:
|
||||||
|
return up
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _listMessageIds(
    self, params: Dict[str, str], limit: int,
) -> tuple:
    """Page through ``messages.list`` and return ``(msgIds, totalEstimate)``.

    Gmail's ``maxResults`` caps at 500 per page, so ``nextPageToken`` is
    followed until ``limit`` ids were collected or there are no more pages.
    Each request only asks for the ids that are still missing, so the last
    page is not over-fetched. ``resultSizeEstimate`` from the first page
    gives the agent an approximate total count without having to download
    every message.

    Args:
        params: Base query parameters (e.g. ``labelIds`` and/or ``q``).
        limit: Maximum number of message ids to return.

    Returns:
        ``(msgIds, totalEstimate)``. ``totalEstimate`` is ``None`` when no
        page was fetched or the response carried no estimate.

    Raises:
        Propagates the error from ``_raiseGoogleError`` when a request fails
        before any id was collected (presumably a RuntimeError-style failure;
        the helper is defined elsewhere in this module). A failure after ids
        were collected is logged and the partial list is returned.
    """
    maxPageSize = 500  # hard per-page cap of the Gmail API
    msgIds: List[str] = []
    totalEstimate: Optional[int] = None
    pageToken: Optional[str] = None
    while len(msgIds) < limit:
        remaining = limit - len(msgIds)
        p = {**params, "maxResults": str(min(remaining, maxPageSize))}
        if pageToken:
            p["pageToken"] = pageToken
        url = f"{_GMAIL_BASE}/users/me/messages?{urllib.parse.urlencode(p)}"
        result = await _googleGet(self._token, url)
        if "error" in result:
            if not msgIds:
                _raiseGoogleError(result, "Gmail list messages")
            # Keep what we already have, but make the truncation visible.
            logger.warning(
                "Gmail list messages: page failed after %d ids, returning partial result: %s",
                len(msgIds), result.get("error"),
            )
            break
        if totalEstimate is None:
            totalEstimate = result.get("resultSizeEstimate")
        for m in result.get("messages", []):
            mid = m.get("id", "")
            if mid:
                msgIds.append(mid)
                if len(msgIds) >= limit:
                    break
        pageToken = result.get("nextPageToken")
        if not pageToken:
            break
    return msgIds, totalEstimate
|
||||||
|
|
||||||
|
async def _fetchMessageEntries(self, msgIds: List[str], labelPath: str = "") -> List[ExternalEntry]:
|
||||||
|
"""Resolve a list of Gmail message ids into ExternalEntries with
|
||||||
|
Subject/From/Date metadata. Detail fetches run concurrently to avoid a
|
||||||
|
slow sequential N+1 round-trip per message."""
|
||||||
|
if not msgIds:
|
||||||
|
return []
|
||||||
|
pathPrefix = f"/{labelPath}" if labelPath else ""
|
||||||
|
|
||||||
|
async def _one(msgId: str) -> ExternalEntry:
|
||||||
|
detailUrl = (
|
||||||
|
f"{_GMAIL_BASE}/users/me/messages/{msgId}"
|
||||||
|
f"?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date"
|
||||||
|
)
|
||||||
detail = await _googleGet(self._token, detailUrl)
|
detail = await _googleGet(self._token, detailUrl)
|
||||||
if "error" in detail:
|
if "error" in detail:
|
||||||
entries.append(ExternalEntry(name=f"Message {msgId}", path=f"/{cleanPath}/{msgId}", isFolder=False))
|
return ExternalEntry(name=f"Message {msgId}", path=f"{pathPrefix}/{msgId}", isFolder=False,
|
||||||
continue
|
metadata={"id": msgId})
|
||||||
headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])}
|
headers = {h.get("name", ""): h.get("value", "") for h in detail.get("payload", {}).get("headers", [])}
|
||||||
entries.append(ExternalEntry(
|
return ExternalEntry(
|
||||||
name=headers.get("Subject", "(no subject)"),
|
name=headers.get("Subject", "(no subject)"),
|
||||||
path=f"/{cleanPath}/{msgId}",
|
path=f"{pathPrefix}/{msgId}",
|
||||||
isFolder=False,
|
isFolder=False,
|
||||||
metadata={
|
metadata={
|
||||||
"id": msgId,
|
"id": msgId,
|
||||||
|
|
@ -212,8 +376,9 @@ class GmailAdapter(ServiceAdapter):
|
||||||
"date": headers.get("Date", ""),
|
"date": headers.get("Date", ""),
|
||||||
"snippet": detail.get("snippet", ""),
|
"snippet": detail.get("snippet", ""),
|
||||||
},
|
},
|
||||||
))
|
)
|
||||||
return entries
|
|
||||||
|
return list(await asyncio.gather(*[_one(mid) for mid in msgIds]))
|
||||||
|
|
||||||
async def download(self, path: str) -> DownloadResult:
|
async def download(self, path: str) -> DownloadResult:
|
||||||
"""Download a Gmail message as RFC 822 EML via format=raw."""
|
"""Download a Gmail message as RFC 822 EML via format=raw."""
|
||||||
|
|
@ -261,19 +426,34 @@ class GmailAdapter(ServiceAdapter):
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> list:
|
) -> list:
|
||||||
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
||||||
url = f"{_GMAIL_BASE}/users/me/messages?q={query}&maxResults={effectiveLimit}"
|
params: Dict[str, str] = {"q": query}
|
||||||
result = await _googleGet(self._token, url)
|
labelPath = (path or "").strip("/")
|
||||||
if "error" in result:
|
if labelPath:
|
||||||
return []
|
labelId = await self._resolveLabelId(labelPath)
|
||||||
return [
|
if not labelId:
|
||||||
ExternalEntry(
|
raise ValueError(
|
||||||
name=f"Message {m.get('id', '')}",
|
f"Gmail label not found: '{labelPath}'. Browse the mailbox root ('/') "
|
||||||
path=f"/{m.get('id', '')}",
|
f"to list available labels, or search without a label scope."
|
||||||
isFolder=False,
|
)
|
||||||
metadata={"id": m.get("id")},
|
labelPath = labelId
|
||||||
)
|
params["labelIds"] = labelId
|
||||||
for m in result.get("messages", [])
|
msgIds, totalEstimate = await self._listMessageIds(params, limit=effectiveLimit)
|
||||||
]
|
entries = await self._fetchMessageEntries(
|
||||||
|
msgIds[:self._METADATA_FETCH_CAP], labelPath=labelPath,
|
||||||
|
)
|
||||||
|
if totalEstimate and totalEstimate > len(msgIds):
|
||||||
|
entries.append(ExternalEntry(
|
||||||
|
name=f"(~{totalEstimate} total results estimated, {len(msgIds)} listed)",
|
||||||
|
path=f"/{labelPath or 'search'}/_count", isFolder=False,
|
||||||
|
metadata={"totalEstimate": totalEstimate, "listed": len(msgIds)},
|
||||||
|
))
|
||||||
|
elif len(msgIds) > self._METADATA_FETCH_CAP:
|
||||||
|
entries.append(ExternalEntry(
|
||||||
|
name=f"({len(msgIds)} results listed, metadata shown for first {self._METADATA_FETCH_CAP})",
|
||||||
|
path=f"/{labelPath or 'search'}/_count", isFolder=False,
|
||||||
|
metadata={"listed": len(msgIds), "metadataShown": self._METADATA_FETCH_CAP},
|
||||||
|
))
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
class CalendarAdapter(ServiceAdapter):
|
class CalendarAdapter(ServiceAdapter):
|
||||||
|
|
@ -302,8 +482,7 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250"
|
url = f"{_CALENDAR_BASE}/users/me/calendarList?maxResults=250"
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Google Calendar list failed: {result['error']}")
|
_raiseGoogleError(result, "Google Calendar list")
|
||||||
return []
|
|
||||||
calendars = result.get("items", [])
|
calendars = result.get("items", [])
|
||||||
if filter:
|
if filter:
|
||||||
f = filter.lower()
|
f = filter.lower()
|
||||||
|
|
@ -331,10 +510,14 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
||||||
f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
|
f"?maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
|
||||||
)
|
)
|
||||||
|
# Restrict to a date window when the filter is a date range, so large
|
||||||
|
# multi-year calendars only return the relevant period.
|
||||||
|
timeMin, timeMax = _parseGoogleDateRange(filter)
|
||||||
|
if timeMin and timeMax:
|
||||||
|
url += f"&timeMin={quote(timeMin, safe='')}&timeMax={quote(timeMax, safe='')}"
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Google Calendar events failed: {result['error']}")
|
_raiseGoogleError(result, "Google Calendar events")
|
||||||
return []
|
|
||||||
events = result.get("items", [])
|
events = result.get("items", [])
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
|
|
@ -387,13 +570,23 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
calendarId = (path or "").strip("/").split("/", 1)[0] or "primary"
|
calendarId = (path or "").strip("/").split("/", 1)[0] or "primary"
|
||||||
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
|
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
|
||||||
url = (
|
# A date-range query maps to timeMin/timeMax (efficient window fetch);
|
||||||
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
# otherwise fall back to the free-text q parameter.
|
||||||
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true"
|
timeMin, timeMax = _parseGoogleDateRange(query)
|
||||||
)
|
if timeMin and timeMax:
|
||||||
|
url = (
|
||||||
|
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
||||||
|
f"?timeMin={quote(timeMin, safe='')}&timeMax={quote(timeMax, safe='')}"
|
||||||
|
f"&maxResults={effectiveLimit}&orderBy=startTime&singleEvents=true"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
url = (
|
||||||
|
f"{_CALENDAR_BASE}/calendars/{quote(calendarId, safe='')}/events"
|
||||||
|
f"?q={quote(query, safe='')}&maxResults={effectiveLimit}&singleEvents=true"
|
||||||
|
)
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
_raiseGoogleError(result, "Google Calendar search")
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=ev.get("summary", "(no title)"),
|
name=ev.get("summary", "(no title)"),
|
||||||
|
|
@ -479,8 +672,7 @@ class ContactsAdapter(ServiceAdapter):
|
||||||
)
|
)
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Google People connections failed: {result['error']}")
|
_raiseGoogleError(result, "Google People connections")
|
||||||
return []
|
|
||||||
people = result.get("connections", [])
|
people = result.get("connections", [])
|
||||||
else:
|
else:
|
||||||
groupResource = groupRef
|
groupResource = groupRef
|
||||||
|
|
@ -490,8 +682,7 @@ class ContactsAdapter(ServiceAdapter):
|
||||||
)
|
)
|
||||||
grpResult = await _googleGet(self._token, grpUrl)
|
grpResult = await _googleGet(self._token, grpUrl)
|
||||||
if "error" in grpResult:
|
if "error" in grpResult:
|
||||||
logger.warning(f"Google contactGroup detail failed: {grpResult['error']}")
|
_raiseGoogleError(grpResult, "Google contactGroup detail")
|
||||||
return []
|
|
||||||
memberResourceNames = grpResult.get("memberResourceNames") or []
|
memberResourceNames = grpResult.get("memberResourceNames") or []
|
||||||
if not memberResourceNames:
|
if not memberResourceNames:
|
||||||
return []
|
return []
|
||||||
|
|
@ -568,7 +759,7 @@ class ContactsAdapter(ServiceAdapter):
|
||||||
)
|
)
|
||||||
result = await _googleGet(self._token, url)
|
result = await _googleGet(self._token, url)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
_raiseGoogleError(result, "Google Contacts search")
|
||||||
entries: List[ExternalEntry] = []
|
entries: List[ExternalEntry] = []
|
||||||
for r in result.get("results", []):
|
for r in result.get("results", []):
|
||||||
p = r.get("person") or {}
|
p = r.get("person") or {}
|
||||||
|
|
@ -581,6 +772,8 @@ class ContactsAdapter(ServiceAdapter):
|
||||||
metadata={
|
metadata={
|
||||||
"id": p.get("resourceName"),
|
"id": p.get("resourceName"),
|
||||||
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
|
"emails": [e.get("value") for e in (p.get("emailAddresses") or []) if e.get("value")],
|
||||||
|
"phones": [pn.get("value") for pn in (p.get("phoneNumbers") or []) if pn.get("value")],
|
||||||
|
"organization": (p.get("organizations") or [{}])[0].get("name") if p.get("organizations") else None,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,17 @@ async def _infomaniakGet(
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def _raiseInfomaniakError(result: Dict[str, Any], ctx: str) -> None:
    """Log and raise a clear error for a failed Infomaniak API response.

    Browse/search must NOT swallow API failures into an empty result list,
    which masks a real error as 'empty'. Callers wrap these in try/except.

    Args:
        result: The response returned by the Infomaniak request helper. Its
            ``error`` entry is used as the message when ``result`` is a dict;
            otherwise the whole value is reported.
        ctx: Short description of the failed operation, included in both the
            log line and the exception message.

    Raises:
        RuntimeError: Always.
    """
    err = result.get("error") if isinstance(result, dict) else None
    logger.warning("Infomaniak error (%s): %s", ctx, err or result)
    raise RuntimeError(f"Infomaniak error ({ctx}): {err or result}")
|
||||||
|
|
||||||
|
|
||||||
async def _infomaniakDownload(
|
async def _infomaniakDownload(
|
||||||
token: str,
|
token: str,
|
||||||
endpoint: str,
|
endpoint: str,
|
||||||
|
|
@ -358,10 +369,7 @@ class KdriveAdapter(ServiceAdapter):
|
||||||
|
|
||||||
result = await _infomaniakGet(self._token, endpoint)
|
result = await _infomaniakGet(self._token, endpoint)
|
||||||
if isinstance(result, dict) and result.get("error"):
|
if isinstance(result, dict) and result.get("error"):
|
||||||
logger.warning(
|
_raiseInfomaniakError(result, f"kDrive list-children {driveId}/{fileId or 'root'}")
|
||||||
f"kDrive list-children {driveId}/{fileId or 'root'} failed: {result['error']}"
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
data = _unwrapData(result)
|
data = _unwrapData(result)
|
||||||
items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else []
|
items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else []
|
||||||
|
|
||||||
|
|
@ -426,7 +434,7 @@ class KdriveAdapter(ServiceAdapter):
|
||||||
endpoint = f"/2/drive/{driveId}/files/search?query={query}&per_page={pageSize}"
|
endpoint = f"/2/drive/{driveId}/files/search?query={query}&per_page={pageSize}"
|
||||||
result = await _infomaniakGet(self._token, endpoint)
|
result = await _infomaniakGet(self._token, endpoint)
|
||||||
if isinstance(result, dict) and result.get("error"):
|
if isinstance(result, dict) and result.get("error"):
|
||||||
return []
|
_raiseInfomaniakError(result, "kDrive search")
|
||||||
data = _unwrapData(result)
|
data = _unwrapData(result)
|
||||||
items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else []
|
items = data if isinstance(data, list) else data.get("items", []) if isinstance(data, dict) else []
|
||||||
|
|
||||||
|
|
@ -495,7 +503,7 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
if not segments:
|
if not segments:
|
||||||
return await self._listCalendars()
|
return await self._listCalendars()
|
||||||
if len(segments) == 1:
|
if len(segments) == 1:
|
||||||
return await self._listEvents(segments[0], limit=limit)
|
return await self._listEvents(segments[0], limit=limit, filter=filter)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def _listCalendars(self) -> List[ExternalEntry]:
|
async def _listCalendars(self) -> List[ExternalEntry]:
|
||||||
|
|
@ -503,8 +511,7 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
self._token, f"{_PIM_PREFIX}/calendar", baseUrl=_CALENDAR_BASE
|
self._token, f"{_PIM_PREFIX}/calendar", baseUrl=_CALENDAR_BASE
|
||||||
)
|
)
|
||||||
if isinstance(result, dict) and result.get("error"):
|
if isinstance(result, dict) and result.get("error"):
|
||||||
logger.warning(f"Calendar list-calendars failed: {result['error']}")
|
_raiseInfomaniakError(result, "Calendar list-calendars")
|
||||||
return []
|
|
||||||
data = _unwrapData(result)
|
data = _unwrapData(result)
|
||||||
calendars = data.get("calendars", []) if isinstance(data, dict) else []
|
calendars = data.get("calendars", []) if isinstance(data, dict) else []
|
||||||
entries: List[ExternalEntry] = []
|
entries: List[ExternalEntry] = []
|
||||||
|
|
@ -527,18 +534,64 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
))
|
))
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _eventWindow(self) -> tuple:
|
def _eventWindow(self, filter: Optional[str] = None) -> tuple:
|
||||||
|
# Honour an explicit date range from the agent (e.g. "2026-06" or
|
||||||
|
# "2026-06-01 2026-06-30"), clamped to the vendor's <3 month limit.
|
||||||
|
# Otherwise fall back to the default 90-day browsing window.
|
||||||
|
rng = self._parseFilterWindow(filter)
|
||||||
|
if rng:
|
||||||
|
return rng
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
fromStr = (now - timedelta(days=self._PAST_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
|
fromStr = (now - timedelta(days=self._PAST_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
toStr = (now + timedelta(days=self._FUTURE_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
|
toStr = (now + timedelta(days=self._FUTURE_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
return fromStr, toStr
|
return fromStr, toStr
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parseFilterWindow(filter: Optional[str]) -> Optional[tuple]:
|
||||||
|
"""Parse a date range from a filter string into Infomaniak's
|
||||||
|
'Y-m-d H:i:s' from/to window, clamped to <3 months. Returns None when
|
||||||
|
the filter is not a parseable date range."""
|
||||||
|
if not filter:
|
||||||
|
return None
|
||||||
|
iso = re.findall(r'\d{4}-\d{2}-\d{2}', filter)
|
||||||
|
start = end = None
|
||||||
|
if len(iso) >= 2:
|
||||||
|
start, end = iso[0], iso[1]
|
||||||
|
elif len(iso) == 1:
|
||||||
|
start = iso[0]
|
||||||
|
else:
|
||||||
|
month = re.match(r'^(\d{4})-(\d{2})$', filter.strip())
|
||||||
|
if not month:
|
||||||
|
return None
|
||||||
|
year, mon = int(month.group(1)), int(month.group(2))
|
||||||
|
start = f"{year}-{mon:02d}-01"
|
||||||
|
end = f"{year + 1}-01-01" if mon == 12 else f"{year}-{mon + 1:02d}-01"
|
||||||
|
try:
|
||||||
|
startDt = datetime.fromisoformat(start)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
if end:
|
||||||
|
try:
|
||||||
|
endDt = datetime.fromisoformat(end)
|
||||||
|
except ValueError:
|
||||||
|
endDt = startDt + timedelta(days=31)
|
||||||
|
else:
|
||||||
|
endDt = startDt + timedelta(days=31)
|
||||||
|
# Clamp to vendor limit (<3 months).
|
||||||
|
if endDt - startDt > timedelta(days=85):
|
||||||
|
endDt = startDt + timedelta(days=85)
|
||||||
|
return (
|
||||||
|
startDt.strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
|
endDt.strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
|
)
|
||||||
|
|
||||||
async def _listEvents(
|
async def _listEvents(
|
||||||
self,
|
self,
|
||||||
calendarId: str,
|
calendarId: str,
|
||||||
limit: Optional[int],
|
limit: Optional[int],
|
||||||
|
filter: Optional[str] = None,
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
fromStr, toStr = self._eventWindow()
|
fromStr, toStr = self._eventWindow(filter)
|
||||||
endpoint = (
|
endpoint = (
|
||||||
f"{_PIM_PREFIX}/event"
|
f"{_PIM_PREFIX}/event"
|
||||||
f"?calendar_id={calendarId}"
|
f"?calendar_id={calendarId}"
|
||||||
|
|
@ -547,8 +600,7 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
)
|
)
|
||||||
result = await _infomaniakGet(self._token, endpoint, baseUrl=_CALENDAR_BASE)
|
result = await _infomaniakGet(self._token, endpoint, baseUrl=_CALENDAR_BASE)
|
||||||
if isinstance(result, dict) and result.get("error"):
|
if isinstance(result, dict) and result.get("error"):
|
||||||
logger.warning(f"Calendar list-events {calendarId} failed: {result['error']}")
|
_raiseInfomaniakError(result, f"Calendar list-events {calendarId}")
|
||||||
return []
|
|
||||||
data = _unwrapData(result)
|
data = _unwrapData(result)
|
||||||
events = data if isinstance(data, list) else data.get("events", []) if isinstance(data, dict) else []
|
events = data if isinstance(data, list) else data.get("events", []) if isinstance(data, dict) else []
|
||||||
entries: List[ExternalEntry] = []
|
entries: List[ExternalEntry] = []
|
||||||
|
|
@ -626,11 +678,14 @@ class CalendarAdapter(ServiceAdapter):
|
||||||
)
|
)
|
||||||
if not calendars:
|
if not calendars:
|
||||||
return []
|
return []
|
||||||
needle = (query or "").strip().lower()
|
# A date-range query maps directly to the event window; a free-text
|
||||||
|
# query keeps the default window and filters on title/location.
|
||||||
|
dateWindow = self._parseFilterWindow(query)
|
||||||
|
needle = "" if dateWindow else (query or "").strip().lower()
|
||||||
results: List[ExternalEntry] = []
|
results: List[ExternalEntry] = []
|
||||||
for cal in calendars:
|
for cal in calendars:
|
||||||
calId = (cal.metadata or {}).get("id") or cal.path.strip("/")
|
calId = (cal.metadata or {}).get("id") or cal.path.strip("/")
|
||||||
for ev in await self._listEvents(calId, limit=limit):
|
for ev in await self._listEvents(calId, limit=limit, filter=query if dateWindow else None):
|
||||||
hay = " ".join(
|
hay = " ".join(
|
||||||
str(v) for v in (
|
str(v) for v in (
|
||||||
ev.name,
|
ev.name,
|
||||||
|
|
@ -768,8 +823,7 @@ class ContactAdapter(ServiceAdapter):
|
||||||
self._token, f"{_PIM_PREFIX}/addressbook", baseUrl=_CONTACTS_BASE
|
self._token, f"{_PIM_PREFIX}/addressbook", baseUrl=_CONTACTS_BASE
|
||||||
)
|
)
|
||||||
if isinstance(result, dict) and result.get("error"):
|
if isinstance(result, dict) and result.get("error"):
|
||||||
logger.warning(f"Contacts list-addressbooks failed: {result['error']}")
|
_raiseInfomaniakError(result, "Contacts list-addressbooks")
|
||||||
return []
|
|
||||||
data = _unwrapData(result)
|
data = _unwrapData(result)
|
||||||
books = data.get("addressbooks", []) if isinstance(data, dict) else []
|
books = data.get("addressbooks", []) if isinstance(data, dict) else []
|
||||||
entries: List[ExternalEntry] = []
|
entries: List[ExternalEntry] = []
|
||||||
|
|
@ -809,10 +863,7 @@ class ContactAdapter(ServiceAdapter):
|
||||||
)
|
)
|
||||||
result = await _infomaniakGet(self._token, endpoint, baseUrl=_CONTACTS_BASE)
|
result = await _infomaniakGet(self._token, endpoint, baseUrl=_CONTACTS_BASE)
|
||||||
if isinstance(result, dict) and result.get("error"):
|
if isinstance(result, dict) and result.get("error"):
|
||||||
logger.warning(
|
_raiseInfomaniakError(result, f"Contacts list-contacts {addressBookId}")
|
||||||
f"Contacts list-contacts {addressBookId} failed: {result['error']}"
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
data = _unwrapData(result)
|
data = _unwrapData(result)
|
||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
return [c for c in data if isinstance(c, dict)]
|
return [c for c in data if isinstance(c, dict)]
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ UserConnection (authority=msft).
|
||||||
import logging
|
import logging
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import urllib.parse
|
||||||
from typing import Dict, Any, List, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
|
|
||||||
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
|
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
|
||||||
|
|
@ -69,6 +70,8 @@ async def _makeGraphCall(
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
"Content-Type": contentType,
|
"Content-Type": contentType,
|
||||||
}
|
}
|
||||||
|
if "$count=true" in endpoint:
|
||||||
|
headers["ConsistencyLevel"] = "eventual"
|
||||||
timeout = aiohttp.ClientTimeout(total=30)
|
timeout = aiohttp.ClientTimeout(total=30)
|
||||||
try:
|
try:
|
||||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
|
|
@ -124,6 +127,18 @@ def _stripGraphBase(url: str) -> str:
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
def _raiseGraphError(result: Dict[str, Any], ctx: str) -> None:
    """Log and raise a clear error for a failed Graph response.

    Browse/search must NOT swallow API failures into an empty result list,
    which makes a real error look like 'empty directory'. Callers (data-source
    tools, tree-builder, sync jobs) already wrap these in try/except.

    Args:
        result: The response returned by the Graph request helper. Its
            ``error`` entry is used as the message when ``result`` is a dict;
            otherwise the whole value is reported.
        ctx: Short description of the failed operation, included in both the
            log line and the exception message.

    Raises:
        RuntimeError: Always.
    """
    err = result.get("error") if isinstance(result, dict) else None
    logger.warning("Graph error (%s): %s", ctx, err or result)
    raise RuntimeError(f"Graph error ({ctx}): {err or result}")
|
||||||
|
|
||||||
|
|
||||||
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
|
def _graphItemToExternalEntry(item: Dict[str, Any], basePath: str = "") -> ExternalEntry:
|
||||||
isFolder = "folder" in item
|
isFolder = "folder" in item
|
||||||
# Graph exposes the driveItem content hash as ``eTag`` (quoted) or
|
# Graph exposes the driveItem content hash as ``eTag`` (quoted) or
|
||||||
|
|
@ -189,7 +204,8 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
while endpoint and len(items) < hardCap:
|
while endpoint and len(items) < hardCap:
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"SharePoint browse failed: {result['error']}")
|
if not items:
|
||||||
|
_raiseGraphError(result, "SharePoint browse")
|
||||||
break
|
break
|
||||||
for raw in result.get("value", []) or []:
|
for raw in result.get("value", []) or []:
|
||||||
items.append(raw)
|
items.append(raw)
|
||||||
|
|
@ -211,8 +227,7 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
"""Discover accessible SharePoint sites."""
|
"""Discover accessible SharePoint sites."""
|
||||||
result = await self._graphGet("sites?search=*&$top=50")
|
result = await self._graphGet("sites?search=*&$top=50")
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"SharePoint site discovery failed: {result['error']}")
|
_raiseGraphError(result, "SharePoint site discovery")
|
||||||
return []
|
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=s.get("displayName") or s.get("name", ""),
|
name=s.get("displayName") or s.get("name", ""),
|
||||||
|
|
@ -253,17 +268,37 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
path: Optional[str] = None,
|
path: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
siteId, _ = _parseSharepointPath(path or "")
|
siteId, folderPath = _parseSharepointPath(path or "")
|
||||||
if not siteId:
|
if not siteId:
|
||||||
return []
|
return []
|
||||||
safeQuery = query.replace("'", "''")
|
safeQuery = query.replace("'", "''")
|
||||||
endpoint = f"sites/{siteId}/drive/root/search(q='{safeQuery}')"
|
cleanFolder = (folderPath or "").strip("/")
|
||||||
result = await self._graphGet(endpoint)
|
# Scope the search to the attached folder when one is given, so the agent
|
||||||
if "error" in result:
|
# does not get hits from unrelated parts of the site drive.
|
||||||
return []
|
if cleanFolder:
|
||||||
entries = [_graphItemToExternalEntry(item) for item in result.get("value", [])]
|
endpoint: Optional[str] = f"sites/{siteId}/drive/root:/{cleanFolder}:/search(q='{safeQuery}')?$top=200"
|
||||||
if limit is not None:
|
else:
|
||||||
entries = entries[: max(1, int(limit))]
|
endpoint = f"sites/{siteId}/drive/root/search(q='{safeQuery}')?$top=200"
|
||||||
|
effectiveLimit = int(limit) if limit is not None else None
|
||||||
|
items: List[Dict[str, Any]] = []
|
||||||
|
hardCap = 1000
|
||||||
|
while endpoint and len(items) < hardCap:
|
||||||
|
result = await self._graphGet(endpoint)
|
||||||
|
if "error" in result:
|
||||||
|
if not items:
|
||||||
|
_raiseGraphError(result, "SharePoint search")
|
||||||
|
break
|
||||||
|
for raw in result.get("value", []) or []:
|
||||||
|
items.append(raw)
|
||||||
|
if effectiveLimit is not None and len(items) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
if effectiveLimit is not None and len(items) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
nextLink = result.get("@odata.nextLink")
|
||||||
|
endpoint = _stripGraphBase(nextLink) if nextLink else None
|
||||||
|
entries = [_graphItemToExternalEntry(item) for item in items]
|
||||||
|
if effectiveLimit is not None:
|
||||||
|
entries = entries[: max(1, effectiveLimit)]
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -273,6 +308,50 @@ class SharepointAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
|
|
||||||
_CHARSET_META = '<meta charset="utf-8">'
|
_CHARSET_META = '<meta charset="utf-8">'
|
||||||
|
|
||||||
|
|
||||||
|
def _parseDateRange(filterStr: Optional[str]) -> tuple:
|
||||||
|
"""Parse a date range from a filter/query string.
|
||||||
|
|
||||||
|
Supports two ISO dates ("2026-06-01 2026-06-30"), a single ISO date
|
||||||
|
(treated as a ~31 day window), or a YYYY-MM month pattern. Returns
|
||||||
|
(startDateTime, endDateTime) ISO strings, or (None, None) if not parseable.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
if not filterStr:
|
||||||
|
return (None, None)
|
||||||
|
isoMatch = re.findall(r'\d{4}-\d{2}-\d{2}(?:T[\d:]+)?', filterStr)
|
||||||
|
if len(isoMatch) >= 2:
|
||||||
|
return (isoMatch[0], isoMatch[1])
|
||||||
|
if len(isoMatch) == 1:
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(isoMatch[0])
|
||||||
|
return (isoMatch[0], (dt + timedelta(days=31)).strftime('%Y-%m-%dT00:00:00'))
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
monthMatch = re.match(r'^(\d{4})-(\d{2})$', filterStr.strip())
|
||||||
|
if monthMatch:
|
||||||
|
year, month = int(monthMatch.group(1)), int(monthMatch.group(2))
|
||||||
|
start = f"{year}-{month:02d}-01T00:00:00"
|
||||||
|
if month == 12:
|
||||||
|
end = f"{year + 1}-01-01T00:00:00"
|
||||||
|
else:
|
||||||
|
end = f"{year}-{month + 1:02d}-01T00:00:00"
|
||||||
|
return (start, end)
|
||||||
|
return (None, None)
|
||||||
|
|
||||||
|
|
||||||
|
def _toGraphUtc(isoStr: str) -> str:
|
||||||
|
"""Normalise an ISO date/datetime to a Graph-compatible UTC string
|
||||||
|
(always 'YYYY-MM-DDTHH:MM:SSZ')."""
|
||||||
|
if not isoStr:
|
||||||
|
return isoStr
|
||||||
|
value = isoStr.strip().rstrip("Z")
|
||||||
|
if "T" not in value:
|
||||||
|
value = f"{value}T00:00:00"
|
||||||
|
return f"{value}Z"
|
||||||
|
|
||||||
|
|
||||||
def _ensureHtmlCharset(html: str) -> str:
|
def _ensureHtmlCharset(html: str) -> str:
|
||||||
"""Ensure HTML body has a charset meta tag so Outlook renders UTF-8 correctly."""
|
"""Ensure HTML body has a charset meta tag so Outlook renders UTF-8 correctly."""
|
||||||
if "charset" in html.lower():
|
if "charset" in html.lower():
|
||||||
|
|
@ -350,25 +429,62 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
for f in folders
|
for f in folders
|
||||||
]
|
]
|
||||||
|
|
||||||
folderId = path.strip("/")
|
# The incoming path segment may be a display name ("MGB-Ablage"), a
|
||||||
|
# well-known shortcut ("inbox") or an already-resolved Graph folder id.
|
||||||
|
# Resolve it to a real id first; otherwise Graph rejects the URL with
|
||||||
|
# 400 ErrorInvalidIdMalformed.
|
||||||
|
folderRef = path.strip("/")
|
||||||
|
folderId = await self._resolveFolderId(folderRef)
|
||||||
|
if not folderId:
|
||||||
|
raise ValueError(
|
||||||
|
f"Outlook folder not found: '{folderRef}'. Browse the mailbox root "
|
||||||
|
f"(path '/') or call listMailFolders to obtain a valid folder id."
|
||||||
|
)
|
||||||
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
||||||
pageSize = min(self._PAGE_SIZE, effectiveLimit)
|
pageSize = min(self._PAGE_SIZE, effectiveLimit)
|
||||||
endpoint: Optional[str] = (
|
# Optional date-range filter (e.g. "2026-06" or "2026-06-01 2026-06-30")
|
||||||
f"me/mailFolders/{folderId}/messages"
|
# so only that period is fetched server-side instead of paging the whole
|
||||||
f"?$top={pageSize}&$orderby=receivedDateTime desc"
|
# folder. Falls back to a plain newest-first listing otherwise.
|
||||||
)
|
startDateTime, endDateTime = _parseDateRange(filter)
|
||||||
|
countParam = "&$count=true"
|
||||||
|
if startDateTime and endDateTime:
|
||||||
|
dateFilter = (
|
||||||
|
f"receivedDateTime ge {_toGraphUtc(startDateTime)} and "
|
||||||
|
f"receivedDateTime lt {_toGraphUtc(endDateTime)}"
|
||||||
|
)
|
||||||
|
endpoint: Optional[str] = (
|
||||||
|
f"me/mailFolders/{folderId}/messages"
|
||||||
|
f"?$top={pageSize}&$orderby=receivedDateTime desc"
|
||||||
|
f"&$filter={urllib.parse.quote(dateFilter)}{countParam}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
endpoint = (
|
||||||
|
f"me/mailFolders/{folderId}/messages"
|
||||||
|
f"?$top={pageSize}&$orderby=receivedDateTime desc{countParam}"
|
||||||
|
)
|
||||||
messages: List[Dict[str, Any]] = []
|
messages: List[Dict[str, Any]] = []
|
||||||
|
totalCount: Optional[int] = None
|
||||||
|
firstPage = True
|
||||||
while endpoint and len(messages) < effectiveLimit:
|
while endpoint and len(messages) < effectiveLimit:
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
|
if firstPage:
|
||||||
|
err = result.get("error") or {}
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Graph error listing messages in folder '{folderRef}': "
|
||||||
|
f"{err.get('message') or err}"
|
||||||
|
)
|
||||||
break
|
break
|
||||||
|
if firstPage and "@odata.count" in result:
|
||||||
|
totalCount = result["@odata.count"]
|
||||||
|
firstPage = False
|
||||||
for m in result.get("value", []):
|
for m in result.get("value", []):
|
||||||
messages.append(m)
|
messages.append(m)
|
||||||
if len(messages) >= effectiveLimit:
|
if len(messages) >= effectiveLimit:
|
||||||
break
|
break
|
||||||
nextLink = result.get("@odata.nextLink")
|
nextLink = result.get("@odata.nextLink")
|
||||||
endpoint = _stripGraphBase(nextLink) if nextLink else None
|
endpoint = _stripGraphBase(nextLink) if nextLink else None
|
||||||
return [
|
entries = [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=m.get("subject", "(no subject)"),
|
name=m.get("subject", "(no subject)"),
|
||||||
path=f"{path}/{m.get('id', '')}",
|
path=f"{path}/{m.get('id', '')}",
|
||||||
|
|
@ -382,6 +498,13 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
)
|
)
|
||||||
for m in messages
|
for m in messages
|
||||||
]
|
]
|
||||||
|
if totalCount is not None and totalCount > len(entries):
|
||||||
|
entries.append(ExternalEntry(
|
||||||
|
name=f"({totalCount} total messages in folder, {len(entries)} listed)",
|
||||||
|
path=f"{path}/_count", isFolder=False,
|
||||||
|
metadata={"totalCount": totalCount, "listed": len(entries)},
|
||||||
|
))
|
||||||
|
return entries
|
||||||
|
|
||||||
async def download(self, path: str) -> DownloadResult:
|
async def download(self, path: str) -> DownloadResult:
|
||||||
"""Download a mail message as RFC 822 EML via Graph API $value endpoint."""
|
"""Download a mail message as RFC 822 EML via Graph API $value endpoint."""
|
||||||
|
|
@ -412,14 +535,28 @@ class OutlookAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
path: Optional[str] = None,
|
path: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
safeQuery = query.replace("'", "''")
|
safeQuery = query.replace('"', '\\"')
|
||||||
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
effectiveLimit = self._DEFAULT_MESSAGE_LIMIT if limit is None else max(1, min(int(limit), self._MAX_MESSAGE_LIMIT))
|
||||||
|
# Scope the search to the attached folder when one is given, so the agent
|
||||||
|
# gets hits only from e.g. the Inbox instead of the whole mailbox. Resolve
|
||||||
|
# the folder reference (display name / well-known / id) to a real id first.
|
||||||
|
folderRef = (path or "").strip("/")
|
||||||
|
base = "me/messages"
|
||||||
|
if folderRef:
|
||||||
|
folderId = await self._resolveFolderId(folderRef)
|
||||||
|
if not folderId:
|
||||||
|
raise ValueError(
|
||||||
|
f"Outlook folder not found: '{folderRef}'. Call listMailFolders "
|
||||||
|
f"to obtain a valid folder id, or search without a folder scope."
|
||||||
|
)
|
||||||
|
base = f"me/mailFolders/{folderId}/messages"
|
||||||
# NOTE: Graph $search does not support $orderby and may return a single
|
# NOTE: Graph $search does not support $orderby and may return a single
|
||||||
# page (no @odata.nextLink). We still pass $top to lift the implicit 25.
|
# page (no @odata.nextLink). We still pass $top to lift the implicit 25.
|
||||||
endpoint = f"me/messages?$search=\"{safeQuery}\"&$top={effectiveLimit}"
|
endpoint = f"{base}?$search=\"{safeQuery}\"&$top={effectiveLimit}"
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
err = result.get("error") or {}
|
||||||
|
raise RuntimeError(f"Graph error searching mail: {err.get('message') or err}")
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=m.get("subject", "(no subject)"),
|
name=m.get("subject", "(no subject)"),
|
||||||
|
|
@ -774,8 +911,7 @@ class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
if not cleanPath:
|
if not cleanPath:
|
||||||
result = await self._graphGet("me/joinedTeams")
|
result = await self._graphGet("me/joinedTeams")
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Teams browse failed: {result['error']}")
|
_raiseGraphError(result, "Teams browse")
|
||||||
return []
|
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=t.get("displayName", ""),
|
name=t.get("displayName", ""),
|
||||||
|
|
@ -791,7 +927,7 @@ class TeamsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
if len(parts) == 1:
|
if len(parts) == 1:
|
||||||
result = await self._graphGet(f"teams/{teamId}/channels")
|
result = await self._graphGet(f"teams/{teamId}/channels")
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
_raiseGraphError(result, "Teams channels")
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=ch.get("displayName", ""),
|
name=ch.get("displayName", ""),
|
||||||
|
|
@ -834,18 +970,33 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
cleanPath = (path or "").strip("/")
|
cleanPath = (path or "").strip("/")
|
||||||
if not cleanPath:
|
if not cleanPath:
|
||||||
endpoint = "me/drive/root/children"
|
endpoint: Optional[str] = "me/drive/root/children?$top=200"
|
||||||
else:
|
else:
|
||||||
endpoint = f"me/drive/root:/{cleanPath}:/children"
|
endpoint = f"me/drive/root:/{cleanPath}:/children?$top=200"
|
||||||
|
|
||||||
result = await self._graphGet(endpoint)
|
effectiveLimit = int(limit) if limit is not None else None
|
||||||
if "error" in result:
|
items: List[Dict[str, Any]] = []
|
||||||
return []
|
hardCap = 5000
|
||||||
entries = [_graphItemToExternalEntry(item, path) for item in result.get("value", [])]
|
while endpoint and len(items) < hardCap:
|
||||||
|
result = await self._graphGet(endpoint)
|
||||||
|
if "error" in result:
|
||||||
|
if not items:
|
||||||
|
_raiseGraphError(result, "OneDrive browse")
|
||||||
|
break
|
||||||
|
for raw in result.get("value", []) or []:
|
||||||
|
items.append(raw)
|
||||||
|
if effectiveLimit is not None and len(items) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
if effectiveLimit is not None and len(items) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
nextLink = result.get("@odata.nextLink")
|
||||||
|
endpoint = _stripGraphBase(nextLink) if nextLink else None
|
||||||
|
|
||||||
|
entries = [_graphItemToExternalEntry(item, path) for item in items]
|
||||||
if filter:
|
if filter:
|
||||||
entries = [e for e in entries if _matchFilter(e, filter)]
|
entries = [e for e in entries if _matchFilter(e, filter)]
|
||||||
if limit is not None:
|
if effectiveLimit is not None:
|
||||||
entries = entries[: max(1, int(limit))]
|
entries = entries[: max(1, effectiveLimit)]
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
async def download(self, path: str) -> bytes:
|
async def download(self, path: str) -> bytes:
|
||||||
|
|
@ -868,13 +1019,32 @@ class OneDriveAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
safeQuery = query.replace("'", "''")
|
safeQuery = query.replace("'", "''")
|
||||||
endpoint = f"me/drive/root/search(q='{safeQuery}')"
|
cleanPath = (path or "").strip("/")
|
||||||
result = await self._graphGet(endpoint)
|
# Scope to the attached folder if given, otherwise search the whole drive.
|
||||||
if "error" in result:
|
if cleanPath:
|
||||||
return []
|
endpoint: Optional[str] = f"me/drive/root:/{cleanPath}:/search(q='{safeQuery}')?$top=200"
|
||||||
entries = [_graphItemToExternalEntry(item) for item in result.get("value", [])]
|
else:
|
||||||
if limit is not None:
|
endpoint = f"me/drive/root/search(q='{safeQuery}')?$top=200"
|
||||||
entries = entries[: max(1, int(limit))]
|
effectiveLimit = int(limit) if limit is not None else None
|
||||||
|
items: List[Dict[str, Any]] = []
|
||||||
|
hardCap = 1000
|
||||||
|
while endpoint and len(items) < hardCap:
|
||||||
|
result = await self._graphGet(endpoint)
|
||||||
|
if "error" in result:
|
||||||
|
if not items:
|
||||||
|
_raiseGraphError(result, "OneDrive search")
|
||||||
|
break
|
||||||
|
for raw in result.get("value", []) or []:
|
||||||
|
items.append(raw)
|
||||||
|
if effectiveLimit is not None and len(items) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
if effectiveLimit is not None and len(items) >= effectiveLimit:
|
||||||
|
break
|
||||||
|
nextLink = result.get("@odata.nextLink")
|
||||||
|
endpoint = _stripGraphBase(nextLink) if nextLink else None
|
||||||
|
entries = [_graphItemToExternalEntry(item) for item in items]
|
||||||
|
if effectiveLimit is not None:
|
||||||
|
entries = entries[: max(1, effectiveLimit)]
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -908,8 +1078,7 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
if not cleanPath:
|
if not cleanPath:
|
||||||
result = await self._graphGet("me/calendars?$top=100")
|
result = await self._graphGet("me/calendars?$top=100")
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"MSFT Calendar list failed: {result['error']}")
|
_raiseGraphError(result, "MSFT Calendar list")
|
||||||
return []
|
|
||||||
calendars = result.get("value", [])
|
calendars = result.get("value", [])
|
||||||
if filter:
|
if filter:
|
||||||
calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()]
|
calendars = [c for c in calendars if filter.lower() in (c.get("name") or "").lower()]
|
||||||
|
|
@ -929,7 +1098,15 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
for c in calendars
|
for c in calendars
|
||||||
]
|
]
|
||||||
|
|
||||||
calendarId = cleanPath.split("/", 1)[0]
|
# The path segment may be a calendar display name or an already-resolved
|
||||||
|
# calendar id; resolve first so a name does not produce a malformed URL.
|
||||||
|
calendarRef = cleanPath.split("/", 1)[0]
|
||||||
|
calendarId = await self._resolveCalendarId(calendarRef)
|
||||||
|
if not calendarId:
|
||||||
|
raise ValueError(
|
||||||
|
f"Calendar not found: '{calendarRef}'. Browse the root ('/') to list "
|
||||||
|
f"calendars and use the returned id."
|
||||||
|
)
|
||||||
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
|
effectiveLimit = self._DEFAULT_EVENT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_EVENT_LIMIT))
|
||||||
pageSize = min(self._PAGE_SIZE, effectiveLimit)
|
pageSize = min(self._PAGE_SIZE, effectiveLimit)
|
||||||
|
|
||||||
|
|
@ -952,7 +1129,8 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
while endpoint and len(events) < effectiveLimit:
|
while endpoint and len(events) < effectiveLimit:
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"MSFT Calendar events failed: {result['error']}")
|
if not events:
|
||||||
|
_raiseGraphError(result, "MSFT Calendar events")
|
||||||
break
|
break
|
||||||
for ev in result.get("value", []):
|
for ev in result.get("value", []):
|
||||||
events.append(ev)
|
events.append(ev)
|
||||||
|
|
@ -980,32 +1158,34 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
for ev in events
|
for ev in events
|
||||||
]
|
]
|
||||||
|
|
||||||
|
async def _resolveCalendarId(self, ref: str) -> Optional[str]:
    """Turn a calendar reference into a Graph calendar id.

    The reference may be an id, one of the generic aliases
    ('default', 'primary', 'calendar', 'kalender') or a calendar name.
    Lookup order: long space-free string (taken as an id without a request),
    exact id match in the listing, alias -> calendar flagged as default,
    case-insensitive name match.

    Returns the id, or None when nothing matches.
    """
    if not ref:
        return None
    wanted = ref.strip()

    # Heuristic: Graph ids are long URL-safe strings without spaces, so such
    # a value is returned as-is without listing the calendars.
    looksLikeId = len(wanted) > 60 and " " not in wanted
    if looksLikeId:
        return wanted

    response = await self._graphGet("me/calendars?$top=100")
    if "error" in response:
        # NOTE(review): presumably raises; defined elsewhere in this module.
        _raiseGraphError(response, "MSFT Calendar list")
    calendars = response.get("value", [])

    if any(cal.get("id") == wanted for cal in calendars):
        return wanted

    wantedLower = wanted.lower()
    if wantedLower in ("default", "primary", "calendar", "kalender"):
        defaultCal = next((cal for cal in calendars if cal.get("isDefaultCalendar")), None)
        if defaultCal is not None:
            return defaultCal.get("id")

    namedCal = next(
        (cal for cal in calendars if (cal.get("name") or "").strip().lower() == wantedLower),
        None,
    )
    if namedCal is None:
        return None
    return namedCal.get("id")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parseDateRange(filterStr: Optional[str]) -> tuple:
|
def _parseDateRange(filterStr: Optional[str]) -> tuple:
|
||||||
"""Parse date range from filter string. Supports ISO dates or YYYY-MM patterns."""
|
return _parseDateRange(filterStr)
|
||||||
import re
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
if not filterStr:
|
|
||||||
return (None, None)
|
|
||||||
isoMatch = re.findall(r'\d{4}-\d{2}-\d{2}(?:T[\d:]+)?', filterStr)
|
|
||||||
if len(isoMatch) >= 2:
|
|
||||||
return (isoMatch[0], isoMatch[1])
|
|
||||||
if len(isoMatch) == 1:
|
|
||||||
try:
|
|
||||||
dt = datetime.fromisoformat(isoMatch[0])
|
|
||||||
return (isoMatch[0], (dt + timedelta(days=31)).strftime('%Y-%m-%dT00:00:00'))
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
monthMatch = re.match(r'^(\d{4})-(\d{2})$', filterStr.strip())
|
|
||||||
if monthMatch:
|
|
||||||
year, month = int(monthMatch.group(1)), int(monthMatch.group(2))
|
|
||||||
start = f"{year}-{month:02d}-01T00:00:00"
|
|
||||||
if month == 12:
|
|
||||||
end = f"{year + 1}-01-01T00:00:00"
|
|
||||||
else:
|
|
||||||
end = f"{year}-{month + 1:02d}-01T00:00:00"
|
|
||||||
return (start, end)
|
|
||||||
return (None, None)
|
|
||||||
|
|
||||||
async def download(self, path: str) -> DownloadResult:
|
async def download(self, path: str) -> DownloadResult:
|
||||||
cleanPath = (path or "").strip("/")
|
cleanPath = (path or "").strip("/")
|
||||||
|
|
@ -1050,7 +1230,7 @@ class CalendarAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
|
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
_raiseGraphError(result, "MSFT Calendar search")
|
||||||
calendarId = (path or "").strip("/").split("/")[0] if path else "search"
|
calendarId = (path or "").strip("/").split("/")[0] if path else "search"
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
|
|
@ -1126,7 +1306,15 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
logger.warning(f"MSFT contactFolders list failed: {result['error']}")
|
logger.warning(f"MSFT contactFolders list failed: {result['error']}")
|
||||||
return folders
|
return folders
|
||||||
|
|
||||||
folderId = cleanPath.split("/", 1)[0]
|
# The path segment may be a contact-folder display name or an already-
|
||||||
|
# resolved folder id (or the virtual 'default'); resolve first.
|
||||||
|
folderRef = cleanPath.split("/", 1)[0]
|
||||||
|
folderId = await self._resolveContactFolderId(folderRef)
|
||||||
|
if not folderId:
|
||||||
|
raise ValueError(
|
||||||
|
f"Contact folder not found: '{folderRef}'. Browse the root ('/') to "
|
||||||
|
f"list folders and use the returned id."
|
||||||
|
)
|
||||||
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
|
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
|
||||||
pageSize = min(self._PAGE_SIZE, effectiveLimit)
|
pageSize = min(self._PAGE_SIZE, effectiveLimit)
|
||||||
if folderId == self._DEFAULT_FOLDER_ID:
|
if folderId == self._DEFAULT_FOLDER_ID:
|
||||||
|
|
@ -1138,7 +1326,8 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
while endpoint and len(contacts) < effectiveLimit:
|
while endpoint and len(contacts) < effectiveLimit:
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"MSFT contacts list failed: {result['error']}")
|
if not contacts:
|
||||||
|
_raiseGraphError(result, "MSFT contacts list")
|
||||||
break
|
break
|
||||||
for c in result.get("value", []):
|
for c in result.get("value", []):
|
||||||
contacts.append(c)
|
contacts.append(c)
|
||||||
|
|
@ -1166,6 +1355,28 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
for c in contacts
|
for c in contacts
|
||||||
]
|
]
|
||||||
|
|
||||||
|
async def _resolveContactFolderId(self, ref: str) -> Optional[str]:
    """Turn a contact-folder reference into a folder id.

    The reference may be the virtual default folder id, one of the aliases
    ('kontakte', 'contacts', 'default'), a folder id, or a display name.
    Lookup order: default id / alias (no request), long space-free string
    (taken as an id without a request), exact id match in the listing,
    case-insensitive display-name match.

    Returns the id, or None when nothing matches.
    """
    if not ref:
        return None
    wanted = ref.strip()
    wantedLower = wanted.lower()

    isDefaultAlias = wantedLower in ("kontakte", "contacts", "default")
    if wanted == self._DEFAULT_FOLDER_ID or isDefaultAlias:
        return self._DEFAULT_FOLDER_ID

    # Heuristic: Graph ids are long URL-safe strings without spaces.
    if len(wanted) > 60 and " " not in wanted:
        return wanted

    response = await self._graphGet("me/contactFolders?$top=100")
    if "error" in response:
        # NOTE(review): presumably raises; defined elsewhere in this module.
        _raiseGraphError(response, "MSFT contactFolders list")

    if any(folder.get("id") == wanted for folder in response.get("value", [])):
        return wanted

    namedFolder = next(
        (
            folder
            for folder in response.get("value", [])
            if (folder.get("displayName") or "").strip().lower() == wantedLower
        ),
        None,
    )
    if namedFolder is None:
        return None
    return namedFolder.get("id")
|
||||||
|
|
||||||
async def download(self, path: str) -> DownloadResult:
|
async def download(self, path: str) -> DownloadResult:
|
||||||
cleanPath = (path or "").strip("/")
|
cleanPath = (path or "").strip("/")
|
||||||
if "/" not in cleanPath:
|
if "/" not in cleanPath:
|
||||||
|
|
@ -1193,19 +1404,27 @@ class ContactsAdapter(_GraphApiMixin, ServiceAdapter):
|
||||||
path: Optional[str] = None,
|
path: Optional[str] = None,
|
||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
) -> List[ExternalEntry]:
|
) -> List[ExternalEntry]:
|
||||||
safeQuery = query.replace("'", "''")
|
safeQuery = query.replace('"', '\\"')
|
||||||
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
|
effectiveLimit = self._DEFAULT_CONTACT_LIMIT if limit is None else max(1, min(int(limit), self._MAX_CONTACT_LIMIT))
|
||||||
endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}"
|
endpoint = f"me/contacts?$search=\"{safeQuery}\"&$top={effectiveLimit}"
|
||||||
result = await self._graphGet(endpoint)
|
result = await self._graphGet(endpoint)
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
return []
|
_raiseGraphError(result, "MSFT contacts search")
|
||||||
return [
|
return [
|
||||||
ExternalEntry(
|
ExternalEntry(
|
||||||
name=c.get("displayName") or _personLabel(c) or "(no name)",
|
name=c.get("displayName") or _personLabel(c) or "(no name)",
|
||||||
path=f"/search/{c.get('id', '')}",
|
path=f"/search/{c.get('id', '')}",
|
||||||
isFolder=False,
|
isFolder=False,
|
||||||
mimeType="text/vcard",
|
mimeType="text/vcard",
|
||||||
metadata={"id": c.get("id")},
|
metadata={
|
||||||
|
"id": c.get("id"),
|
||||||
|
"givenName": c.get("givenName"),
|
||||||
|
"surname": c.get("surname"),
|
||||||
|
"companyName": c.get("companyName"),
|
||||||
|
"emailAddresses": [e.get("address") for e in (c.get("emailAddresses") or []) if e.get("address")],
|
||||||
|
"businessPhones": c.get("businessPhones") or [],
|
||||||
|
"mobilePhone": c.get("mobilePhone"),
|
||||||
|
},
|
||||||
)
|
)
|
||||||
for c in result.get("value", [])
|
for c in result.get("value", [])
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -111,7 +111,6 @@ class ChatMessage(PowerOnModel):
|
||||||
class WorkflowModeEnum(str, Enum):
|
class WorkflowModeEnum(str, Enum):
|
||||||
WORKFLOW_DYNAMIC = "Dynamic"
|
WORKFLOW_DYNAMIC = "Dynamic"
|
||||||
WORKFLOW_AUTOMATION = "Automation"
|
WORKFLOW_AUTOMATION = "Automation"
|
||||||
WORKFLOW_CHATBOT = "Chatbot"
|
|
||||||
|
|
||||||
@i18nModel("Chat-Workflow")
|
@i18nModel("Chat-Workflow")
|
||||||
class ChatWorkflow(PowerOnModel):
|
class ChatWorkflow(PowerOnModel):
|
||||||
|
|
@ -169,10 +168,6 @@ class ChatWorkflow(PowerOnModel):
|
||||||
"value": WorkflowModeEnum.WORKFLOW_AUTOMATION.value,
|
"value": WorkflowModeEnum.WORKFLOW_AUTOMATION.value,
|
||||||
"label": "Automation",
|
"label": "Automation",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"value": WorkflowModeEnum.WORKFLOW_CHATBOT.value,
|
|
||||||
"label": "Chatbot",
|
|
||||||
},
|
|
||||||
]})
|
]})
|
||||||
maxSteps: int = Field(default=10, description="Maximum number of iterations in dynamic mode", json_schema_extra={"label": "Max. Schritte", "frontend_type": "integer", "frontend_readonly": False, "frontend_required": False})
|
maxSteps: int = Field(default=10, description="Maximum number of iterations in dynamic mode", json_schema_extra={"label": "Max. Schritte", "frontend_type": "integer", "frontend_readonly": False, "frontend_required": False})
|
||||||
expectedFormats: Optional[List[str]] = Field(None, description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). Extracted during intent analysis.", json_schema_extra={"label": "Erwartete Formate", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
expectedFormats: Optional[List[str]] = Field(None, description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). Extracted during intent analysis.", json_schema_extra={"label": "Erwartete Formate", "frontend_type": "text", "frontend_readonly": True, "frontend_required": False})
|
||||||
|
|
|
||||||
|
|
@ -77,19 +77,14 @@ class DataSource(PowerOnModel):
|
||||||
description="Timestamp of last successful RAG indexing run",
|
description="Timestamp of last successful RAG indexing run",
|
||||||
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
json_schema_extra={"label": "Letzte Indexierung", "frontend_type": "timestamp"},
|
||||||
)
|
)
|
||||||
|
# scope was removed (privacy, 2026-06). Personal sources must not be
|
||||||
|
# shared across scopes. Only Files (folder-files) retain scope.
|
||||||
|
# The DB column is kept as deprecated-nullable to avoid a migration;
|
||||||
|
# it is never read or written by UDB/ingest/knowledge anymore.
|
||||||
scope: Optional[str] = Field(
|
scope: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description=(
|
description="DEPRECATED (2026-06, privacy). Always None. Use Files scope instead.",
|
||||||
"Data visibility scope with inherit semantics. "
|
json_schema_extra={"frontend_readonly": True, "frontend_hidden": True},
|
||||||
"None = inherit; values: personal, featureInstance, mandate, global. "
|
|
||||||
"Cascade-reset on parent toggle."
|
|
||||||
),
|
|
||||||
json_schema_extra={"label": "Sichtbarkeit", "frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [
|
|
||||||
{"value": "personal", "label": "Persönlich"},
|
|
||||||
{"value": "featureInstance", "label": "Feature-Instanz"},
|
|
||||||
{"value": "mandate", "label": "Mandant"},
|
|
||||||
{"value": "global", "label": "Global"},
|
|
||||||
]},
|
|
||||||
)
|
)
|
||||||
neutralize: Optional[bool] = Field(
|
neutralize: Optional[bool] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
|
|
|
||||||
|
|
@ -12,9 +12,9 @@ from modules.datamodels.datamodelUtils import TextMultilingual
|
||||||
|
|
||||||
@i18nModel("Feature")
|
@i18nModel("Feature")
|
||||||
class Feature(PowerOnModel):
|
class Feature(PowerOnModel):
|
||||||
"""Feature-Definition (global, z.B. 'trustee', 'chatbot'). Verfuegbare Funktionalitaeten der Plattform."""
|
"""Feature-Definition (global, z.B. 'trustee', 'commcoach'). Verfuegbare Funktionalitaeten der Plattform."""
|
||||||
code: str = Field(
|
code: str = Field(
|
||||||
description="Unique feature code (Primary Key), z.B. 'trustee', 'chatbot'",
|
description="Unique feature code (Primary Key), z.B. 'trustee', 'commcoach'",
|
||||||
json_schema_extra={"label": "Code", "frontend_type": "text", "frontend_readonly": False, "frontend_required": True}
|
json_schema_extra={"label": "Code", "frontend_type": "text", "frontend_readonly": False, "frontend_required": True}
|
||||||
)
|
)
|
||||||
label: TextMultilingual = Field(
|
label: TextMultilingual = Field(
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ supportedSectionTypes: List[str] = [
|
||||||
"paragraph",
|
"paragraph",
|
||||||
"code_block",
|
"code_block",
|
||||||
"image",
|
"image",
|
||||||
|
# Layout primitives (A3): type-specific document layout.
|
||||||
|
"cover_page", # centered title page (subtitle/author/date/logo), ends with page break
|
||||||
|
"image_grid", # N-column arrangement of images (marketing-style layouts)
|
||||||
]
|
]
|
||||||
|
|
||||||
class InlineRun(TypedDict, total=False):
|
class InlineRun(TypedDict, total=False):
|
||||||
|
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Chatbot feature - LangGraph-based chatbot implementation.
|
|
||||||
Lazy-loaded to avoid importing langgraph/langchain at boot time.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
async def chatProcess(*args, **kwargs):
|
|
||||||
"""Lazy wrapper - imports the real chatProcess on first call to defer langgraph loading."""
|
|
||||||
from .service import chatProcess as _chatProcess
|
|
||||||
return await _chatProcess(*args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['chatProcess']
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""Bridges to external systems (AI models, database, tools)."""
|
|
||||||
|
|
@ -1,727 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
AI Center to LangChain bridge.
|
|
||||||
Implements LangChain BaseChatModel interface using AI center models.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import asyncio
|
|
||||||
import time
|
|
||||||
from typing import Any, AsyncIterator, Callable, Dict, List, Optional
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from langchain_core.language_models.chat_models import BaseChatModel
|
|
||||||
from langchain_core.messages import (
|
|
||||||
BaseMessage,
|
|
||||||
HumanMessage,
|
|
||||||
SystemMessage,
|
|
||||||
AIMessage,
|
|
||||||
ToolMessage,
|
|
||||||
convert_to_openai_messages,
|
|
||||||
)
|
|
||||||
from langchain_core.outputs import ChatGeneration, ChatResult
|
|
||||||
from langchain_core.runnables import RunnableConfig
|
|
||||||
|
|
||||||
from modules.aicore.aicoreModelRegistry import modelRegistry
|
|
||||||
from modules.aicore.aicoreModelSelector import modelSelector
|
|
||||||
from modules.datamodels.datamodelAi import (
|
|
||||||
AiModel,
|
|
||||||
AiModelCall,
|
|
||||||
AiModelResponse,
|
|
||||||
AiCallResponse,
|
|
||||||
AiCallOptions,
|
|
||||||
OperationTypeEnum,
|
|
||||||
ProcessingModeEnum,
|
|
||||||
)
|
|
||||||
from modules.datamodels.datamodelUam import User
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Workflow-level store for allowed_providers and RBAC context (survives LangGraph/bind_tools
|
|
||||||
# execution context where instance attributes may be lost when model is wrapped or serialized)
|
|
||||||
_workflow_allowed_providers: Dict[str, List[str]] = {}
|
|
||||||
_workflow_rbac_context: Dict[str, tuple] = {} # workflow_id -> (mandateId, featureInstanceId)
|
|
||||||
|
|
||||||
|
|
||||||
def clear_workflow_allowed_providers(workflow_id: str) -> None:
|
|
||||||
"""Remove workflow from registry when stream completes to avoid memory growth."""
|
|
||||||
_workflow_allowed_providers.pop(workflow_id, None)
|
|
||||||
|
|
||||||
|
|
||||||
class AICenterChatModel(BaseChatModel):
    """
    LangChain-compatible chat model that uses AI center models.

    Bridges AI center model selection, calling and billing to LangChain's
    BaseChatModel interface. For "openai" and "privatellm" connectors the
    OpenAI-compatible chat-completions HTTP API is called directly (with tool
    schemas when tools are bound, streamed otherwise); every other connector
    is called through the model's ``functionCall``.

    Note: ``invoke``/``ainvoke`` call ``_agenerate`` directly without a
    run manager, so token callbacks only fire when a caller passes
    ``run_manager`` to ``_agenerate``/``_generate`` itself.
    """

    def __init__(
        self,
        user: User,
        operation_type: OperationTypeEnum = OperationTypeEnum.DATA_ANALYSE,
        processing_mode: ProcessingModeEnum = ProcessingModeEnum.DETAILED,
        billing_callback: Optional[Callable[[AiCallResponse], None]] = None,
        workflow_id: Optional[str] = None,
        allowed_providers: Optional[List[str]] = None,
        prefer_fast_model: bool = False,
        mandate_id: Optional[str] = None,
        feature_instance_id: Optional[str] = None,
        **kwargs
    ):
        """
        Initialize the AI center chat model bridge.

        Args:
            user: Current user for RBAC and model selection
            operation_type: Operation type for model selection
            processing_mode: Processing mode for model selection
            billing_callback: Optional callback invoked with an AiCallResponse after each model call
            workflow_id: Optional workflow/conversation ID; also the key of the workflow-level stores
            allowed_providers: Optional list of allowed provider connector types (empty/None = all)
            prefer_fast_model: When True, strongly prefer faster models (e.g. gpt-4o-mini for planner)
            mandate_id: Optional mandate ID forwarded to the model registry when listing models
            feature_instance_id: Optional feature instance ID forwarded to the model registry
            **kwargs: Additional arguments passed to BaseChatModel
        """
        super().__init__(**kwargs)
        # Use object.__setattr__ to bypass Pydantic validation for custom attributes
        object.__setattr__(self, "user", user)
        object.__setattr__(self, "operation_type", operation_type)
        object.__setattr__(self, "processing_mode", processing_mode)
        object.__setattr__(self, "_selected_model", None)
        object.__setattr__(self, "_billing_callback", billing_callback)
        object.__setattr__(self, "_workflow_id", workflow_id)
        object.__setattr__(self, "_allowed_providers", allowed_providers or [])
        object.__setattr__(self, "_prefer_fast_model", prefer_fast_model)
        object.__setattr__(self, "_mandate_id", mandate_id)
        object.__setattr__(self, "_feature_instance_id", feature_instance_id)
        # Mirror into the workflow-level stores so the values survive when
        # instance attributes are lost (e.g. bind_tools / LangGraph wrapping).
        if workflow_id and allowed_providers:
            _workflow_allowed_providers[workflow_id] = list(allowed_providers)
        if workflow_id and (mandate_id is not None or feature_instance_id is not None):
            _workflow_rbac_context[workflow_id] = (mandate_id, feature_instance_id)

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "aicenter"

    @staticmethod
    def _content_as_text(content: Any) -> str:
        """Return message content as text; non-string content (e.g. content parts) is stringified."""
        return content if isinstance(content, str) else str(content)

    def _select_model(self, messages: List[BaseMessage]) -> AiModel:
        """
        Select the best AI center model for the given messages.

        The first successful selection is cached on the instance and returned
        for every later call, regardless of the messages passed.

        Args:
            messages: List of LangChain messages

        Returns:
            Selected AI model

        Raises:
            ValueError: If the selector returns no model.
        """
        # Return cached model if already selected (significant performance improvement)
        if self._selected_model is not None:
            return self._selected_model

        # System/human messages form the prompt, AI/tool messages the context.
        prompt_parts = []
        context_parts = []
        for msg in messages:
            if isinstance(msg, (SystemMessage, HumanMessage)):
                prompt_parts.append(self._content_as_text(msg.content))
            elif isinstance(msg, AIMessage):
                context_parts.append(self._content_as_text(msg.content))
            elif isinstance(msg, ToolMessage):
                context_parts.append(f"Tool {msg.name}: {msg.content}")

        prompt = "\n".join(prompt_parts)
        context = "\n".join(context_parts) if context_parts else ""

        # Get available models with RBAC filtering
        from modules.interfaces.interfaceDbApp import getRootInterface

        workflow_id = getattr(self, "_workflow_id", None)
        rbac_instance = getRootInterface().rbac

        mandate_id = getattr(self, "_mandate_id", None)
        feature_instance_id = getattr(self, "_feature_instance_id", None)
        if workflow_id and (mandate_id is None and feature_instance_id is None):
            # Instance attributes missing: fall back to the workflow-level store.
            ctx = _workflow_rbac_context.get(workflow_id)
            if ctx:
                mandate_id, feature_instance_id = ctx
        available_models = modelRegistry.getAvailableModels(
            currentUser=self.user,
            rbacInstance=rbac_instance,
            mandateId=mandate_id,
            featureInstanceId=feature_instance_id,
        )

        # Allowed providers: workflow store first, then instance attr
        # (the latter can be lost in LangGraph/bind_tools context)
        allowed = (
            (_workflow_allowed_providers.get(workflow_id) if workflow_id else None)
            or getattr(self, "_allowed_providers", None)
            or []
        )
        if allowed:
            logger.info(f"AICenterChatModel _select_model: applying allowedProviders={allowed}")
            filtered = [m for m in available_models if m.connectorType in allowed]
            if filtered:
                available_models = filtered
            else:
                logger.warning(f"No models match allowedProviders {allowed}, using all RBAC-permitted models")

        options = AiCallOptions(
            operationType=self.operation_type,
            processingMode=self.processing_mode,
            allowedProviders=allowed if allowed else None,
            preferFastModel=getattr(self, "_prefer_fast_model", False),
        )

        selected_model = modelSelector.selectModel(
            prompt=prompt,
            context=context,
            options=options,
            availableModels=available_models
        )
        if not selected_model:
            raise ValueError(f"No suitable model found for operation type {self.operation_type.value}")

        logger.info(f"Selected AI center model: {selected_model.displayName} ({selected_model.name})")
        object.__setattr__(self, "_selected_model", selected_model)
        return selected_model

    def _convert_messages_to_ai_format(self, messages: List[BaseMessage]) -> List[Dict[str, Any]]:
        """
        Convert LangChain messages to AI center format (OpenAI-style).

        Args:
            messages: List of LangChain messages

        Returns:
            List of messages in OpenAI format
        """
        # Use LangChain's built-in conversion
        return convert_to_openai_messages(messages)

    def _convert_ai_response_to_langchain(
        self,
        response: AiModelResponse,
        tool_calls: Optional[List[Dict[str, Any]]] = None
    ) -> AIMessage:
        """
        Convert AI center response to LangChain AIMessage.

        Args:
            response: AI center response
            tool_calls: Optional tool calls, already in LangChain format
                ([{"id": "...", "name": "...", "args": {...}}])

        Returns:
            LangChain AIMessage with tool_calls if present
        """
        extra = {"tool_calls": tool_calls} if tool_calls else {}
        return AIMessage(content=response.content or "", **extra)

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """
        Synchronous generate method required by BaseChatModel.
        Wraps the async _agenerate method.

        Args:
            messages: List of LangChain messages
            stop: Optional stop sequences
            run_manager: Optional callback manager
            **kwargs: Additional arguments

        Returns:
            ChatResult with generations

        Raises:
            RuntimeError: If called while an event loop is running in this thread.
        """
        import asyncio

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so we may drive the coroutine ourselves.
            return asyncio.run(self._agenerate(messages, stop=stop, run_manager=run_manager, **kwargs))
        # Raised outside the try so the guard is not swallowed by its own handler.
        raise RuntimeError(
            "AICenterChatModel._generate() called from async context. "
            "Use _agenerate() instead."
        )

    def _resolve_chat_endpoint(self) -> tuple:
        """Return ``(api_url, headers, model_name)`` for the OpenAI-compatible chat-completions call.

        Raises:
            ValueError: If the selected model is an OpenAI model and no API key is configured.
        """
        from modules.shared.configuration import APP_CONFIG

        model = self._selected_model
        if model.connectorType == "openai":
            api_key = APP_CONFIG.get("Connector_AiOpenai_API_SECRET")
            if not api_key:
                raise ValueError("OpenAI API key not configured")
            api_url = getattr(model, "apiUrl", None) or "https://api.openai.com/v1/chat/completions"
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            return api_url, headers, model.name

        # privatellm: use Ollama OpenAI-compatible /v1/chat/completions (same service, same provider)
        base_url = (getattr(model, "apiUrl", None) or "").replace("/api/analyze", "")
        api_url = f"{base_url.rstrip('/')}/v1/chat/completions"
        headers = {"Content-Type": "application/json"}
        api_key = APP_CONFIG.get("Connector_AiPrivateLlm_API_SECRET")
        if api_key:
            headers["X-API-Key"] = api_key
        # Ollama needs the underlying model name (e.g. qwen2.5:7b), not poweron-text-general
        model_name = getattr(model, "version", None) or model.name
        return api_url, headers, model_name

    def _report_billing(self, content: str, input_bytes: int, start_time: float) -> None:
        """Compute the call price and hand an AiCallResponse to the billing callback, if any.

        Failures in price calculation or in the callback are logged and never raised.
        """
        model = self._selected_model
        output_bytes = len(content.encode("utf-8"))
        processing_time = time.time() - start_time
        price_chf = 0.0
        price_fn = getattr(model, "calculatepriceCHF", None)
        if price_fn:
            try:
                price_chf = price_fn(processing_time, input_bytes, output_bytes)
            except Exception as e:
                logger.warning(f"Billing: price calculation failed: {e}")
        billing_callback = getattr(self, "_billing_callback", None)
        if not billing_callback:
            return
        try:
            billing_callback(AiCallResponse(
                content=content,
                modelName=model.name,
                provider=getattr(model, "connectorType", "unknown") or "unknown",
                priceCHF=price_chf,
                processingTime=processing_time,
                bytesSent=input_bytes,
                bytesReceived=output_bytes,
                errorCount=0,
            ))
        except Exception as e:
            logger.error(f"Billing callback error: {e}")

    async def _call_openai_streaming(
        self,
        ai_messages: List[dict],
        run_manager: Optional[Any],
        model_call: "AiModelCall",
        input_bytes: int,
        start_time: float,
    ) -> "AiModelResponse":
        """Call OpenAI/Ollama with stream=True, emit tokens via run_manager, return full response.

        This method also reports billing for the call, so callers must not bill again.

        Args:
            ai_messages: Messages in OpenAI format
            run_manager: Optional callback manager; its on_llm_new_token is called per token
            model_call: Not used by this method; kept for signature compatibility
            input_bytes: Bytes sent, forwarded to billing
            start_time: time.time() at call start, used for the processing time

        Raises:
            ValueError: If the endpoint answers with a non-200 status or the OpenAI key is missing.
        """
        import httpx
        import inspect
        import json as _json

        api_url, headers, model_name = self._resolve_chat_endpoint()
        payload = {
            "model": model_name,
            "messages": ai_messages,
            "temperature": self._selected_model.temperature,
            "max_tokens": self._selected_model.maxTokens,
            "stream": True,
        }
        emit_token = getattr(run_manager, "on_llm_new_token", None) if run_manager else None
        content_parts: List[str] = []
        async with httpx.AsyncClient(timeout=600.0) as client:
            async with client.stream("POST", api_url, headers=headers, json=payload) as resp:
                if resp.status_code != 200:
                    raise ValueError(f"OpenAI stream error: {resp.status_code} - {await resp.aread()}")
                # aiter_lines() does the line splitting, including a final line without newline.
                async for raw_line in resp.aiter_lines():
                    line = raw_line.strip()
                    if not line.startswith("data:"):
                        continue
                    data_str = line[5:].strip()
                    if data_str == "[DONE]":
                        break
                    try:
                        data = _json.loads(data_str)
                    except _json.JSONDecodeError:
                        continue  # ignore frames that are not valid JSON
                    if not isinstance(data, dict):
                        continue
                    choices = data.get("choices") or []
                    if not choices:
                        continue
                    token = (choices[0].get("delta") or {}).get("content") or ""
                    if not token:
                        continue
                    content_parts.append(token)
                    if emit_token:
                        # The async run manager returns a coroutine here; the sync one returns None.
                        outcome = emit_token(token)
                        if inspect.isawaitable(outcome):
                            await outcome

        content = "".join(content_parts)
        self._report_billing(content, input_bytes, start_time)
        return AiModelResponse(content=content, success=True, modelId=self._selected_model.name, metadata={})

    @staticmethod
    def _describe_tools(tools: List[Any]) -> List[str]:
        """Build one "- name: description (Parameter: ...)" line per tool that has name and description."""
        lines = []
        for tool in tools:
            if not (hasattr(tool, "name") and hasattr(tool, "description")):
                continue
            params_info = ""
            args_schema = getattr(tool, "args_schema", None)
            if args_schema:
                try:
                    if hasattr(args_schema, "model_json_schema"):
                        schema = args_schema.model_json_schema()
                        if "properties" in schema:
                            params = list(schema["properties"].keys())
                            params_info = f" (Parameter: {', '.join(params)})"
                except Exception as e:
                    # Best effort: the tool is still listed, just without parameter names.
                    logger.debug(f"Could not read parameter schema of tool {tool.name}: {e}")
            lines.append(f"- {tool.name}: {tool.description}{params_info}")
        return lines

    @staticmethod
    def _tool_parameters(args_schema: Any) -> Dict[str, Any]:
        """Return the "parameters" object for one tool's args_schema ({} when it cannot be derived)."""
        from pydantic import BaseModel

        if not args_schema:
            return {}
        if isinstance(args_schema, type) and issubclass(args_schema, BaseModel):
            if hasattr(args_schema, "model_json_schema"):
                return args_schema.model_json_schema()  # Pydantic v2
            if hasattr(args_schema, "schema"):
                return args_schema.schema()  # Pydantic v1
            return {}
        if isinstance(args_schema, BaseModel):
            # NOTE(review): model_dump() yields field values, not a JSON schema - confirm intent.
            if hasattr(args_schema, "model_dump"):
                return args_schema.model_dump()
            return {}
        if hasattr(args_schema, "schema"):
            try:
                return args_schema.schema()
            except TypeError:
                # If schema() requires an instance, try model_json_schema
                if hasattr(args_schema, "model_json_schema"):
                    return args_schema.model_json_schema()
                return {}
        if isinstance(args_schema, dict):
            return args_schema
        return {}

    @classmethod
    def _build_openai_tools(cls, tools: List[Any]) -> List[Dict[str, Any]]:
        """Convert LangChain tools to OpenAI function-calling schemas (also used by Ollama-compatible APIs)."""
        return [
            {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description or "",
                    "parameters": cls._tool_parameters(getattr(tool, "args_schema", None)),
                },
            }
            for tool in tools
            if hasattr(tool, "name") and hasattr(tool, "description")
        ]

    @staticmethod
    def _parse_tool_calls(tool_calls_raw: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
        """Convert OpenAI tool_calls to LangChain format; unparsable arguments become {}."""
        import json as _json

        if not tool_calls_raw:
            return None
        parsed = []
        for tc in tool_calls_raw:
            func_data = tc.get("function", {})
            raw_args = func_data.get("arguments", "{}")
            try:
                func_args = _json.loads(raw_args) if isinstance(raw_args, str) else raw_args
            except ValueError:
                func_args = {}
            parsed.append({
                "id": tc.get("id"),
                "name": func_data.get("name"),
                "args": func_args,
            })
        return parsed

    async def _call_connector(self, model_call: AiModelCall) -> AiModelResponse:
        """Call the selected model through its connector functionCall.

        Raises:
            ValueError: If the selected model has no functionCall.
        """
        if not self._selected_model.functionCall:
            raise ValueError(f"Model {self._selected_model.displayName} has no functionCall defined")
        return await self._selected_model.functionCall(model_call)

    async def _call_openai_with_tools(
        self,
        ai_messages: List[Dict[str, Any]],
        openai_tools: List[Dict[str, Any]],
        model_call: AiModelCall,
    ) -> AiModelResponse:
        """POST a non-streaming chat completion with tool schemas and return the parsed response.

        A 404 from a privatellm endpoint falls back to the connector functionCall.

        Raises:
            ValueError: On any other non-200 status or a response without choices.
        """
        import httpx

        model = self._selected_model
        api_url, headers, model_name = self._resolve_chat_endpoint()
        payload = {
            "model": model_name,
            "messages": ai_messages,
            "tools": openai_tools,
            "tool_choice": "auto",
            "temperature": model.temperature,
            "max_tokens": model.maxTokens,
        }
        async with httpx.AsyncClient(timeout=600.0) as client:
            response_obj = await client.post(api_url, headers=headers, json=payload)

        if response_obj.status_code == 404 and model.connectorType == "privatellm":
            logger.warning(
                "Private-LLM /v1/chat/completions not found (404). Falling back to /api/analyze. "
                "Tool calling will not work until the service exposes an OpenAI-compatible endpoint."
            )
            return await self._call_connector(model_call)
        if response_obj.status_code != 200:
            error_msg = f"AI API error ({model.connectorType}): {response_obj.status_code} - {response_obj.text}"
            logger.error(error_msg)
            raise ValueError(error_msg)

        response_json = response_obj.json()
        try:
            message = response_json["choices"][0]["message"]
        except (KeyError, IndexError, TypeError) as e:
            raise ValueError(f"AI API error ({model.connectorType}): response contains no choices") from e

        return AiModelResponse(
            content=message.get("content", "") or "",
            success=True,
            modelId=model.name,
            metadata={
                "response_id": response_json.get("id", ""),
                "tool_calls": self._parse_tool_calls(message.get("tool_calls")),
            },
        )

    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """
        Async generate method required by BaseChatModel.

        Args:
            messages: List of LangChain messages
            stop: Optional stop sequences (currently not forwarded to the model)
            run_manager: Optional callback manager
            **kwargs: Additional arguments (may include tools for tool calling)

        Returns:
            ChatResult with generations

        Raises:
            ValueError: If no model can be selected, the API call fails, or the
                model reports an unsuccessful response.
        """
        # Select model if not already selected
        if not self._selected_model:
            self._select_model(messages)
        model = self._selected_model

        # Tools bound through bind_tools (for tool calling)
        tools = getattr(self, "_bound_tools", None)

        ai_messages = self._convert_messages_to_ai_format(messages)

        # Input bytes for billing: sum of string message contents, before the tool note is added
        input_bytes = sum(
            len(m["content"].encode("utf-8"))
            for m in ai_messages
            if isinstance(m.get("content"), str)
        )
        start_time = time.time()

        # If tools are bound, describe them in the system message as well;
        # some models need explicit tool definitions to enable tool calling.
        tool_descriptions = self._describe_tools(tools) if tools else []
        if tool_descriptions:
            tools_text = "\n".join(tool_descriptions)
            tools_note = (
                "\n\n⚠️⚠️⚠️ KRITISCH - TOOL-NUTZUNG ⚠️⚠️⚠️\n\n"
                f"VERFÜGBARE TOOLS:\n{tools_text}\n\n"
                "ABSOLUT VERBINDLICH:\n"
                "- Du MUSST diese Tools verwenden, um Anfragen zu bearbeiten!\n"
                "- Für Status-Updates MUSST du IMMER das Tool 'send_streaming_message' verwenden!\n"
                "- VERBOTEN: Normale Text-Nachrichten für Status-Updates!\n"
                "- Du MUSST Tools aufrufen, nicht nur darüber sprechen!\n\n"
                "Beispiel FALSCH: \"Ich werde die Datenbank durchsuchen...\"\n"
                "Beispiel RICHTIG: Rufe das Tool 'send_streaming_message' mit \"Durchsuche Datenbank...\" auf!"
            )
            system_idx = next(
                (i for i, m in enumerate(ai_messages) if m.get("role") == "system"),
                None,
            )
            if system_idx is None:
                # Add new system message at the beginning
                ai_messages.insert(0, {"role": "system", "content": tools_note.strip()})
            else:
                existing = ai_messages[system_idx].get("content")
                if isinstance(existing, list):
                    # Content given as parts: append the note as another text part.
                    ai_messages[system_idx]["content"] = existing + [{"type": "text", "text": tools_note}]
                else:
                    ai_messages[system_idx]["content"] = (existing or "") + tools_note

        uses_openai_api = model.connectorType in ("openai", "privatellm")
        openai_tools = self._build_openai_tools(tools) if tools and uses_openai_api else None

        # AiModelCall has no tools field; tools travel in the HTTP payload and the system message.
        model_call = AiModelCall(
            messages=ai_messages,
            model=model,
            options=AiCallOptions(
                operationType=self.operation_type,
                processingMode=self.processing_mode,
                temperature=model.temperature
            )
        )

        already_billed = False
        if openai_tools and uses_openai_api:
            # Tools bound: OpenAI-compatible API (OpenAI or Private-LLM Ollama endpoint)
            response = await self._call_openai_with_tools(ai_messages, openai_tools, model_call)
        elif not tools and uses_openai_api:
            # Streaming path for OpenAI/Ollama without tools (ChatGPT-like token streaming)
            response = await self._call_openai_streaming(
                ai_messages, run_manager, model_call, input_bytes, start_time
            )
            already_billed = True  # the streaming helper reports billing itself
        else:
            # Not an OpenAI-compatible connector (or no usable tool schema): use connector normally
            response = await self._call_connector(model_call)

        if not response.success:
            raise ValueError(f"AI model call failed: {response.error or 'Unknown error'}")

        if not already_billed:
            self._report_billing(response.content or "", input_bytes, start_time)

        # Tool calls live in response metadata (key may vary by connector)
        tool_calls = None
        if response.metadata:
            tool_calls = response.metadata.get("tool_calls") or response.metadata.get("function_calls")

        ai_message = self._convert_ai_response_to_langchain(response, tool_calls=tool_calls)
        return ChatResult(generations=[ChatGeneration(message=ai_message)])

    def bind_tools(self, tools: List[Any], **kwargs: Any) -> "AICenterChatModel":
        """
        Bind tools to the model (required for LangGraph tool calling).

        Args:
            tools: List of LangChain tools
            **kwargs: Additional arguments (ignored)

        Returns:
            New instance with tools bound; it carries over this instance's
            configuration and already selected model.
        """
        bound_model = AICenterChatModel(
            user=self.user,
            operation_type=self.operation_type,
            processing_mode=self.processing_mode,
            billing_callback=getattr(self, "_billing_callback", None),
            workflow_id=getattr(self, "_workflow_id", None),
            allowed_providers=getattr(self, "_allowed_providers", None),
            prefer_fast_model=getattr(self, "_prefer_fast_model", False),
            mandate_id=getattr(self, "_mandate_id", None),
            feature_instance_id=getattr(self, "_feature_instance_id", None),
        )
        object.__setattr__(bound_model, "_selected_model", self._selected_model)
        # Read back in _agenerate to build tool schemas and the system-message note
        object.__setattr__(bound_model, "_bound_tools", tools)
        return bound_model

    def invoke(
        self,
        input: List[BaseMessage],
        config: Optional[RunnableConfig] = None,
        **kwargs: Any,
    ) -> BaseMessage:
        """
        Synchronous invoke method (required by BaseChatModel).
        Note: This is a wrapper around async _agenerate.

        Args:
            input: List of LangChain messages
            config: Optional runnable config (not used)
            **kwargs: Additional arguments

        Returns:
            AIMessage response

        Raises:
            RuntimeError: If called while an event loop is running in this thread.
        """
        import asyncio

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: asyncio.run creates one and closes it afterwards.
            result = asyncio.run(self._agenerate(input, **kwargs))
            return result.generations[0].message
        # Raised outside the try so the guard is not swallowed by its own handler.
        raise RuntimeError("Cannot use synchronous invoke in async context. Use ainvoke instead.")

    async def ainvoke(
        self,
        input: List[BaseMessage],
        config: Optional[RunnableConfig] = None,
        **kwargs: Any,
    ) -> BaseMessage:
        """
        Async invoke method (required by BaseChatModel).

        Args:
            input: List of LangChain messages
            config: Optional runnable config (not used)
            **kwargs: Additional arguments

        Returns:
            AIMessage response
        """
        result = await self._agenerate(input, **kwargs)
        return result.generations[0].message
|
|
||||||
|
|
@ -1,576 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Custom LangGraph checkpointer using existing database interface.
|
|
||||||
Maps LangGraph state to existing message storage format.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import contextvars
|
|
||||||
import logging
|
|
||||||
import uuid
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple, NamedTuple
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from langgraph.checkpoint.base import BaseCheckpointSaver, Checkpoint, CheckpointMetadata
|
|
||||||
|
|
||||||
# Local stand-in for LangGraph's CheckpointTuple, declared here in case the
# library type cannot be imported directly. It carries the five fields the
# checkpointer hands back: config, checkpoint, metadata, parent_config and
# pending_writes.
class CheckpointTuple(NamedTuple):
    """Bundle of a checkpoint with its config, metadata and optional parent/pending data."""

    # Config the checkpoint belongs to
    config: Dict[str, Any]
    # The checkpoint payload itself
    checkpoint: Checkpoint
    # Metadata stored together with the checkpoint
    metadata: CheckpointMetadata
    # Config of the parent checkpoint; None when not provided
    parent_config: Optional[Dict[str, Any]] = None
    # Pending writes; None when not provided
    pending_writes: Optional[List[Tuple[str, Any]]] = None
|
|
||||||
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
|
|
||||||
|
|
||||||
from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
|
|
||||||
from modules.features.chatbot.interfaceFeatureChatbot import ChatbotMessage
|
|
||||||
from modules.datamodels.datamodelUam import User
|
|
||||||
from modules.shared.timeUtils import getUtcTimestamp
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_llm_response(text: str) -> str:
|
|
||||||
"""Strip chat template tokens and trailing junk that some models leak."""
|
|
||||||
if not text or not isinstance(text, str):
|
|
||||||
return text or ""
|
|
||||||
for sentinel in ("<|im_start|>", "<|im_end|>", "<|endoftext|>", "<|user|>", "<|assistant|>"):
|
|
||||||
if sentinel in text:
|
|
||||||
text = text.split(sentinel)[0]
|
|
||||||
return text.strip()
|
|
||||||
|
|
||||||
|
|
||||||
class DatabaseCheckpointer(BaseCheckpointSaver):
    """
    Custom LangGraph checkpointer that uses the chatbot's own database interface.

    Maps LangGraph thread_id to conversation.id; stores messages via interface
    (workflowId maps to conversationId).

    Only the "messages" channel is handled: put() writes new user/assistant
    messages through the interface, get() rebuilds a checkpoint from the
    stored messages. Channel versions and pending writes are not persisted.
    """

    def __init__(
        self,
        user: User,
        workflow_id: str,
        mandateId: Optional[str] = None,
        featureInstanceId: Optional[str] = None,
        *,
        interface=None,
    ):
        """
        Initialize the database checkpointer.

        Args:
            user: Current user for database access
            workflow_id: Workflow ID (maps to LangGraph thread_id)
            mandateId: Mandate ID for proper data isolation
            featureInstanceId: Feature instance ID for proper data isolation
            interface: Optional pre-created chatbot interface (avoids extra getInterface + DB init)
        """
        self.user = user
        self.workflow_id = workflow_id
        # Reuse the caller's interface when given; otherwise create one for this user/scope.
        self.interface = interface if interface is not None else getChatbotInterface(
            user, mandateId=mandateId, featureInstanceId=featureInstanceId
        )

    def _convert_langchain_to_db_message(
        self,
        msg: BaseMessage,
        sequence_nr: int,
        round_number: int
    ) -> Dict[str, Any]:
        """
        Convert LangChain message to database message format.

        Args:
            msg: LangChain message
            sequence_nr: Sequence number for ordering
            round_number: Round number in workflow

        Returns:
            Dictionary in database message format. Message types other than
            Human/AI/System/Tool fall through with role "user" and empty content.
        """
        import uuid

        role = "user"
        content = ""

        if isinstance(msg, HumanMessage):
            role = "user"
            content = msg.content if isinstance(msg.content, str) else str(msg.content)
        elif isinstance(msg, AIMessage):
            role = "assistant"
            content = msg.content if isinstance(msg.content, str) else str(msg.content)
        elif isinstance(msg, SystemMessage):
            # System messages are stored but marked as system
            role = "system"
            content = msg.content if isinstance(msg.content, str) else str(msg.content)
        elif isinstance(msg, ToolMessage):
            # Tool messages are stored as assistant messages with tool info
            role = "assistant"
            content = f"Tool {msg.name}: {msg.content}"

        return {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": self.workflow_id,
            "message": content,
            "role": role,
            # The very first message of a workflow is flagged "first", all later ones "step".
            "status": "step" if sequence_nr > 1 else "first",
            "sequenceNr": sequence_nr,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": round_number,
            "taskNumber": 0,
            "actionNumber": 0
        }

    def _convert_db_to_langchain_messages(
        self,
        messages: List[ChatbotMessage]
    ) -> List[BaseMessage]:
        """
        Convert database messages to LangChain messages.

        Args:
            messages: List of database ChatbotMessage objects

        Returns:
            List of LangChain BaseMessage objects; messages with a role other
            than user/assistant/system are skipped.
        """
        langchain_messages = []

        for msg in messages:
            if msg.role == "user":
                langchain_messages.append(HumanMessage(content=msg.message))
            elif msg.role == "assistant":
                langchain_messages.append(AIMessage(content=msg.message))
            elif msg.role == "system":
                langchain_messages.append(SystemMessage(content=msg.message))
            # Skip other roles for now

        return langchain_messages

    def put(
        self,
        config: Dict[str, Any],
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: Dict[str, int],
    ) -> None:
        """
        Store a checkpoint in the database.

        Only user messages and final assistant answers that are not yet stored
        (compared by role + content) are written. AI messages carrying
        tool_calls and AI messages that look like raw SQL / fenced code are
        treated as intermediate output and skipped.

        Args:
            config: LangGraph config (contains thread_id)
            checkpoint: Checkpoint to store
            metadata: Checkpoint metadata (not used here)
            new_versions: New version numbers (not used here)

        Raises:
            Exception: any error outside the per-message insert is logged and re-raised.
        """
        try:
            # Extract thread_id from config (maps to workflow_id)
            thread_id = config.get("configurable", {}).get("thread_id", self.workflow_id)

            # Get current workflow to determine round number
            workflow = self.interface.getWorkflow(thread_id)
            if not workflow:
                logger.warning(f"Workflow {thread_id} not found, cannot store checkpoint")
                return

            round_number = workflow.currentRound

            # Extract messages from checkpoint
            state = checkpoint.get("channel_values", {})
            messages = state.get("messages", [])

            if not messages:
                logger.debug(f"No messages in checkpoint for workflow {thread_id}")
                return

            # Get existing messages to determine what's already stored
            existing_messages = self.interface.getMessages(thread_id)
            existing_count = len(existing_messages) if existing_messages else 0

            # (role, content) keys of everything already stored, for O(1) duplicate checks.
            # NOTE(review): keying on content means a user repeating an identical
            # message later in the conversation is not stored a second time.
            existing_content_set = {
                (existing_msg.role, existing_msg.message)
                for existing_msg in (existing_messages or [])
            }

            # Filter checkpoint messages to only user/assistant (skip system)
            # Skip intermediate AIMessages with tool_calls (these are tool call requests, not final answers)
            checkpoint_user_assistant_messages = []
            for msg in messages:
                if isinstance(msg, HumanMessage):
                    # Always store user messages
                    checkpoint_user_assistant_messages.append(msg)
                elif isinstance(msg, AIMessage):
                    # Check if this message has tool_calls
                    tool_calls = getattr(msg, "tool_calls", None)
                    if tool_calls and len(tool_calls) > 0:
                        logger.debug(f"Skipping intermediate AIMessage with tool_calls for workflow {thread_id}")
                        continue
                    # Skip agent_sql_plan output (raw SQL block) - only store agent_formulate final answer
                    content = msg.content if isinstance(msg.content, str) else str(msg.content)
                    cu = (content or "").strip().upper()
                    if content and (
                        content.strip().startswith("```")
                        or (cu.startswith("SELECT") and ("FROM" in cu or "JOIN" in cu))
                    ):
                        logger.debug(f"Skipping intermediate SQL AIMessage for workflow {thread_id}")
                        continue
                    checkpoint_user_assistant_messages.append(msg)

            # Only store new messages that aren't already in the database
            new_messages_to_store = []
            for msg in checkpoint_user_assistant_messages:
                role = "user" if isinstance(msg, HumanMessage) else "assistant"
                content = msg.content if isinstance(msg.content, str) else str(msg.content)
                if isinstance(msg, AIMessage):
                    content = _sanitize_llm_response(content)
                # Nothing worth persisting once the content is blank.
                # NOTE(review): applied to every role; confirm against the
                # original nesting if empty user messages must be kept.
                if not content or not content.strip():
                    continue
                content_key = (role, content)
                if content_key not in existing_content_set:
                    if isinstance(msg, AIMessage) and msg.content != content:
                        # Persist the sanitized text, not the raw model output.
                        msg = AIMessage(content=content)
                    new_messages_to_store.append(msg)
                    existing_content_set.add(content_key)

            # Store only the new messages
            if new_messages_to_store:
                for msg in new_messages_to_store:
                    # existing_count advances only after a successful insert, so the
                    # next number is always "stored so far + 1". (Adding the loop index
                    # on top of the running count produced gaps: N+1, N+3, N+5, ...)
                    sequence_nr = existing_count + 1
                    # Convert to database format
                    db_message_data = self._convert_langchain_to_db_message(
                        msg,
                        sequence_nr,
                        round_number
                    )

                    # Store the message; a failed insert is logged and does not abort the rest.
                    try:
                        self.interface.createMessage(db_message_data)
                        logger.debug(f"Stored message {db_message_data['id']} for workflow {thread_id}")
                        existing_count += 1  # Update count for next message
                    except Exception as e:
                        logger.error(f"Error storing message: {e}", exc_info=True)
            else:
                logger.debug(f"No new messages to store for workflow {thread_id} (existing: {existing_count}, checkpoint: {len(checkpoint_user_assistant_messages)})")

            # Update workflow last activity
            self.interface.updateWorkflow(thread_id, {
                "lastActivity": getUtcTimestamp()
            })

        except Exception as e:
            logger.error(f"Error storing checkpoint: {e}", exc_info=True)
            raise

    def get(
        self,
        config: Dict[str, Any],
    ) -> Optional[Checkpoint]:
        """
        Retrieve a checkpoint from the database.

        The checkpoint is synthesized on every call: a fresh id, the current
        timestamp, and the stored messages converted to LangChain messages.

        Args:
            config: LangGraph config (contains thread_id)

        Returns:
            Checkpoint if the workflow exists (with an empty message list for a
            new workflow), None if the workflow is missing or an error occurred.
        """
        # Imported locally, like in _convert_langchain_to_db_message, so this
        # method does not depend on a module-level uuid import.
        import uuid

        try:
            # Extract thread_id from config (maps to workflow_id)
            thread_id = config.get("configurable", {}).get("thread_id", self.workflow_id)

            # Get workflow
            workflow = self.interface.getWorkflow(thread_id)
            if not workflow:
                logger.debug(f"Workflow {thread_id} not found")
                return None

            # Get messages
            messages = self.interface.getMessages(thread_id)

            checkpoint_id = str(uuid.uuid4())

            # New workflow (no messages yet) -> empty message list
            langchain_messages = self._convert_db_to_langchain_messages(messages) if messages else []

            # Build checkpoint
            return {
                "id": checkpoint_id,
                "v": 1,
                "ts": getUtcTimestamp(),
                "channel_values": {
                    "messages": langchain_messages
                },
                "channel_versions": {},
                "versions_seen": {}
            }

        except Exception as e:
            # Best effort: a failed read is logged and reported as "no checkpoint".
            logger.error(f"Error retrieving checkpoint: {e}", exc_info=True)
            return None

    def list(
        self,
        config: Dict[str, Any],
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Checkpoint]:
        """
        List checkpoints (not fully implemented - returns current checkpoint).

        Args:
            config: LangGraph config
            filter: Optional filter (ignored)
            before: Optional timestamp before which to list (ignored)
            limit: Optional limit on number of results (ignored)

        Returns:
            List with the current checkpoint, or an empty list if there is none
        """
        checkpoint = self.get(config)
        if checkpoint:
            return [checkpoint]
        return []

    def put_writes(
        self,
        config: Dict[str, Any],
        writes: List[Tuple[str, Any]],
        task_id: str,
    ) -> None:
        """
        Store checkpoint writes (not used in current implementation).

        Args:
            config: LangGraph config
            writes: List of write operations
            task_id: Task ID
        """
        # Not implemented - using put() instead
        pass

    async def aget_tuple(
        self,
        config: Dict[str, Any],
    ) -> Optional[CheckpointTuple]:
        """
        Async version of get that returns tuple of (config, checkpoint, metadata).

        Runs the synchronous get() directly.

        Args:
            config: LangGraph config (contains thread_id)

        Returns:
            CheckpointTuple with config, checkpoint and metadata if found, None otherwise
        """
        checkpoint = self.get(config)
        if checkpoint:
            # Return checkpoint with metadata including step
            # CheckpointMetadata is typically a TypedDict
            # LangGraph expects 'step' in metadata
            metadata: CheckpointMetadata = {
                "step": 0  # Start at step 0, LangGraph will increment
            }
            return CheckpointTuple(
                config=config,
                checkpoint=checkpoint,
                metadata=metadata,
                parent_config=None,  # No parent checkpoint for our implementation
                pending_writes=None  # No pending writes in our implementation
            )
        return None

    async def aput(
        self,
        config: Dict[str, Any],
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: Dict[str, int],
    ) -> None:
        """
        Async version of put; delegates to the synchronous put().

        Args:
            config: LangGraph config (contains thread_id)
            checkpoint: Checkpoint to store
            metadata: Checkpoint metadata
            new_versions: New version numbers
        """
        self.put(config, checkpoint, metadata, new_versions)

    async def alist(
        self,
        config: Dict[str, Any],
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Checkpoint]:
        """
        Async version of list; delegates to the synchronous list().

        Args:
            config: LangGraph config
            filter: Optional filter
            before: Optional timestamp before which to list
            limit: Optional limit on number of results

        Returns:
            List of checkpoints
        """
        return self.list(config, filter, before, limit)

    async def aput_writes(
        self,
        config: Dict[str, Any],
        writes: List[Tuple[str, Any]],
        task_id: str,
    ) -> None:
        """
        Async version of put_writes.
        Store checkpoint writes (not used in current implementation).

        Args:
            config: LangGraph config
            writes: List of write operations
            task_id: Task ID
        """
        # Not implemented - using aput() instead
        # This method is called by LangGraph but we handle writes through aput()
        pass
|
|
||||||
|
|
||||||
|
|
||||||
# ContextVar for per-request checkpointer (used by CheckpointerResolver for graph caching).
# Holds None until set_checkpointer() stores a checkpointer for the current context.
_current_checkpointer: contextvars.ContextVar[Optional[BaseCheckpointSaver]] = contextvars.ContextVar(
    "chatbot_current_checkpointer", default=None
)
|
|
||||||
|
|
||||||
|
|
||||||
def set_checkpointer(checkpointer: BaseCheckpointSaver) -> contextvars.Token:
    """Store ``checkpointer`` as the current context's checkpointer.

    Returns:
        The ContextVar token; pass it to reset_checkpointer() to restore the prior value.
    """
    token = _current_checkpointer.set(checkpointer)
    return token
|
|
||||||
|
|
||||||
|
|
||||||
def reset_checkpointer(token: contextvars.Token) -> None:
    """Restore the checkpointer that was active before ``token`` was issued.

    A ValueError from the reset is ignored, which makes the call safe when it
    runs in a different async context than the one that created the token.
    """
    try:
        _current_checkpointer.reset(token)
    except ValueError:
        # Token was created in a different context (e.g. after yield, generator cleanup)
        return
|
|
||||||
|
|
||||||
|
|
||||||
class CheckpointerResolver(BaseCheckpointSaver):
    """
    Delegating checkpointer that reads the real checkpointer from context.

    Used for graph caching: the compiled graph uses this resolver; at invoke time
    the per-request checkpointer is set via set_checkpointer().

    Every method forwards to the checkpointer currently stored in the
    _current_checkpointer ContextVar. The async methods prefer the inner
    checkpointer's async variant and fall back to its sync counterpart.
    """

    def _get_checkpointer(self) -> BaseCheckpointSaver:
        """Return the checkpointer of the current context.

        Raises:
            RuntimeError: if no checkpointer has been set for this context.
        """
        cp = _current_checkpointer.get()
        if cp is None:
            raise RuntimeError(
                "CheckpointerResolver: no checkpointer in context. "
                "Call set_checkpointer() before invoking the cached graph."
            )
        return cp

    def put(
        self,
        config: Dict[str, Any],
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: Dict[str, int],
    ) -> None:
        """Forward put() to the current checkpointer."""
        self._get_checkpointer().put(config, checkpoint, metadata, new_versions)

    def get(self, config: Dict[str, Any]) -> Optional[Checkpoint]:
        """Forward get() to the current checkpointer."""
        return self._get_checkpointer().get(config)

    def list(
        self,
        config: Dict[str, Any],
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Checkpoint]:
        """Forward list() to the current checkpointer."""
        return self._get_checkpointer().list(config, filter, before, limit)

    def put_writes(
        self,
        config: Dict[str, Any],
        writes: List[Tuple[str, Any]],
        task_id: str,
    ) -> None:
        """Forward put_writes() to the current checkpointer."""
        self._get_checkpointer().put_writes(config, writes, task_id)

    async def aget_tuple(self, config: Dict[str, Any]) -> Optional[CheckpointTuple]:
        """Return the current checkpointer's checkpoint tuple.

        Uses the inner aget_tuple() when present; otherwise wraps the result of
        the sync get() in a CheckpointTuple with metadata {"step": 0}.
        """
        inner = self._get_checkpointer()
        if hasattr(inner, "aget_tuple"):
            return await inner.aget_tuple(config)
        checkpoint = inner.get(config)
        if checkpoint:
            metadata: CheckpointMetadata = {"step": 0}
            return CheckpointTuple(
                config=config,
                checkpoint=checkpoint,
                metadata=metadata,
                parent_config=None,
                pending_writes=None,
            )
        return None

    async def aput(
        self,
        config: Dict[str, Any],
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: Dict[str, int],
    ) -> None:
        """Forward to the inner aput(), or to its sync put() when aput is missing."""
        inner = self._get_checkpointer()
        if hasattr(inner, "aput"):
            await inner.aput(config, checkpoint, metadata, new_versions)
        else:
            inner.put(config, checkpoint, metadata, new_versions)

    async def alist(
        self,
        config: Dict[str, Any],
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Checkpoint]:
        """Forward to the inner alist(), or to its sync list() when alist is missing."""
        inner = self._get_checkpointer()
        if hasattr(inner, "alist"):
            return await inner.alist(config, filter, before, limit)
        return inner.list(config, filter, before, limit)

    async def aput_writes(
        self,
        config: Dict[str, Any],
        writes: List[Tuple[str, Any]],
        task_id: str,
    ) -> None:
        """Forward to the inner aput_writes(), or to its sync put_writes() when missing."""
        inner = self._get_checkpointer()
        if hasattr(inner, "aput_writes"):
            await inner.aput_writes(config, writes, task_id)
        else:
            # Same fallback as aput()/alist(): never drop the call silently.
            inner.put_writes(config, writes, task_id)
|
|
||||||
|
|
@ -1,390 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Chatbot tools for LangGraph integration.
|
|
||||||
Includes SQL query tool, Tavily search tool, and streaming status tool.
|
|
||||||
|
|
||||||
Tools can be created with factory functions for dynamic configuration:
|
|
||||||
- create_sql_query_tool(connector_type) - SQL query tool with configurable connector
|
|
||||||
- create_tavily_search_tool() - Tavily web search tool
|
|
||||||
- create_send_streaming_message_tool(event_manager) - Streaming status updates
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import asyncio
|
|
||||||
from typing import Optional, Callable, Dict, Any
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
|
|
||||||
from modules.connectors.connectorPreprocessor import PreprocessorConnector
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Connection pool for preprocessor connector (performance optimization)
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
class ConnectorPool:
    """Holder for one lazily created, reused PreprocessorConnector.

    Accessed through get_instance(), which keeps a single pool object on the
    class. NOTE(review): ``_lock`` is created alongside the instance but is
    not acquired anywhere in the code visible here.
    """
    _instance: Optional['ConnectorPool'] = None
    _connector: Optional[PreprocessorConnector] = None
    _lock: asyncio.Lock = None

    @classmethod
    def get_instance(cls) -> 'ConnectorPool':
        """Return the shared pool, creating it and the class-level lock on first call."""
        if cls._instance is not None:
            return cls._instance
        cls._instance = ConnectorPool()
        cls._lock = asyncio.Lock()
        return cls._instance

    async def get_connector(self) -> PreprocessorConnector:
        """Return the pooled connector, constructing it on first use."""
        if self._connector is None:
            fresh = PreprocessorConnector()
            self._connector = fresh
        return self._connector

    async def close(self):
        """Close and drop the pooled connector; close errors are only logged at debug level."""
        if not self._connector:
            return
        try:
            await self._connector.close()
        except Exception as e:
            logger.debug(f"Error closing connector: {e}")
        # NOTE(review): reset assumed to belong to the "connector exists" branch.
        self._connector = None


# Module-wide pool shared by the tools defined in this file
_connector_pool = ConnectorPool.get_instance()
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Factory functions for configurable tools
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
def create_sql_query_tool(connector_type: str = "preprocessor"):
    """
    Create a SQL query tool.

    The returned tool runs the query through the shared preprocessor connector
    pool and renders at most 30 rows as text.

    Args:
        connector_type: Type of database connector to use (e.g., "preprocessor").
            NOTE(review): the value is currently not read; the tool always uses
            the module-level preprocessor connector pool.

    Returns:
        LangChain tool for executing SQL queries
    """
    # Upper bound on rows rendered into the tool output (keeps the response short).
    max_display_rows = 30

    # The inner docstring is the tool description - keep its wording unchanged.
    @tool
    async def sqlite_query(query: str) -> str:
        """
        Execute a SQL SELECT query on the database.

        This tool allows you to query the database to find articles, prices,
        inventory levels, and other product information.

        Args:
            query: A valid SQL SELECT query. Must use double quotes for column names
            with spaces or special characters (e.g., "Artikelnummer", "S_IST_BESTAND").
            Only SELECT queries are allowed.

        Returns:
            Query results as a formatted string, or an error message if the query fails.

        Examples:
            - Find articles by name:
            SELECT a."Artikelnummer", a."Artikelbezeichnung", a."Lieferant"
            FROM Artikel a
            WHERE a."Artikelbezeichnung" LIKE '%Motor%'
            LIMIT 20

            - Find articles with price and inventory:
            SELECT a."Artikelnummer", a."Artikelbezeichnung", e."EP_CHF",
            lp."Lagerplatz" as "Lagerplatzname", l."S_IST_BESTAND",
            l."S_RESERVIERTER__BESTAND",
            CASE WHEN l."S_IST_BESTAND" != 'Unbekannt'
            THEN CAST(l."S_IST_BESTAND" AS INTEGER) - COALESCE(l."S_RESERVIERTER__BESTAND", 0)
            ELSE NULL END as "Verfügbarer Bestand"
            FROM Artikel a
            LEFT JOIN Einkaufspreis e ON a."I_ID" = e."m_Artikel"
            LEFT JOIN Lagerplatz_Artikel l ON a."I_ID" = l."R_ARTIKEL"
            LEFT JOIN Lagerplatz lp ON l."R_LAGERPLATZ" = lp."I_ID"
            WHERE a."Artikelbezeichnung" LIKE '%Netzgerät%'
            LIMIT 20
        """
        # NOTE(review): the query text comes from the model and is passed on
        # unchanged; "only SELECT" is not enforced in this function.
        try:
            # Use connection pool for better performance
            # Note: Connection is reused, not closed after each query
            connector = await _connector_pool.get_connector()

            result = await connector.executeQuery(query, return_json=True)

            # "text" carries the connector's error report; tolerate a missing or None value.
            status_text = result.get("text") or ""
            if status_text.startswith(("Error:", "Query failed:")):
                logger.error(f"SQL query failed: {status_text}")
                return status_text

            # Format results
            data = result.get("data") or []
            row_count = result.get("row_count", len(data))

            if not data:
                return f"Query executed successfully. Returned {row_count} rows (no data)."

            shown_rows = data[:max_display_rows]
            lines = [f"Query returned {row_count} rows:"]

            if isinstance(data[0], dict):
                # Show column headers from first row
                headers = list(data[0].keys())
                lines.append("Columns: " + ", ".join(headers))

                for i, row in enumerate(shown_rows, 1):
                    row_str = ", ".join(f"{k}: {v}" for k, v in row.items())
                    lines.append(f"{i}. {row_str}")
            else:
                # Fallback for non-dict rows
                for i, row in enumerate(shown_rows, 1):
                    lines.append(f"{i}. {row}")

            # Tell the model when rows were cut off - for both row formats.
            if row_count > len(shown_rows):
                lines.append(f"(Showing first {len(shown_rows)} of {row_count} rows)")

            return "\n".join(lines)

        except Exception as e:
            # Errors are returned as tool output so the agent can react to them.
            error_msg = f"Error executing SQL query: {str(e)}"
            logger.error(error_msg, exc_info=True)
            return error_msg

    return sqlite_query
|
|
||||||
|
|
||||||
|
|
||||||
def create_tavily_search_tool():
    """
    Build the Tavily web search tool.

    The tool resolves a model whose connectorType is "tavily" from the AI
    Center model registry, sends it a JSON-encoded web search prompt and
    renders up to 15 results as text. All failures are returned as strings.

    Returns:
        LangChain tool for executing Tavily web searches
    """
    # The inner docstring is the tool description - keep its wording unchanged.
    @tool
    async def tavily_search(query: str) -> str:
        """
        Search the internet for comprehensive information using Tavily search via AI Center.

        Use this tool when you need to find detailed product information, datasheets,
        certifications, technical specifications, market trends, or other comprehensive
        information that is not in the database.

        IMPORTANT: This tool returns FULL content from search results (not truncated).
        Use all available information to provide comprehensive, detailed answers with
        specific facts, numbers, dates, and technical details.

        Args:
            query: Search query string. Be specific and include product names,
            model numbers, or other relevant keywords. For comprehensive
            research, use broad queries like "latest developments in LED technology 2026"

        Returns:
            Comprehensive search results with full content, titles, URLs, and sources.
            Results include up to 15 sources with complete content for detailed analysis.

        Examples:
            - Search for comprehensive product information:
            tavily_search("latest LED technology developments 2026")

            - Search for product datasheet:
            tavily_search("Siemens 6AV2 181-8XP00-0AX0 datasheet")

            - Search for market trends:
            tavily_search("LED market trends efficiency breakthroughs 2025")
        """
        try:
            # Use AI Center Tavily plugin instead of direct langchain-tavily
            from modules.aicore.aicoreModelRegistry import modelRegistry
            from modules.aicore.aicoreModelSelector import modelSelector
            from modules.datamodels.datamodelAi import (
                AiModelCall,
                AiModelResponse,
                AiCallOptions,
                OperationTypeEnum,
                ProcessingModeEnum,
                AiCallPromptWebSearch
            )
            import json

            # Register connectors on first use, when the registry has none yet
            if not modelRegistry._connectors:
                for discovered in modelRegistry.discoverConnectors():
                    modelRegistry.registerConnector(discovered)

            # Refresh models to ensure Tavily is available
            # NOTE(review): assumed to run on every call, not only after registration.
            modelRegistry.refreshModels()

            # No RBAC filtering here since tools don't have user context
            tavily_models = [
                candidate
                for candidate in modelRegistry.getAvailableModels()
                if candidate.connectorType == "tavily"
            ]
            if not tavily_models:
                return "Error: Tavily model not available in AI Center. Please check configuration."

            options = AiCallOptions(
                operationType=OperationTypeEnum.WEB_SEARCH_DATA,
                processingMode=ProcessingModeEnum.BASIC
            )

            # A single candidate is used as-is; the selector only decides between several
            if len(tavily_models) != 1:
                selected_model = modelSelector.selectModel(
                    prompt=query,
                    context="",
                    options=options,
                    availableModels=tavily_models
                )
            else:
                selected_model = tavily_models[0]

            if not selected_model:
                return "Error: Could not select Tavily model for web search."

            # Deep research, up to 15 pages, no country/language filter
            search_request = AiCallPromptWebSearch(
                instruction=query,
                maxNumberPages=15,
                country=None,
                language=None,
                researchDepth="deep"
            )

            # The search request travels as the JSON content of a single user message
            request_message = {
                "role": "user",
                "content": json.dumps(search_request.model_dump())
            }
            model_call = AiModelCall(
                messages=[request_message],
                model=selected_model,
                options=options
            )

            # Call the model's functionCall (which routes to _routeWebOperation)
            if not selected_model.functionCall:
                return "Error: Tavily model has no functionCall defined."

            response: AiModelResponse = await selected_model.functionCall(model_call)

            if not response.success:
                error_msg = response.error or "Unknown error"
                logger.error(f"Tavily search failed: {error_msg}")
                return f"Error performing Tavily search: {error_msg}"

            # Response content should be JSON with URLs and content
            try:
                parsed = json.loads(response.content) if response.content else {}
            except json.JSONDecodeError:
                # Not JSON: hand the raw text back instead
                if response.content:
                    return f"Internet search results for: {query}\n\n{response.content}"
                return f"No results found for query: {query}"

            # Accept either a bare list or a dict with a "urls" / "results" key
            if isinstance(parsed, list):
                results = parsed
            elif isinstance(parsed, dict):
                results = parsed.get("urls", []) or parsed.get("results", []) or []
            else:
                results = []

            if not results:
                return f"No results found for query: {query}"

            # Full content is returned, not truncated - the LLM decides what to use
            lines = [f"Internet search results for: {query}\n"]
            for i, result in enumerate(results[:15], 1):
                if isinstance(result, str):
                    # Simple URL string
                    lines.append(f"{i}. {result}")
                    lines.append(f" URL: {result}")
                elif isinstance(result, dict):
                    # Dictionary with url, title, content
                    url = result.get("url", "")
                    title = result.get("title", url)
                    content = result.get("content", "")

                    lines.append(f"{i}. {title}")
                    lines.append(f" URL: {url}")
                    if content:
                        lines.append(f" Content: {content}")
                else:
                    # Fallback
                    lines.append(f"{i}. {str(result)}")
                # NOTE(review): blank separator assumed to follow every result.
                lines.append("")

            return "\n".join(lines)

        except Exception as e:
            error_msg = f"Error performing Tavily search via AI Center: {str(e)}"
            logger.error(error_msg, exc_info=True)
            return error_msg

    return tavily_search
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Legacy tool definitions (kept for backwards compatibility)
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
# Legacy sqlite_query tool using default preprocessor connector.
# Built once at import time via the factory above.
sqlite_query = create_sql_query_tool("preprocessor")

# Legacy tavily_search tool, likewise built once at import time.
tavily_search = create_tavily_search_tool()
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Streaming message tool factory
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
def create_send_streaming_message_tool(event_manager=None):
    """
    Create the send_streaming_message tool.

    Args:
        event_manager: Event manager instance. This factory never reads it;
            per the original note, events are captured via LangGraph tool
            events instead.

    Returns:
        The decorated ``send_streaming_message`` coroutine (described by the
        original author as a LangChain tool).
    """
    # NOTE(review): the inner docstring is presumably what the ``@tool``
    # decorator exposes to the model as the tool description, so its text
    # (including the German examples) is kept verbatim - confirm before editing.
    @tool
    async def send_streaming_message(message: str) -> str:
        """
        Send a streaming status update to the user.

        Use this tool frequently to keep the user informed about what you are doing.
        This helps provide a better user experience by showing progress updates.

        Args:
            message: A short message describing what you are currently doing.
                Examples:
                - "Durchsuche Datenbank nach Lampen, LED, Leuchten, und Ähnlichem."
                - "Suche im Internet nach Produktinformationen."
                - "Analysiere Suchergebnisse und bereite Antwort vor."

        Returns:
            Confirmation that the message was sent.
        """
        # This tool doesn't actually do anything in the tool execution
        # The actual event emission happens in the streaming bridge
        # This is just for LangGraph to recognize it as a tool call
        return f"Status-Update gesendet: {message}"

    return send_streaming_message
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,170 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Chatbot constants and helper functions.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _clean_ai_title(raw) -> str:
    """Turn a raw AI answer into a single-line title without fences or wrapping quotes."""
    title = str(raw).strip()

    # Remove a surrounding markdown code fence BEFORE stripping quotes, so a
    # quoted title inside a fence does not keep its quotes.
    if title.startswith('```'):
        fence_lines = title.split('\n')
        if len(fence_lines) > 1:
            if fence_lines[-1].strip() == '```':
                fence_lines = fence_lines[1:-1]
            else:
                fence_lines = fence_lines[1:]
            title = '\n'.join(fence_lines)

    # Collapse newlines / repeated spaces, then drop wrapping quotes and
    # leftover backticks (single-line fences or inline code).
    title = " ".join(title.split())
    return title.strip("`\"' ")


def _contains_english_marker(title: str) -> bool:
    """Return True if *title* contains a known English marker word as a whole word.

    Whole-word matching (with common English suffixes) avoids rejecting German
    titles that merely contain a marker as a substring, e.g. "finden" or
    "befinden" for the marker "find".
    """
    import re  # local import keeps this block self-contained

    markers = ("search", "find", "show", "display", "query", "article", "product", "item")
    pattern = r"\b(?:" + "|".join(markers) + r")(?:s|es|ed|ing)?\b"
    return re.search(pattern, title.lower()) is not None


async def generate_conversation_name(
    services,
    prompt: str,
    user_language: Optional[str] = None
) -> str:
    """
    Generate a short German conversation name from the user's prompt using AI.

    Falls back to ``generate_name_from_prompt`` whenever the AI service is
    missing, the AI answer is empty, looks English, is shorter than three
    characters, or any exception occurs.

    Args:
        services: Services object; its ``ai`` attribute is used for the AI call.
        prompt: User's input prompt (expected to be German).
        user_language: Not used; the generated name is always requested in German.

    Returns:
        A conversation name of at most 50 characters, or "Neue Unterhaltung"
        for an empty / whitespace-only prompt.
    """
    if not prompt or not prompt.strip():
        return "Neue Unterhaltung"

    try:
        # Check if AI service is available
        if getattr(services, 'ai', None) is None:
            logger.warning("AI service not available, generating name from prompt")
            return generate_name_from_prompt(prompt)

        # Ensure AI service is initialized before use
        await services.ai.ensureAiObjectsInitialized()

        # Create AI prompt - very explicit that answer must be in German
        ai_prompt = f"""Du bist ein deutscher Assistent. Der Benutzer hat folgende Anfrage auf Deutsch gestellt:

"{prompt.strip()}"

Erstelle einen kurzen, zusammenfassenden Titel für diese Unterhaltung. Der Titel muss:
- Auf Deutsch sein (KEIN Englisch!)
- Maximal 50 Zeichen lang sein
- Das Hauptthema zusammenfassen
- Informativ sein

Beispiele für gute deutsche Titel:
- "LED-Artikel Suche"
- "Lagerbestandsabfrage"
- "Produktinformationen"
- "Artikel-Suche"

Antworte NUR mit dem deutschen Titel, ohne Anführungszeichen oder Erklärungen."""

        request = AiCallRequest(
            prompt=ai_prompt,
            context="",
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=False,
                compressContext=False,
                temperature=0.3  # Lower temperature for more consistent German output
            )
        )

        logger.info(f"Calling AI to generate conversation name for prompt: {prompt[:50]}...")
        response = await services.ai.callAi(request)

        if not response or not hasattr(response, 'content') or not response.content:
            logger.warning("AI response invalid, generating name from prompt")
            return generate_name_from_prompt(prompt)

        # Convert once up front so slicing for the log cannot fail on non-str content.
        raw_answer = str(response.content)
        logger.info(f"AI response received: {raw_answer[:100]}...")

        name = _clean_ai_title(raw_answer)

        # The model sometimes answers in English despite the instructions;
        # in that case derive the name from the (German) prompt instead.
        if _contains_english_marker(name):
            logger.warning(f"AI generated English name '{name}', generating from prompt instead")
            return generate_name_from_prompt(prompt)

        # Limit to 50 characters
        if len(name) > 50:
            name = name[:47] + "..."

        if name and len(name) >= 3:
            logger.info(f"Successfully generated conversation name via AI: '{name}'")
            return name

        logger.warning(f"Generated name is too short: '{name}', generating from prompt")
        return generate_name_from_prompt(prompt)

    except Exception as e:
        # Best effort: naming must never break the conversation flow.
        logger.error(f"Error generating conversation name with AI: {e}", exc_info=True)
        return generate_name_from_prompt(prompt)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_name_from_prompt(prompt: str) -> str:
    """
    Derive a conversation name directly from the (German) prompt text.

    Whitespace is collapsed, German question words are removed (unless that
    would remove every word), the first character is upper-cased and the
    result is shortened to at most 50 characters, preferably at a word boundary.

    Args:
        prompt: User's input prompt in German.

    Returns:
        The derived name, or "Neue Unterhaltung" when the prompt is empty or
        the derived name has fewer than three characters.
    """
    fallback = "Neue Unterhaltung"

    cleaned = prompt.strip() if prompt else ""
    if not cleaned:
        return fallback

    # Splitting on any whitespace also collapses newlines and repeated spaces.
    tokens = cleaned.split()
    skipped = ("wie", "was", "wo", "wann", "wer", "welche", "welcher", "welches")
    kept = [token for token in tokens if token.lower() not in skipped]

    # Keep the unfiltered words when the prompt consists of question words only.
    title = " ".join(kept or tokens)
    title = title[:1].upper() + title[1:]

    if len(title) > 50:
        head = title[:47]
        boundary = head.rfind(' ')
        # Cut at a word boundary only when a reasonable amount of text remains.
        if boundary > 20:
            head = head[:boundary]
        title = head + "..."

    if len(title) < 3:
        return fallback
    return title
|
|
||||||
|
|
@ -1,302 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Configuration system for chatbot instances.
|
|
||||||
Loads configuration from the database (FeatureInstance.config JSONB field).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import Optional, Dict, Any, List, TYPE_CHECKING
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from modules.datamodels.datamodelFeatures import FeatureInstance
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# In-process cache of parsed configs, keyed by "instance_<instance id>".
# Entries are only removed through clear_config_cache().
_config_cache: Dict[str, 'ChatbotConfig'] = {}

# Fallback system prompt used when a config provides none.
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. You have access to SQL query tools and web search tools. Use them to help answer user questions."
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class DatabaseConfig:
    """Database section of a chatbot configuration."""
    # Schema description; defaults to an empty dict per instance.
    schema: Dict[str, Any] = field(default_factory=dict)
    # Name of the connector to query through.
    connector: str = "preprocessor"

    def is_sql_enabled(self) -> bool:
        """Return True when a connector is configured (non-empty)."""
        return True if self.connector else False
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ToolConfig:
    """Per-tool settings of a chatbot configuration."""
    sql: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})
    tavily: Optional[Dict[str, Any]] = None
    streaming: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})

    def is_sql_enabled(self) -> bool:
        """Return the SQL tool's "enabled" flag; a missing section or flag counts as enabled."""
        settings = self.sql
        return True if settings is None else settings.get("enabled", True)

    def is_tavily_enabled(self) -> bool:
        """Return the Tavily tool's "enabled" flag; a missing section or flag counts as disabled."""
        settings = self.tavily
        return False if settings is None else settings.get("enabled", False)

    def is_streaming_enabled(self) -> bool:
        """Return the streaming tool's "enabled" flag; a missing section or flag counts as enabled."""
        settings = self.streaming
        return True if settings is None else settings.get("enabled", True)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ModelConfig:
    """Model configuration for a chatbot instance."""
    # Operation type, stored as its plain string name.
    operationType: str = "DATA_ANALYSE"
    # Processing mode name; the default was changed from DETAILED to BASIC for faster responses.
    processingMode: str = "BASIC"
    # Provider names the instance may use; an empty list means all providers are allowed.
    allowedProviders: List[str] = field(default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ChatbotConfig:
    """Complete configuration of one chatbot instance."""
    id: str
    name: str
    systemPrompt: str
    database: DatabaseConfig
    tools: ToolConfig
    model: ModelConfig

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotConfig':
        """
        Create a ChatbotConfig from a dictionary.

        Supports two config formats:
        1. New format: systemPrompt, database, tools, model
        2. Legacy frontend format: connector, prompts, behavior (converted first)

        Sections that are missing or explicitly ``None`` are treated as empty.
        Model defaults are taken from ``ModelConfig`` so both stay in sync.

        Args:
            data: Configuration dictionary (from JSON file or FeatureInstance.config)
            config_id: Identifier used when ``data`` carries no "id" (instance ID or file name)

        Returns:
            ChatbotConfig instance with defaults filled in
        """
        data = data or {}

        # Detect config format and normalize
        if "prompts" in data or "connector" in data or "behavior" in data:
            data = cls._convert_legacy_config(data)

        system_prompt = data.get("systemPrompt")
        if not system_prompt:
            logger.warning(f"Config {config_id}: No systemPrompt provided, using default")
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # `or {}` also covers sections stored as JSON null.
        db_data = data.get("database") or {}
        database_config = DatabaseConfig(
            schema=db_data.get("schema", {}),
            connector=db_data.get("connector", "preprocessor")
        )

        tools_data = data.get("tools") or {}
        tools_config = ToolConfig(
            sql=tools_data.get("sql", {"enabled": True}),
            tavily=tools_data.get("tavily", {"enabled": False}),
            streaming=tools_data.get("streaming", {"enabled": True})
        )

        model_data = data.get("model") or {}
        # Single source of truth for defaults: previously processingMode fell
        # back to "DETAILED" here while ModelConfig itself defaults to "BASIC".
        model_defaults = ModelConfig()
        allowed_providers = model_data.get("allowedProviders") or data.get("allowedProviders", [])
        model_config = ModelConfig(
            operationType=model_data.get("operationType", model_defaults.operationType),
            processingMode=model_data.get("processingMode", model_defaults.processingMode),
            allowedProviders=allowed_providers if isinstance(allowed_providers, list) else []
        )

        return cls(
            id=data.get("id", config_id),
            name=data.get("name", "Chatbot"),
            systemPrompt=system_prompt,
            database=database_config,
            tools=tools_config,
            model=model_config
        )

    @staticmethod
    def _convert_legacy_config(data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert legacy frontend config format to new format.

        Legacy format (from AdminFeatureAccessPage.tsx):
        {
            "connector": {"types": [...], "type": "preprocessor"},
            "prompts": {"customAnalysisPrompt": "...", "customFinalAnswerPrompt": "..."},
            "behavior": {"enableWebResearch": true, ...}
        }

        New format:
        {
            "systemPrompt": "...",
            "database": {"connector": "preprocessor"},
            "tools": {"sql": {"enabled": true}, "tavily": {"enabled": true}}
        }

        Args:
            data: Configuration dictionary containing at least one legacy key.

        Returns:
            A new dictionary in the new format; ``data`` is not modified.
        """
        converted: Dict[str, Any] = {}

        # System prompt: legacy prompts win; an explicit top-level systemPrompt
        # is kept as a fallback instead of being silently dropped.
        prompts = data.get("prompts") or {}
        system_prompt = (
            prompts.get("customAnalysisPrompt")
            or prompts.get("customFinalAnswerPrompt")
            or data.get("systemPrompt")
        )
        if system_prompt:
            converted["systemPrompt"] = system_prompt

        # Connector: the first entry of "types" takes precedence over "type".
        connector_data = data.get("connector") or {}
        connector_type = connector_data.get("type") or "preprocessor"
        if isinstance(connector_data.get("types"), list) and connector_data["types"]:
            connector_type = connector_data["types"][0]  # Use first connector as primary

        converted["database"] = {
            "connector": connector_type,
            "schema": {}
        }

        # Tool settings come from the behavior section
        behavior = data.get("behavior") or {}
        enable_web_research = behavior.get("enableWebResearch", False)

        converted["tools"] = {
            "sql": {"enabled": True},  # SQL always enabled if connector is set
            "tavily": {"enabled": enable_web_research},
            "streaming": {"enabled": True}  # Streaming always enabled
        }

        # Model config defaults - use BASIC for faster responses
        converted["model"] = {
            "operationType": "DATA_ANALYSE",
            "processingMode": "BASIC",
            "allowedProviders": data.get("allowedProviders", [])
        }

        # Copy identity fields when present
        for key in ("id", "name"):
            if key in data:
                converted[key] = data[key]

        logger.debug("Converted legacy config format to new format")
        return converted

    def to_dict(self) -> Dict[str, Any]:
        """Convert the config to a plain dictionary (new format) for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "systemPrompt": self.systemPrompt,
            "database": {
                "schema": self.database.schema,
                "connector": self.database.connector
            },
            "tools": {
                "sql": self.tools.sql,
                "tavily": self.tools.tavily,
                "streaming": self.tools.streaming
            },
            "model": {
                "operationType": self.model.operationType,
                "processingMode": self.model.processingMode,
                "allowedProviders": self.model.allowedProviders
            }
        }
|
|
||||||
|
|
||||||
|
|
||||||
def load_chatbot_config_from_instance(instance: 'FeatureInstance') -> ChatbotConfig:
    """
    Build (or fetch from cache) the ChatbotConfig for a FeatureInstance.

    The configuration is read from ``instance.config``; a missing or empty
    config is replaced by an empty dict so that defaults apply. Parsed
    configs are cached per instance ID until ``clear_config_cache`` is called.

    Args:
        instance: FeatureInstance object providing ``id`` and ``config``.

    Returns:
        ChatbotConfig instance (possibly a previously cached one).
    """
    instance_id = instance.id
    cache_key = f"instance_{instance_id}"

    # Serve from the cache when this instance was parsed before.
    try:
        cached = _config_cache[cache_key]
    except KeyError:
        pass
    else:
        logger.debug(f"Returning cached config for instance {instance_id}")
        return cached

    raw_config = instance.config
    if not raw_config:
        logger.warning(f"Instance {instance_id} has no config, using minimal defaults")
        raw_config = {}

    key_names = list(raw_config.keys()) if raw_config else []
    raw_providers = raw_config.get('allowedProviders')
    logger.debug(f"Instance {instance_id} raw config keys: {key_names}, allowedProviders: {raw_providers}")

    parsed = ChatbotConfig.from_dict(raw_config, config_id=instance_id)
    logger.debug(f"Instance {instance_id} parsed config.model.allowedProviders: {parsed.model.allowedProviders}")

    _config_cache[cache_key] = parsed
    logger.info(f"Loaded chatbot config from instance {instance_id}: {parsed.name}")
    return parsed
|
|
||||||
|
|
||||||
|
|
||||||
def load_chatbot_config_from_dict(config_data: Dict[str, Any], config_id: str = "custom") -> ChatbotConfig:
    """
    Load chatbot configuration from a dictionary.

    Thin wrapper around ``ChatbotConfig.from_dict``; unlike
    ``load_chatbot_config_from_instance`` it does not touch the config cache.
    Useful for testing or when config data is already available.

    Args:
        config_data: Configuration dictionary
        config_id: Identifier for this config

    Returns:
        ChatbotConfig instance
    """
    return ChatbotConfig.from_dict(config_data, config_id=config_id)
|
|
||||||
|
|
||||||
|
|
||||||
def clear_config_cache(instance_id: Optional[str] = None):
    """
    Drop cached chatbot configurations.

    Args:
        instance_id: Instance whose cache entry should be removed. Any falsy
            value (``None`` or an empty string) clears the entire cache.
    """
    if not instance_id:
        _config_cache.clear()
        logger.debug("Cleared all chatbot config cache")
        return

    # Nothing to do (and nothing to log) when the instance was never cached.
    try:
        del _config_cache[f"instance_{instance_id}"]
    except KeyError:
        return
    logger.debug(f"Cleared chatbot config cache for instance {instance_id}")
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,497 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Chatbot Feature Container - Main Module.
|
|
||||||
Handles feature initialization and RBAC catalog registration.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Dict, List, Any, Optional
|
|
||||||
|
|
||||||
from modules.shared.i18nRegistry import t
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Feature metadata (code, translated label and icon used in the feature definition)
FEATURE_CODE = "chatbot"
FEATURE_LABEL = t("Chatbot", context="UI")
FEATURE_ICON = "mdi-robot"

# UI objects registered in the RBAC catalog by registerFeature()
UI_OBJECTS = [
    {
        "objectKey": "ui.feature.chatbot.conversations",
        "label": t("Konversationen", context="UI"),
        "meta": {"area": "conversations"}
    }
]
|
|
||||||
|
|
||||||
# Resource objects registered in the RBAC catalog by registerFeature().
# Each entry's "meta" names the API endpoint and HTTP method it stands for.
RESOURCE_OBJECTS = [
    {
        "objectKey": "resource.feature.chatbot.startStream",
        "label": t("Chat starten (Stream)", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/start/stream", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.chatbot.stop",
        "label": t("Chat stoppen", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/stop/{workflowId}", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.chatbot.threads",
        "label": t("Threads abrufen", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/threads", "method": "GET"}
    },
    {
        "objectKey": "resource.feature.chatbot.delete",
        "label": t("Chat löschen", context="UI"),
        "meta": {"endpoint": "/api/chatbot/{instanceId}/{workflowId}", "method": "DELETE"}
    },
]
|
|
||||||
|
|
||||||
# Service requirements - services this feature needs from the service center.
# Format: [{serviceKey, meta}]. Iterated by getChatbotServices() to resolve only
# the needed services and by getRequiredServiceKeys() to list their keys.
REQUIRED_SERVICES = [
    {
        "serviceKey": "chat",
        "meta": {"usage": "File info, document handling"}
    },
    {
        "serviceKey": "ai",
        "meta": {"usage": "AI calls, conversation name generation"}
    },
    {
        "serviceKey": "billing",
        "meta": {"usage": "Usage tracking, balance checks"}
    },
    {
        "serviceKey": "streaming",
        "meta": {"usage": "Event manager, ChatStreamingHelper"}
    },
]
|
|
||||||
|
|
||||||
# Template roles for this feature
# Role names MUST follow convention: {featureCode}-{roleName}
# NOTE(review): the viewer and user roles reference the UI item
# "ui.feature.chatbot.threads", but UI_OBJECTS only declares
# "ui.feature.chatbot.conversations" - confirm whether the catalog entry is missing.
# NOTE(review): the DATA scope letters appear to mean "m" = own records,
# "a" = all records, "n" = none (inferred from the inline comments) - confirm.
TEMPLATE_ROLES = [
    {
        "roleLabel": "chatbot-viewer",
        "description": "Chatbot Betrachter - Chat-Threads ansehen (nur lesen)",
        "accessRules": [
            # UI: only threads view, NO active chat
            {"context": "UI", "item": "ui.feature.chatbot.threads", "view": True},
            # RESOURCE: can list threads only
            {"context": "RESOURCE", "item": "resource.feature.chatbot.threads", "view": True},
            # DATA access (own records, read-only)
            {"context": "DATA", "item": None, "view": True, "read": "m", "create": "n", "update": "n", "delete": "n"},
        ]
    },
    {
        "roleLabel": "chatbot-user",
        "description": "Chatbot Benutzer - Chatbot nutzen und eigene Threads verwalten",
        "accessRules": [
            # UI: full access to all views
            {"context": "UI", "item": "ui.feature.chatbot.conversations", "view": True},
            {"context": "UI", "item": "ui.feature.chatbot.threads", "view": True},
            # Resource access: can start/stop chats, view threads, delete own
            {"context": "RESOURCE", "item": "resource.feature.chatbot.startStream", "view": True},
            {"context": "RESOURCE", "item": "resource.feature.chatbot.stop", "view": True},
            {"context": "RESOURCE", "item": "resource.feature.chatbot.threads", "view": True},
            {"context": "RESOURCE", "item": "resource.feature.chatbot.delete", "view": True},
            # DATA access (own records)
            {"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"},
        ]
    },
    {
        "roleLabel": "chatbot-admin",
        "description": "Chatbot Admin - Vollzugriff auf alle Chatbot-Funktionen",
        "accessRules": [
            # Full UI access
            {"context": "UI", "item": None, "view": True},
            # Full resource access
            {"context": "RESOURCE", "item": None, "view": True},
            # Full DATA access
            {"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"},
        ]
    },
]
|
|
||||||
|
|
||||||
|
|
||||||
def getFeatureDefinition() -> Dict[str, Any]:
    """Return the feature definition for registration.

    The chatbot feature is currently soft-disabled via ``enabled=False``: its
    catalog objects, template roles and routes stay loaded so already-running
    instances keep working, but it is filtered out of the Store and the
    Admin Feature-Instances "Neue Instanz" selection list.

    Returns:
        Dict with the keys "code", "label", "icon" and "enabled".
    """
    return {
        "code": FEATURE_CODE,
        "label": FEATURE_LABEL,
        "icon": FEATURE_ICON,
        # Soft-disable switch described in the docstring above.
        "enabled": False,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def getRequiredServiceKeys() -> List[str]:
    """Return the "serviceKey" of every entry in REQUIRED_SERVICES, in declaration order."""
    return [s["serviceKey"] for s in REQUIRED_SERVICES]
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE(review): this module defines a second function named getChatbotServices
# further down; that later definition rebinds the name at import time, so this
# implementation appears to be dead code - confirm which one is intended.
def getChatbotServices(
    user,
    mandateId: Optional[str] = None,
    featureInstanceId: Optional[str] = None,
    workflow=None,
) -> Any:
    """
    Get a service hub for the chatbot feature using the service center.
    Resolves only the services declared in REQUIRED_SERVICES.

    Args:
        user: User the services and DB interface are created for.
        mandateId: Optional mandate scope passed to the context and DB interface.
        featureInstanceId: Optional feature instance scope passed likewise.
        workflow: Optional workflow object; when None, a placeholder carrying
            only ``featureCode`` is put into the service context instead.

    Returns:
        A hub-like object with: chat, ai, billing, streaming,
        plus interfaceDbComponent, user, mandateId, featureInstanceId.
        A service that fails to resolve is set to None (a warning is logged).
    """
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext
    from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface

    # Provide workflow or placeholder so billing/etc get featureCode
    _workflow = workflow
    if _workflow is None:
        _workflow = type("_Placeholder", (), {"featureCode": FEATURE_CODE})()
    ctx = ServiceCenterContext(
        user=user,
        mandate_id=mandateId,
        feature_instance_id=featureInstanceId,
        workflow=_workflow,
    )

    hub = _ChatbotServiceHub()
    hub.user = user
    hub.mandateId = mandateId
    hub.featureInstanceId = featureInstanceId
    # The hub keeps the caller's original workflow (possibly None), not the placeholder.
    hub.workflow = workflow
    hub.interfaceDbComponent = getComponentInterface(user, mandateId=mandateId, featureInstanceId=featureInstanceId)

    for spec in REQUIRED_SERVICES:
        key = spec["serviceKey"]
        try:
            svc = getService(key, ctx)
            setattr(hub, key, svc)
        except Exception as e:
            # Best effort: a failing service must not prevent the hub from being built.
            logger.warning(f"Could not resolve service '{key}' for chatbot: {e}")
            setattr(hub, key, None)

    return hub
|
|
||||||
|
|
||||||
|
|
||||||
def getChatStreamingHelper():
    """
    Return the ChatStreamingHelper provided by the streaming service.

    The streaming service is resolved through the service center with a
    minimal context (placeholder user, no mandate, no feature instance).

    Returns:
        Whatever ``streaming.getChatStreamingHelper()`` returns, or None when
        the resolved streaming service is falsy.
    """
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext

    # Minimal context - the streaming service only needs it for the resolver.
    minimal_ctx = ServiceCenterContext(
        user=__get_placeholder_user(),
        mandate_id=None,
        feature_instance_id=None,
    )
    streaming_service = getService("streaming", minimal_ctx)
    if not streaming_service:
        return None
    return streaming_service.getChatStreamingHelper()
|
|
||||||
|
|
||||||
|
|
||||||
def __get_placeholder_user():
    """Placeholder user for contexts that only need service resolution (e.g. ChatStreamingHelper).

    Returns:
        The ``currentUser`` of the root app interface.
    """
    # Imported lazily inside the function rather than at module import time.
    from modules.interfaces.interfaceDbApp import getRootInterface
    return getRootInterface().currentUser
|
|
||||||
|
|
||||||
|
|
||||||
def getEventManager(user, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None):
    """
    Return the event manager of the streaming service (used by chatbot routes for SSE).

    Args:
        user: User the service context is created for.
        mandateId: Optional mandate scope for the service context.
        featureInstanceId: Optional feature instance scope for the service context.

    Returns:
        The result of ``getEventManager()`` on the resolved streaming service.
    """
    from modules.serviceCenter import getService
    from modules.serviceCenter.context import ServiceCenterContext

    streaming_service = getService(
        "streaming",
        ServiceCenterContext(
            user=user,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
        ),
    )
    return streaming_service.getEventManager()
|
|
||||||
|
|
||||||
|
|
||||||
class _ChatbotServiceHub:
    """Lightweight hub exposing only services required by the chatbot feature.

    All attributes are class-level defaults; getChatbotServices() overwrites
    them on each instance it creates.
    """
    # Request scope
    user = None
    mandateId = None
    featureInstanceId = None
    workflow = None
    # Component DB interface
    interfaceDbComponent = None
    # Service slots, one per REQUIRED_SERVICES key
    chat = None
    ai = None
    billing = None
    streaming = None
    # Feature code carried by the hub
    featureCode = "chatbot"
    # No provider list by default
    allowedProviders = None
|
|
||||||
|
|
||||||
|
|
||||||
def getUiObjects() -> List[Dict[str, Any]]:
    """Return UI objects for RBAC catalog registration.

    The module-level UI_OBJECTS list itself is returned, not a copy.
    """
    return UI_OBJECTS
|
|
||||||
|
|
||||||
|
|
||||||
def getResourceObjects() -> List[Dict[str, Any]]:
    """Return resource objects for RBAC catalog registration.

    The module-level RESOURCE_OBJECTS list itself is returned, not a copy.
    """
    return RESOURCE_OBJECTS
|
|
||||||
|
|
||||||
|
|
||||||
def getTemplateRoles() -> List[Dict[str, Any]]:
    """Return template roles for this feature.

    The module-level TEMPLATE_ROLES list itself is returned, not a copy.
    """
    return TEMPLATE_ROLES
|
|
||||||
|
|
||||||
|
|
||||||
def registerFeature(catalogService) -> bool:
    """
    Register the chatbot feature's RBAC objects and sync its template roles.

    Every entry of UI_OBJECTS and RESOURCE_OBJECTS is registered with the
    catalog, then the template roles are synced to the database.

    Args:
        catalogService: The RBAC catalog service instance

    Returns:
        True if every step succeeded; False if any step raised (the error is
        logged and not re-raised).
    """
    try:
        for entry in UI_OBJECTS:
            catalogService.registerUiObject(
                featureCode=FEATURE_CODE,
                objectKey=entry["objectKey"],
                label=entry["label"],
                meta=entry.get("meta"),
            )

        for entry in RESOURCE_OBJECTS:
            catalogService.registerResourceObject(
                featureCode=FEATURE_CODE,
                objectKey=entry["objectKey"],
                label=entry["label"],
                meta=entry.get("meta"),
            )

        # Sync template roles to database
        _syncTemplateRolesToDb()

        ui_count = len(UI_OBJECTS)
        resource_count = len(RESOURCE_OBJECTS)
        logger.info(f"Feature '{FEATURE_CODE}' registered {ui_count} UI objects and {resource_count} resource objects")
    except Exception as e:
        logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
        return False
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def getChatbotServices(
    user,
    mandateId: Optional[str] = None,
    featureInstanceId: Optional[str] = None,
    workflow=None,
) -> "_ChatbotServiceHub":
    """
    Build the lightweight chatbot service hub (chat, ai, streaming, billing).

    Only the services the chatbot needs are instantiated, instead of loading
    the full legacy Services hub. The chat DB interface comes from
    interfaceFeatureChatbot so that chatProcess can reuse hub.interfaceDbChat
    rather than initialising a second one.

    Args:
        user: The user the hub is created for.
        mandateId: Optional mandate scope passed to the DB interfaces.
        featureInstanceId: Optional feature instance scope.
        workflow: Optional workflow; when falsy, a placeholder object carrying
            only ``featureCode`` is handed to the billing context.

    Returns:
        A populated _ChatbotServiceHub. ``hub.billing`` is None when the
        billing service could not be resolved (a warning is logged).
    """
    from modules.serviceHub import PublicService
    from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
    from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
    from modules.serviceCenter.services.serviceChat.mainServiceChat import ChatService
    from modules.serviceCenter.services.serviceAi.mainServiceAi import AiService
    from modules.serviceCenter.core.serviceStreaming.mainServiceStreaming import StreamingService

    # Request context
    hub = _ChatbotServiceHub()
    hub.user = user
    hub.mandateId = mandateId
    hub.featureInstanceId = featureInstanceId
    hub.workflow = workflow
    hub.featureCode = "chatbot"
    hub.allowedProviders = None

    # DB interfaces. interfaceDbComponent is resolved lazily by the hub's
    # property on first access, so only its cache slot is reset here.
    hub.interfaceDbApp = getAppInterface(user, mandateId=mandateId)
    hub._interfaceDbComponent_val = None
    hub.interfaceDbChat = getChatbotInterface(
        user,
        mandateId=mandateId,
        featureInstanceId=featureInstanceId,
    )

    # Services are constructed with the hub, so they are created only after
    # the context and interfaces above have been assigned.
    hub.chat = PublicService(ChatService(hub))
    hub.ai = PublicService(AiService(hub), functionsOnly=False)
    hub.streaming = PublicService(StreamingService(hub))

    # Billing comes from the service center (needed for the preflight billing
    # check and the billing callback); failure here is tolerated.
    try:
        from modules.serviceCenter import getService
        from modules.serviceCenter.context import ServiceCenterContext

        billingWorkflow = workflow or type("_Placeholder", (), {"featureCode": FEATURE_CODE})()
        serviceContext = ServiceCenterContext(
            user=user,
            mandate_id=mandateId,
            feature_instance_id=featureInstanceId,
            workflow=billingWorkflow,
        )
        hub.billing = getService("billing", serviceContext)
    except Exception as e:
        logger.warning(f"Could not resolve billing service for chatbot: {e}")
        hub.billing = None

    return hub
|
|
||||||
|
|
||||||
|
|
||||||
class _ChatbotServiceHub:
    """
    Minimal service hub for the chatbot: chat, ai, streaming and billing.

    All attributes default to None at class level (featureCode defaults to
    "chatbot") and are filled in per instance by the code that builds the hub.
    """

    # Request context
    user = None
    mandateId = None
    featureInstanceId = None
    workflow = None
    featureCode = "chatbot"
    allowedProviders = None

    # Database interfaces
    interfaceDbApp = None
    interfaceDbChat = None
    _interfaceDbComponent_val = None  # cache slot behind the interfaceDbComponent property

    # Services
    chat = None
    ai = None
    billing = None
    streaming = None

    @property
    def interfaceDbComponent(self):
        """Return the component DB interface, creating and caching it on first access."""
        cached = self._interfaceDbComponent_val
        if cached is not None:
            return cached

        # Imported here so the module is only loaded when the interface is actually needed.
        from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface

        cached = getComponentInterface(
            self.user,
            mandateId=self.mandateId,
            featureInstanceId=self.featureInstanceId,
        )
        self._interfaceDbComponent_val = cached
        return cached
|
|
||||||
|
|
||||||
|
|
||||||
def _syncTemplateRolesToDb() -> int:
    """
    Sync template roles and their AccessRules to the database.

    For each entry in TEMPLATE_ROLES, a global template role (mandateId=None)
    is created when no template role with the same label exists yet for this
    feature. For every entry, existing or newly created, missing AccessRules
    are added via _ensureAccessRulesForRole().

    Returns:
        Number of roles newly created. Existing roles that only received
        additional AccessRules are not counted. Returns 0 when an error
        occurs (the error is logged, not raised).
    """
    try:
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.datamodels.datamodelRbac import Role
        from modules.datamodels.datamodelUtils import coerce_text_multilingual

        rootInterface = getRootInterface()

        # Label -> role id of the template roles (mandateId is None) already stored for this feature
        existingRoleLabels = {
            role.roleLabel: str(role.id)
            for role in rootInterface.getRolesByFeatureCode(FEATURE_CODE)
            if role.mandateId is None
        }

        createdCount = 0
        for roleTemplate in TEMPLATE_ROLES:
            roleLabel = roleTemplate["roleLabel"]
            ruleTemplates = roleTemplate.get("accessRules", [])

            if roleLabel in existingRoleLabels:
                # Role already present: only make sure its AccessRules are complete
                _ensureAccessRulesForRole(rootInterface, existingRoleLabels[roleLabel], ruleTemplates)
                continue

            newRole = Role(
                roleLabel=roleLabel,
                description=coerce_text_multilingual(roleTemplate.get("description", {})),
                featureCode=FEATURE_CODE,
                mandateId=None,  # Global template
                featureInstanceId=None,
                isSystemRole=False
            )
            createdRole = rootInterface.db.recordCreate(Role, newRole.model_dump())
            roleId = createdRole.get("id")

            # Remember the new role so a repeated label in TEMPLATE_ROLES does
            # not create the same template role a second time.
            existingRoleLabels[roleLabel] = str(roleId)

            _ensureAccessRulesForRole(rootInterface, roleId, ruleTemplates)

            logger.info(f"Created template role '{roleLabel}' with ID {roleId}")
            createdCount += 1

        if createdCount > 0:
            logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles")

        return createdCount

    except Exception as e:
        logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
        return 0
|
|
||||||
|
|
||||||
|
|
||||||
def _ensureAccessRulesForRole(rootInterface, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
    """
    Ensure AccessRules exist for a role based on templates.

    A rule is identified by its (context, item) pair. Templates whose pair is
    already present for the role, or was already created earlier in this same
    call, are skipped, so a repeated template entry does not produce a
    duplicate rule.

    Args:
        rootInterface: Root interface instance (provides getAccessRulesByRole and db.recordCreate)
        roleId: Role ID
        ruleTemplates: List of rule templates; each may carry "context"
            (defaults to "UI"), "item", "view" (defaults to False), "read",
            "create", "update" and "delete"

    Returns:
        Number of rules created
    """
    from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext

    def _contextKey(ruleContext):
        """Reduce a stored context (enum member, plain string or None) to its comparable value."""
        if not ruleContext:
            return None
        # Compare on .value rather than str(): str() of an enum member can
        # yield "AccessRuleContext.DATA" instead of "DATA". A context stored
        # as a plain value has no .value and is used as-is.
        return getattr(ruleContext, "value", ruleContext)

    # Signatures of the rules already stored for this role
    existingSignatures = {
        (_contextKey(rule.context), rule.item)
        for rule in rootInterface.getAccessRulesByRole(roleId)
    }

    # Known context names map to their enum member; anything else is passed through unchanged
    contextByName = {
        "UI": AccessRuleContext.UI,
        "DATA": AccessRuleContext.DATA,
        "RESOURCE": AccessRuleContext.RESOURCE,
    }

    createdCount = 0
    for template in ruleTemplates:
        context = template.get("context", "UI")
        item = template.get("item")
        sig = (context, item)

        if sig in existingSignatures:
            continue

        newRule = AccessRule(
            roleId=roleId,
            context=contextByName.get(context, context),
            item=item,
            view=template.get("view", False),
            read=template.get("read"),
            create=template.get("create"),
            update=template.get("update"),
            delete=template.get("delete"),
        )
        rootInterface.db.recordCreate(AccessRule, newRule.model_dump())
        # Track the new rule so a repeated template in this list is skipped
        existingSignatures.add(sig)
        createdCount += 1

    if createdCount > 0:
        logger.debug(f"Created {createdCount} AccessRules for role {roleId}")

    return createdCount
|
|
||||||
|
|
@ -1,571 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Chatbot routes for the backend API.
|
|
||||||
Implements chatbot endpoints using LangGraph-based conversation workflows.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import asyncio
|
|
||||||
import math
|
|
||||||
import uuid
|
|
||||||
from typing import Optional, Any, Dict, Union
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends, Body, Path, Query, Request, status
|
|
||||||
from fastapi.responses import StreamingResponse
|
|
||||||
from modules.shared.timeUtils import parseTimestamp, getUtcTimestamp
|
|
||||||
|
|
||||||
# Import auth modules
|
|
||||||
from modules.auth import limiter, getRequestContext, RequestContext
|
|
||||||
|
|
||||||
# Import interfaces
|
|
||||||
from . import interfaceFeatureChatbot as interfaceDbChat
|
|
||||||
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
|
|
||||||
from modules.interfaces.interfaceDbApp import getRootInterface
|
|
||||||
from modules.interfaces.interfaceFeatures import getFeatureInterface
|
|
||||||
|
|
||||||
# Import models
|
|
||||||
from modules.datamodels.datamodelChat import UserInputRequest
|
|
||||||
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
|
|
||||||
from modules.features.chatbot.interfaceFeatureChatbot import ChatbotConversation
|
|
||||||
|
|
||||||
# Import chatbot feature
|
|
||||||
from modules.features.chatbot import chatProcess
|
|
||||||
from modules.features.chatbot.mainChatbot import getEventManager
|
|
||||||
from modules.shared.i18nRegistry import apiRouteContext
|
|
||||||
routeApiMsg = apiRouteContext("routeFeatureChatbot")
|
|
||||||
|
|
||||||
# Configure logger
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Create router for chatbot endpoints
|
|
||||||
router = APIRouter(
|
|
||||||
prefix="/api/chatbot",
|
|
||||||
tags=["Chatbot"],
|
|
||||||
responses={404: {"description": "Not found"}}
|
|
||||||
)
|
|
||||||
|
|
||||||
def _getServiceChat(context: RequestContext, instanceId: Optional[str] = None, mandateId: Optional[str] = None):
    """Return the chatbot interface bound to the user, mandate and feature instance.

    An explicitly passed mandateId (e.g. the one returned by
    _validateInstanceAccess) takes precedence; otherwise the request context's
    mandateId is used, stringified, or None when the context has none.
    Passing the same mandateId that getChatbotServices uses is intended to
    give a cache hit on the interface.
    """
    if mandateId is not None:
        resolvedMandateId = mandateId
    elif context.mandateId:
        resolvedMandateId = str(context.mandateId)
    else:
        resolvedMandateId = None

    return interfaceDbChat.getInterface(
        context.user,
        mandateId=resolvedMandateId,
        featureInstanceId=instanceId,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def _validateInstanceAccess(instanceId: str, context: RequestContext) -> str:
    """
    Check that the feature instance exists, is a chatbot instance, and is accessible to the user.

    Platform admins pass without a FeatureAccess lookup; every other user
    needs an enabled FeatureAccess entry for exactly this instance.

    Args:
        instanceId: The FeatureInstance ID from URL
        context: The request context with user info

    Returns:
        mandateId of the instance, as a string

    Raises:
        HTTPException 404 if instance not found
        HTTPException 400 if the instance is not a chatbot instance
        HTTPException 403 if user doesn't have access
    """
    rootInterface = getRootInterface()
    instance = getFeatureInterface(rootInterface.db).getFeatureInstance(instanceId)

    if not instance:
        raise HTTPException(
            status_code=404,
            detail=f"Feature instance '{instanceId}' not found"
        )

    if instance.featureCode != "chatbot":
        raise HTTPException(
            status_code=400,
            detail=f"Instance '{instanceId}' is not a chatbot instance"
        )

    # Platform admins may use any instance
    if context.isPlatformAdmin:
        return str(instance.mandateId)

    # Everyone else needs an enabled FeatureAccess for this instance
    userId = str(context.user.id)
    for featureAccess in rootInterface.getFeatureAccessesForUser(userId):
        if str(featureAccess.featureInstanceId) == instanceId and featureAccess.enabled:
            return str(instance.mandateId)

    raise HTTPException(
        status_code=403,
        detail=f"Access denied to feature instance '{instanceId}'"
    )
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# List threads - MUST be first to avoid /{instanceId}/{workflowId} matching
|
|
||||||
# GET /api/chatbot/{instanceId}/threads before DELETE /api/chatbot/{instanceId}/{workflowId}
|
|
||||||
# =============================================================================
|
|
||||||
@router.get("/{instanceId}/threads")
@limiter.limit("120/minute")
def get_chatbot_threads(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: Optional[str] = Query(None, description="Optional workflow ID to get details and chat data for a specific thread"),
    pagination: Optional[str] = Query(None, description="JSON-encoded PaginationParams object (only used when workflowId is not provided)"),
    context: RequestContext = Depends(getRequestContext)
) -> Union[PaginatedResponse[ChatbotConversation], Dict[str, Any]]:
    """
    List all chatbot workflows (threads) for the current user, or get details and chat data for a specific thread.

    - If workflowId is provided: Returns the workflow details and all chat data (messages, logs, stats)
    - If workflowId is not provided: Returns a paginated list of all workflows

    In list mode all workflows are fetched, filtered to workflowMode
    "Chatbot", and then sorted and sliced in memory according to the
    pagination parameter.

    Raises:
        HTTPException 404 if workflowId is given but no such workflow exists
        HTTPException 400 if the pagination parameter is not valid JSON, is
            not a JSON object, or does not validate as PaginationParams
        HTTPException 500 on any other error
        (plus whatever _validateInstanceAccess raises)
    """
    mandateId = _validateInstanceAccess(instanceId, context)

    try:
        # Named chatInterface so it does not shadow the imported interfaceDbChat module
        chatInterface = _getServiceChat(context, instanceId, mandateId=mandateId)

        if workflowId:
            workflow = chatInterface.getWorkflow(workflowId)
            if not workflow:
                raise HTTPException(
                    status_code=404,
                    detail=f"Workflow with ID {workflowId} not found"
                )

            if hasattr(workflow, 'model_dump'):
                workflow_dict = workflow.model_dump()
            elif isinstance(workflow, dict):
                workflow_dict = dict(workflow)
            else:
                workflow_dict = workflow

            # Workflows without an explicit step limit are reported with the default of 10
            if workflow_dict.get("maxSteps") is None:
                workflow_dict["maxSteps"] = 10

            chatData = chatInterface.getUnifiedChatData(workflowId, None)

            return {
                "workflow": workflow_dict,
                "chatData": chatData
            }

        paginationParams = None
        if pagination:
            try:
                paginationDict = json.loads(pagination)
                paginationParams = PaginationParams(**paginationDict) if paginationDict else None
            except (json.JSONDecodeError, ValueError, TypeError) as e:
                # TypeError: the JSON decoded to a non-object (e.g. a list), so
                # ** unpacking failed. That is a client error, not a server fault.
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid pagination parameter: {str(e)}"
                ) from e

        all_workflows = chatInterface.getWorkflows(pagination=None)
        chatbot_workflows_data = [
            wf for wf in all_workflows
            if (wf.get("workflowMode") or getattr(wf, "workflowMode", None)) == "Chatbot"
        ]

        if paginationParams:
            if paginationParams.sort:
                chatbot_workflows_data = chatInterface._applySorting(chatbot_workflows_data, paginationParams.sort)
            totalItems = len(chatbot_workflows_data)
            totalPages = math.ceil(totalItems / paginationParams.pageSize) if totalItems > 0 else 0
            startIdx = (paginationParams.page - 1) * paginationParams.pageSize
            endIdx = startIdx + paginationParams.pageSize
            workflows = chatbot_workflows_data[startIdx:endIdx]
        else:
            # No pagination requested: everything is returned as a single page
            workflows = chatbot_workflows_data
            totalItems = len(chatbot_workflows_data)
            totalPages = 1

        normalized_workflows = []
        for wf in workflows:
            normalized_wf = dict(wf)
            if normalized_wf.get("maxSteps") is None:
                normalized_wf["maxSteps"] = 10
            normalized_workflows.append(normalized_wf)

        from modules.routes.routeHelpers import enrichRowsWithFkLabels
        enriched = enrichRowsWithFkLabels(normalized_workflows, ChatbotConversation)

        return {
            "items": enriched,
            "pagination": PaginationMetadata(
                currentPage=paginationParams.page if paginationParams else 1,
                pageSize=paginationParams.pageSize if paginationParams else len(workflows),
                totalItems=totalItems,
                totalPages=totalPages,
                sort=paginationParams.sort if paginationParams else [],
                filters=paginationParams.filters if paginationParams else None
            ).model_dump(),
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting chatbot threads: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Error getting chatbot threads: {str(e)}"
        )
|
|
||||||
|
|
||||||
|
|
||||||
# Chatbot streaming endpoint (SSE)
|
|
||||||
@router.post("/{instanceId}/start/stream")
@limiter.limit("120/minute")
async def stream_chatbot_start(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: Optional[str] = Query(None, description="Optional ID of the workflow to continue (can also be in request body)"),
    userInput: UserInputRequest = Body(...),
    context: RequestContext = Depends(getRequestContext)
) -> StreamingResponse:
    """
    Starts a new chatbot workflow or continues an existing one with SSE streaming.
    Streams progress updates in real-time via Server-Sent Events.

    workflowId can be provided either:
    - As a query parameter: /api/chatbot/{instanceId}/start/stream?workflowId=xxx
    - In the request body as part of UserInputRequest
    - Query parameter takes precedence if both are provided

    The stream first replays the stored chat data (when resuming, only items
    of the current round onwards), then forwards queue events until a
    complete/stopped/error event arrives, the workflow status becomes
    "stopped", the client disconnects, or 5 minutes have passed.

    Raises:
        HTTPException 500 if the workflow could not be created or loaded, or
            on any unexpected error before streaming starts
        Any HTTPException raised while starting the workflow is passed
            through unchanged (plus whatever _validateInstanceAccess raises)
    """
    # Validate instance access
    mandateId = _validateInstanceAccess(instanceId, context)

    event_manager = getEventManager(context.user, mandateId=mandateId, featureInstanceId=instanceId)

    try:
        # Use workflowId from query parameter if provided, otherwise from request body
        final_workflow_id = workflowId or userInput.workflowId

        # Start background processing (this will create the workflow and event queue)
        workflow = await chatProcess(
            context.user, mandateId, userInput, final_workflow_id,
            featureInstanceId=instanceId, event_manager=event_manager
        )

        if not workflow:
            raise HTTPException(
                status_code=500,
                detail=routeApiMsg("Failed to create or load workflow")
            )

        # Get event queue for the workflow, creating it if it doesn't exist
        queue = event_manager.get_queue(workflow.id)
        if not queue:
            queue = event_manager.create_queue(workflow.id)

        def _sse(payload) -> str:
            """Format one payload as an SSE data frame."""
            return f"data: {json.dumps(payload)}\n\n"

        async def event_stream():
            """Async generator for SSE events, driven by the workflow's event queue."""
            try:
                # Yield keepalive immediately so the client gets 200 + first byte fast
                yield ": keepalive\n\n"

                # Use same mandateId as chatProcess so we hit interface cache (avoid duplicate DB init)
                chatInterface = _getServiceChat(context, instanceId, mandateId=mandateId)

                # Use workflow from chatProcess (no refetch)
                current_round = workflow.currentRound if workflow else None
                is_resuming = final_workflow_id is not None and current_round and current_round > 1

                # Send initial chat data (exact format as chatData endpoint) - only once at start
                try:
                    chatData = chatInterface.getUnifiedChatData(workflow.id, None)
                    if chatData.get("items"):
                        # Filter fully before emitting, so a failure while
                        # filtering does not leave a half-sent replay
                        filtered_items = []
                        for item in chatData["items"]:
                            if is_resuming and current_round:
                                item_round = None
                                item_data = item.get("item")
                                if item_data:
                                    # Handle both dict and object access
                                    if isinstance(item_data, dict):
                                        item_round = item_data.get("roundNumber")
                                    elif hasattr(item_data, "roundNumber"):
                                        item_round = item_data.roundNumber

                                # When resuming, only include items from current round onwards.
                                # Items without roundNumber or from earlier rounds are skipped.
                                if item_round is None or item_round < current_round:
                                    continue

                            filtered_items.append(item)

                        for item in filtered_items:
                            _inner = item.get("item")
                            serializable_item = {
                                "type": item.get("type"),
                                "createdAt": item.get("createdAt"),
                                "item": _inner.model_dump() if _inner is not None and hasattr(_inner, "model_dump") else _inner,
                            }
                            # Emit item directly in exact chatData format: {type, createdAt, item}
                            yield _sse(serializable_item)
                except Exception as e:
                    logger.warning(f"Error fetching initial chat data: {e}")

                # get_running_loop() is the documented way to reach the loop
                # from inside a coroutine; its monotonic clock drives all timers below
                loop = asyncio.get_running_loop()

                keepalive_interval = 30.0      # seconds of silence before a keepalive comment is sent
                status_check_interval = 5.0    # seconds between workflow status lookups
                timeout = 300.0                # 5 minutes max per stream

                start_time = loop.time()
                last_keepalive = start_time
                last_status_check = start_time

                while True:
                    # Check timeout
                    if loop.time() - start_time > timeout:
                        logger.info(f"Stream timeout for workflow {workflow.id}")
                        break

                    # Check for client disconnection
                    if await request.is_disconnected():
                        logger.info(f"Client disconnected for workflow {workflow.id}")
                        break

                    current_time = loop.time()

                    # Periodically check workflow status, in case the stop event was missed
                    if current_time - last_status_check >= status_check_interval:
                        try:
                            current_workflow = chatInterface.getWorkflow(workflow.id)
                            if current_workflow and current_workflow.status == "stopped":
                                logger.info(f"Workflow {workflow.id} was stopped, closing stream")
                                break
                        except Exception as e:
                            logger.warning(f"Error checking workflow status: {e}")
                        last_status_check = current_time

                    # Wait up to one second for the next event so the checks above keep running
                    try:
                        event = await asyncio.wait_for(queue.get(), timeout=1.0)

                        event_type = event.get("type")
                        event_data = event.get("data", {})

                        # Emit chatdata events (messages, logs, stats, status, chunk) in exact chatData format
                        if event_type == "chatdata" and event_data:
                            if event_data.get("type") == "status":
                                # Status events (transient UI feedback): {type: "status", label: "..."}
                                yield _sse({
                                    "type": "status",
                                    "label": event_data.get("label", "")
                                })
                            elif event_data.get("type") == "chunk":
                                # Token chunks for incremental text streaming
                                yield _sse({
                                    "type": "chunk",
                                    "content": event_data.get("content", "")
                                })
                            else:
                                # Other chatdata items (messages, logs, stats)
                                chatdata_item = event_data
                                # Ensure item field is serializable (convert Pydantic models to dicts)
                                if isinstance(chatdata_item, dict) and "item" in chatdata_item:
                                    item_obj = chatdata_item.get("item")
                                    if hasattr(item_obj, "model_dump"):
                                        chatdata_item = chatdata_item.copy()
                                        chatdata_item["item"] = item_obj.model_dump()
                                yield _sse(chatdata_item)

                        # Completion/stopped/error events close the stream
                        elif event_type == "complete":
                            logger.info(f"Workflow {workflow.id} completed, closing stream")
                            break
                        elif event_type == "stopped":
                            logger.info(f"Workflow {workflow.id} stopped, closing stream")
                            break
                        elif event_type == "error" and event.get("step") == "error":
                            logger.warning(f"Workflow {workflow.id} error, closing stream")
                            break

                        # Any handled event counts as traffic; postpone the next keepalive
                        last_keepalive = current_time
                    except asyncio.TimeoutError:
                        # No event within one second: send a keepalive if the connection has been quiet long enough
                        current_time = loop.time()
                        if current_time - last_keepalive >= keepalive_interval:
                            yield ": keepalive\n\n"
                            last_keepalive = current_time
                        continue
                    except Exception as e:
                        logger.error(f"Error in event stream: {e}")
                        break

            except Exception as e:
                logger.error(f"Error in event stream generator: {e}", exc_info=True)

        return StreamingResponse(
            event_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"  # Disable buffering for nginx
            }
        )

    except HTTPException:
        # Keep the original status code and detail, as the other endpoints in this router do
        raise
    except Exception as e:
        logger.error(f"Error in stream_chatbot_start: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
|
|
||||||
|
|
||||||
|
|
||||||
# Workflow stop endpoint
|
|
||||||
@router.post("/{instanceId}/stop/{workflowId}", response_model=ChatbotConversation)
@limiter.limit("120/minute")
async def stop_chatbot(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: str = Path(..., description="ID of the workflow to stop"),
    context: RequestContext = Depends(getRequestContext)
) -> ChatbotConversation:
    """
    Stop a running chatbot workflow.

    Sets the workflow status to "stopped", stores a warning log entry, and
    emits a "stopped" event through the event manager.

    Returns:
        The workflow as reloaded after the status update.

    Raises:
        HTTPException 404 if the workflow does not exist
        HTTPException 403 if the workflow belongs to a different instance
        HTTPException 500 on any other error
        (plus whatever _validateInstanceAccess raises)
    """
    mandateId = _validateInstanceAccess(instanceId, context)

    try:
        chatInterface = _getServiceChat(context, instanceId, mandateId=mandateId)

        # The workflow must exist ...
        workflow = chatInterface.getWorkflow(workflowId)
        if not workflow:
            raise HTTPException(
                status_code=404,
                detail=f"Workflow {workflowId} not found"
            )

        # ... and, when it is bound to an instance, that must be this one
        ownerInstanceId = workflow.featureInstanceId
        if ownerInstanceId and ownerInstanceId != instanceId:
            raise HTTPException(
                status_code=403,
                detail=f"Workflow {workflowId} does not belong to instance {instanceId}"
            )

        # Mark the workflow as stopped
        statusUpdate = {
            "status": "stopped",
            "lastActivity": getUtcTimestamp()
        }
        chatInterface.updateWorkflow(workflowId, statusUpdate)

        event_manager = getEventManager(context.user, mandateId=mandateId, featureInstanceId=instanceId)

        # Record the stop as a log entry; the event manager is passed along to createLog
        stopLog = {
            "id": f"log_{uuid.uuid4()}",
            "workflowId": workflowId,
            "message": "Workflow stopped by user",
            "type": "warning",
            "status": "stopped",
            "timestamp": getUtcTimestamp(),
            "roundNumber": workflow.currentRound if workflow else 1
        }
        chatInterface.createLog(stopLog, event_manager=event_manager)

        # Reload so the response reflects the updated status
        workflow = chatInterface.getWorkflow(workflowId)

        # Emit the stopped event under this workflow's context id
        await event_manager.emit_event(
            context_id=workflowId,
            event_type="stopped",
            data={"workflowId": workflowId},
            event_category="workflow",
            message="Workflow stopped by user",
            step="stopped"
        )
        logger.info(f"Stopped workflow {workflowId} and emitted stopped event")

        return workflow

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in stop_chatbot: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
|
|
||||||
|
|
||||||
# Delete chatbot workflow endpoint
|
|
||||||
# NOTE: This catch-all route MUST be defined AFTER more specific routes like /threads
|
|
||||||
@router.delete("/{instanceId}/{workflowId}", response_model=Dict[str, Any])
@limiter.limit("120/minute")
def delete_chatbot(
    request: Request,
    instanceId: str = Path(..., description="Feature Instance ID"),
    workflowId: str = Path(..., description="ID of the workflow to delete"),
    context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
    """Delete a chatbot workflow via the chat service interface.

    Args:
        request: Incoming HTTP request. Not used in the body; presumably
            required by the rate-limiter decorator -- confirm.
        instanceId: Feature instance the caller is operating on.
        workflowId: ID of the workflow to delete.
        context: Request context of the authenticated caller.

    Returns:
        A dict with the deleted workflow ``id`` and a confirmation ``message``.

    Raises:
        HTTPException: 404 if the workflow is not found, 400 if it is not a
            chatbot workflow, 500 if deletion fails or an unexpected error
            occurs. Errors raised by the instance-access validation propagate
            unchanged.
    """
    # Validate instance access - if user has access to instance, they can delete their workflows
    mandateId = _validateInstanceAccess(instanceId, context)

    try:
        interfaceDbChat = _getServiceChat(context, instanceId, mandateId=mandateId)

        # Get workflow directly (interface already handles mandate filtering)
        workflow = interfaceDbChat.getWorkflow(workflowId)
        if not workflow:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Workflow with ID {workflowId} not found"
            )

        # NOTE(review): stop_chatbot rejects workflows that do not belong to
        # instanceId; no equivalent check exists here. Confirm that the chat
        # service interface already scopes lookups to the instance.

        # Check if workflow is a chatbot workflow.
        # A single getattr with a default covers both a missing and an empty
        # attribute; the former "workflow.workflowMode or getattr(...)" form
        # raised AttributeError before its fallback could ever be evaluated.
        if getattr(workflow, "workflowMode", None) != "Chatbot":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Workflow {workflowId} is not a chatbot workflow"
            )

        # User has instance access, allow delete (no complex RBAC checks needed)
        logger.info(f"User {context.user.id} deleting workflow {workflowId} from instance {instanceId}")

        # Delete workflow
        success = interfaceDbChat.deleteWorkflow(workflowId)

        if not success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=routeApiMsg("Failed to delete workflow")
            )

        return {
            "id": workflowId,
            "message": "Chatbot workflow and associated data deleted successfully"
        }
    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        logger.error(f"Error in delete_chatbot: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Error deleting chatbot workflow: {str(e)}"
        )
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -33,7 +33,7 @@ Feature Registration → mainCommcoach.py
|
||||||
| DatabaseConnector | `connectorDbPostgre.py` | New DB `poweron_commcoach` |
|
| DatabaseConnector | `connectorDbPostgre.py` | New DB `poweron_commcoach` |
|
||||||
| VoiceObjects (STT/TTS) | `interfaceVoiceObjects.py` | Voice pipeline |
|
| VoiceObjects (STT/TTS) | `interfaceVoiceObjects.py` | Voice pipeline |
|
||||||
| MessagingInterface | `interfaceMessaging.py` | Email summaries |
|
| MessagingInterface | `interfaceMessaging.py` | Email summaries |
|
||||||
| SSE Pattern | chatbot `routeFeatureChatbot.py` | Chat streaming |
|
| SSE Pattern | workspace `routeFeatureWorkspace.py` | Chat streaming |
|
||||||
| PDF Renderer | `rendererPdf.py` | Dossier export (Iteration 2) |
|
| PDF Renderer | `rendererPdf.py` | Dossier export (Iteration 2) |
|
||||||
| EventManagement | `eventManagement.py` | Scheduled reminders |
|
| EventManagement | `eventManagement.py` | Scheduled reminders |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,15 @@
|
||||||
"""
|
"""
|
||||||
LangGraph-based pipeline for extracting structured content from BZO PDFs.
|
Pipeline for extracting structured content from BZO PDFs.
|
||||||
|
|
||||||
|
The extraction runs as a plain sequential pipeline of step functions; each step
|
||||||
|
takes the shared state dict, mutates/returns it, and the steps are chained
|
||||||
|
directly (no external workflow-orchestration framework).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import TypedDict, List, Dict, Any, Optional
|
from typing import TypedDict, List, Dict, Any, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from langgraph.graph import StateGraph, START, END
|
|
||||||
|
|
||||||
from modules.features.realEstate.bzoPdfExtractor import BZOPdfExtractor, TextBlock
|
from modules.features.realEstate.bzoPdfExtractor import BZOPdfExtractor, TextBlock
|
||||||
from modules.features.realEstate.bzoRuleTaxonomy import RULE_TAXONOMY
|
from modules.features.realEstate.bzoRuleTaxonomy import RULE_TAXONOMY
|
||||||
|
|
@ -14,7 +17,7 @@ from modules.features.realEstate.bzoRuleTaxonomy import RULE_TAXONOMY
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# ===== BZO Params Extraction State (LangGraph with LLM) =====
|
# ===== BZO Params Extraction State (LLM step) =====
|
||||||
|
|
||||||
class BZOParamsExtractionState(TypedDict):
|
class BZOParamsExtractionState(TypedDict):
|
||||||
"""State for BZO params extraction via LLM."""
|
"""State for BZO params extraction via LLM."""
|
||||||
|
|
@ -947,7 +950,7 @@ def extract_wohnzone_params(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ===== LangGraph: LLM-based BZO Params Extraction =====
|
# ===== LLM-based BZO Params Extraction =====
|
||||||
|
|
||||||
def _build_bauzone_context_for_llm(state: BZOParamsExtractionState) -> str:
|
def _build_bauzone_context_for_llm(state: BZOParamsExtractionState) -> str:
|
||||||
"""Build context string for LLM from extracted BZO content."""
|
"""Build context string for LLM from extracted BZO content."""
|
||||||
|
|
@ -1089,7 +1092,7 @@ Antwort NUR mit den relevanten Artikelnummern, eine pro Zeile (z.B. "Art. 15", "
|
||||||
|
|
||||||
|
|
||||||
async def llm_extract_bauzone_params_node(state: BZOParamsExtractionState) -> BZOParamsExtractionState:
|
async def llm_extract_bauzone_params_node(state: BZOParamsExtractionState) -> BZOParamsExtractionState:
|
||||||
"""LangGraph node: use LLM to extract BZO parameters for Bauzone as bullet list."""
|
"""Use LLM to extract BZO parameters for Bauzone as bullet list."""
|
||||||
bauzone = state.get("bauzone", "")
|
bauzone = state.get("bauzone", "")
|
||||||
gemeinde = state.get("gemeinde", "")
|
gemeinde = state.get("gemeinde", "")
|
||||||
ai_service = state.get("ai_service")
|
ai_service = state.get("ai_service")
|
||||||
|
|
@ -1192,15 +1195,6 @@ Antwort NUR mit der Bullet-Liste, sonst nichts:"""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def create_bzo_params_extraction_graph():
    """Build and compile the single-node graph for LLM-based BZO params extraction.

    The graph has one node, ``llm_extract``, which is both the entry point
    and the last step before ``END``.
    """
    nodeName = "llm_extract"
    builder = StateGraph(BZOParamsExtractionState)
    builder.add_node(nodeName, llm_extract_bauzone_params_node)
    builder.set_entry_point(nodeName)
    builder.add_edge(nodeName, END)
    return builder.compile()
|
|
||||||
|
|
||||||
|
|
||||||
def _filter_articles_by_bauzone(articles: List[Dict[str, Any]], bauzone: str) -> List[Dict[str, Any]]:
|
def _filter_articles_by_bauzone(articles: List[Dict[str, Any]], bauzone: str) -> List[Dict[str, Any]]:
|
||||||
"""Filter articles that mention the Bauzone."""
|
"""Filter articles that mention the Bauzone."""
|
||||||
bauzone_upper = (bauzone or "").upper()
|
bauzone_upper = (bauzone or "").upper()
|
||||||
|
|
@ -1247,7 +1241,7 @@ async def run_bzo_params_extraction(
|
||||||
total_area_m2: Optional[float] = None,
|
total_area_m2: Optional[float] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run LangGraph workflow to extract BZO parameters for a Bauzone via LLM.
|
Extract BZO parameters for a Bauzone via LLM.
|
||||||
Returns fakten (item/value/source), bauzone_params_list (bullet strings), zusatzinformationen.
|
Returns fakten (item/value/source), bauzone_params_list (bullet strings), zusatzinformationen.
|
||||||
"""
|
"""
|
||||||
rules = relevant_rules if relevant_rules is not None else _bzo_filter_rules_by_bauzone(
|
rules = relevant_rules if relevant_rules is not None else _bzo_filter_rules_by_bauzone(
|
||||||
|
|
@ -1276,8 +1270,7 @@ async def run_bzo_params_extraction(
|
||||||
"errors": [],
|
"errors": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
graph = create_bzo_params_extraction_graph()
|
final_state = await llm_extract_bauzone_params_node(state)
|
||||||
final_state = await graph.ainvoke(state)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"bauzone": bauzone,
|
"bauzone": bauzone,
|
||||||
|
|
@ -1288,19 +1281,14 @@ async def run_bzo_params_extraction(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ===== Graph Construction =====
|
# ===== Pipeline Execution =====
|
||||||
|
|
||||||
def create_bzo_extraction_graph():
|
def _run_bzo_extraction_pipeline(state: BZOExtractionState) -> BZOExtractionState:
|
||||||
"""Create and compile the BZO extraction graph (simplified 4-node pipeline)."""
|
"""Run the BZO extraction steps sequentially on the shared state."""
|
||||||
workflow = StateGraph(BZOExtractionState)
|
state = classify_and_assemble(state)
|
||||||
workflow.add_node("classify_and_assemble", classify_and_assemble)
|
state = extract_zones_and_tables(state)
|
||||||
workflow.add_node("extract_zones_and_tables", extract_zones_and_tables)
|
state = extract_rules(state)
|
||||||
workflow.add_node("extract_rules", extract_rules)
|
return state
|
||||||
workflow.set_entry_point("classify_and_assemble")
|
|
||||||
workflow.add_edge("classify_and_assemble", "extract_zones_and_tables")
|
|
||||||
workflow.add_edge("extract_zones_and_tables", "extract_rules")
|
|
||||||
workflow.add_edge("extract_rules", END)
|
|
||||||
return workflow.compile()
|
|
||||||
|
|
||||||
|
|
||||||
def run_extraction(pdf_bytes: bytes, pdf_id: str = None, dokument_id: str = None) -> Dict[str, Any]:
|
def run_extraction(pdf_bytes: bytes, pdf_id: str = None, dokument_id: str = None) -> Dict[str, Any]:
|
||||||
|
|
@ -1357,9 +1345,8 @@ def run_extraction(pdf_bytes: bytes, pdf_id: str = None, dokument_id: str = None
|
||||||
for tb in text_blocks_objects
|
for tb in text_blocks_objects
|
||||||
]
|
]
|
||||||
|
|
||||||
# Create and run graph
|
# Run the extraction pipeline
|
||||||
graph = create_bzo_extraction_graph()
|
final_state = _run_bzo_extraction_pipeline(state)
|
||||||
final_state = graph.invoke(state)
|
|
||||||
|
|
||||||
# Sort and structure results
|
# Sort and structure results
|
||||||
articles = sorted(
|
articles = sorted(
|
||||||
|
|
|
||||||
|
|
@ -278,7 +278,7 @@ from .interfaceFeatureRealEstate import getInterface as getRealEstateInterface
|
||||||
from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
|
from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
|
||||||
from modules.connectors.connectorSwissTopoMapServer import SwissTopoMapServerConnector
|
from modules.connectors.connectorSwissTopoMapServer import SwissTopoMapServerConnector
|
||||||
from modules.features.realEstate.bzoDocumentRetriever import BZODocumentRetriever
|
from modules.features.realEstate.bzoDocumentRetriever import BZODocumentRetriever
|
||||||
from modules.features.realEstate.bzoExtractionLangGraph import run_extraction, run_bzo_params_extraction
|
from modules.features.realEstate.bzoExtraction import run_extraction, run_bzo_params_extraction
|
||||||
from modules.features.realEstate.parcelSelectionService import compute_selection_summary
|
from modules.features.realEstate.parcelSelectionService import compute_selection_summary
|
||||||
from modules.features.realEstate.realEstateGemeindeService import (
|
from modules.features.realEstate.realEstateGemeindeService import (
|
||||||
ensure_single_gemeinde,
|
ensure_single_gemeinde,
|
||||||
|
|
@ -2331,7 +2331,7 @@ async def extract_bzo_information(
|
||||||
Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde.
|
Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde.
|
||||||
|
|
||||||
Retrieves BZO documents for the specified Gemeinde, extracts content using
|
Retrieves BZO documents for the specified Gemeinde, extracts content using
|
||||||
langgraph workflow, filters by Bauzone, and uses AI to find relevant information.
|
the BZO extraction pipeline, filters by Bauzone, and uses AI to find relevant information.
|
||||||
When total_area_m2 or parcels are provided, runs Machbarkeitsstudie for structured output.
|
When total_area_m2 or parcels are provided, runs Machbarkeitsstudie for structured output.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -2489,7 +2489,7 @@ async def extract_bzo_information(
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Run extraction using langgraph workflow
|
# Run extraction using the BZO extraction pipeline
|
||||||
extraction_result = run_extraction(
|
extraction_result = run_extraction(
|
||||||
pdf_bytes=pdf_bytes,
|
pdf_bytes=pdf_bytes,
|
||||||
pdf_id=dokument.dokumentReferenz or f"dok_{dokument.id}",
|
pdf_id=dokument.dokumentReferenz or f"dok_{dokument.id}",
|
||||||
|
|
@ -2543,7 +2543,7 @@ async def extract_bzo_information(
|
||||||
selection_summary = compute_selection_summary(parcels)
|
selection_summary = compute_selection_summary(parcels)
|
||||||
_total_area_m2 = selection_summary.get("total_area_m2") or 0.0
|
_total_area_m2 = selection_summary.get("total_area_m2") or 0.0
|
||||||
|
|
||||||
# Extract BZO parameters for Wohnzone via LangGraph + LLM (bullet list with sources)
|
# Extract BZO parameters for Wohnzone via LLM (bullet list with sources)
|
||||||
bzo_params_result = None
|
bzo_params_result = None
|
||||||
try:
|
try:
|
||||||
services = getServices(
|
services = getServices(
|
||||||
|
|
|
||||||
|
|
@ -706,7 +706,7 @@ async def get_parcel_documents(
|
||||||
"""
|
"""
|
||||||
Ensure BZO document exists for Gemeinde, return documents for parcel info display.
|
Ensure BZO document exists for Gemeinde, return documents for parcel info display.
|
||||||
Creates Gemeinde (Swiss Topo) and BZO (Tavily) if not in DB.
|
Creates Gemeinde (Swiss Topo) and BZO (Tavily) if not in DB.
|
||||||
Returns documents for preview - does NOT run LangGraph.
|
Returns documents for preview - does NOT run the BZO extraction pipeline.
|
||||||
"""
|
"""
|
||||||
mandateId = _validateInstanceAccess(instanceId, context)
|
mandateId = _validateInstanceAccess(instanceId, context)
|
||||||
interface = getRealEstateInterface(
|
interface = getRealEstateInterface(
|
||||||
|
|
@ -787,7 +787,7 @@ async def get_instance_bzo_information(
|
||||||
parcel_ids: Optional[str] = Query(None, description="Comma-separated parcel IDs; total area computed from parcels"),
|
parcel_ids: Optional[str] = Query(None, description="Comma-separated parcel IDs; total area computed from parcels"),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Extract BZO information for a Bauzone in a Gemeinde. Runs LangGraph workflow. With total_area_m2 or parcel_ids, includes Machbarkeitsstudie."""
|
"""Extract BZO information for a Bauzone in a Gemeinde. Runs the BZO extraction pipeline. With total_area_m2 or parcel_ids, includes Machbarkeitsstudie."""
|
||||||
mandateId = _validateInstanceAccess(instanceId, context)
|
mandateId = _validateInstanceAccess(instanceId, context)
|
||||||
parcels = None
|
parcels = None
|
||||||
if parcel_ids:
|
if parcel_ids:
|
||||||
|
|
|
||||||
|
|
@ -203,8 +203,9 @@ def buildDataSourceContext(chatService, dataSourceIds: List[str]) -> str:
|
||||||
"- Use ONLY browseDataSource, searchDataSource, and downloadFromDataSource to access these sources.",
|
"- Use ONLY browseDataSource, searchDataSource, and downloadFromDataSource to access these sources.",
|
||||||
"- Use the dataSourceId (UUID) exactly as shown below.",
|
"- Use the dataSourceId (UUID) exactly as shown below.",
|
||||||
"- Do NOT use listFiles, externalBrowse, or externalSearch for attached data sources -- those tools are for other purposes.",
|
"- Do NOT use listFiles, externalBrowse, or externalSearch for attached data sources -- those tools are for other purposes.",
|
||||||
"- browseDataSource returns BOTH files and folders at the given path.",
|
"- SEARCH FIRST: for any targeted question (specific topic, sender, keyword, date range), use searchDataSource. It queries the source server-side and is far cheaper than listing everything. These sources can be huge (gigabytes); NEVER browse and download an entire source to find a few items.",
|
||||||
"- When downloading files, ALWAYS provide the human-readable fileName (with extension) from the browse results.",
|
"- browseDataSource is for directory listing or getting the newest items of a folder. Download ONLY the items that match the user's request.",
|
||||||
|
"- When downloading files, ALWAYS provide the human-readable fileName (with extension) from the browse/search results.",
|
||||||
"",
|
"",
|
||||||
]
|
]
|
||||||
found = False
|
found = False
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ class FeatureInterface:
|
||||||
Get a feature by code.
|
Get a feature by code.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
featureCode: Feature code (e.g., "trustee", "chatbot")
|
featureCode: Feature code (e.g., "trustee", "commcoach")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Feature object or None
|
Feature object or None
|
||||||
|
|
|
||||||
|
|
@ -196,11 +196,6 @@ TABLE_NAMESPACE = {
|
||||||
"ChatLog": "chat",
|
"ChatLog": "chat",
|
||||||
"ChatDocument": "chat",
|
"ChatDocument": "chat",
|
||||||
"Prompt": "chat",
|
"Prompt": "chat",
|
||||||
# Chatbot (poweron_chatbot) - per feature-instance isolation
|
|
||||||
"ChatbotConversation": "chatbot",
|
|
||||||
"ChatbotMessage": "chatbot",
|
|
||||||
"ChatbotDocument": "chatbot",
|
|
||||||
"ChatbotLog": "chatbot",
|
|
||||||
# Files - benutzer-eigen
|
# Files - benutzer-eigen
|
||||||
"FileItem": "files",
|
"FileItem": "files",
|
||||||
"FileData": "files",
|
"FileData": "files",
|
||||||
|
|
@ -228,7 +223,7 @@ TABLE_NAMESPACE = {
|
||||||
|
|
||||||
# Namespaces ohne Mandantenkontext - GROUP wird auf MY gemappt
|
# Namespaces ohne Mandantenkontext - GROUP wird auf MY gemappt
|
||||||
# NOTE: "files" is NOT in this set – files use scope-based visibility for GROUP
|
# NOTE: "files" is NOT in this set – files use scope-based visibility for GROUP
|
||||||
USER_OWNED_NAMESPACES = {"chat", "chatbot", "automation", "knowledge", "datasource"}
|
USER_OWNED_NAMESPACES = {"chat", "automation", "knowledge", "datasource"}
|
||||||
|
|
||||||
|
|
||||||
def buildDataObjectKey(tableName: str, featureCode: Optional[str] = None) -> str:
|
def buildDataObjectKey(tableName: str, featureCode: Optional[str] = None) -> str:
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ router = APIRouter(
|
||||||
|
|
||||||
class FeatureInstanceCreate(BaseModel):
|
class FeatureInstanceCreate(BaseModel):
|
||||||
"""Request model for creating a feature instance"""
|
"""Request model for creating a feature instance"""
|
||||||
featureCode: str = Field(..., description="Feature code (e.g., 'trustee', 'chatbot')")
|
featureCode: str = Field(..., description="Feature code (e.g., 'trustee', 'commcoach')")
|
||||||
label: str = Field(..., description="Instance label (e.g., 'Buchhaltung 2025')")
|
label: str = Field(..., description="Instance label (e.g., 'Buchhaltung 2025')")
|
||||||
enabled: bool = Field(True, description="Whether this feature instance is enabled")
|
enabled: bool = Field(True, description="Whether this feature instance is enabled")
|
||||||
copyTemplateRoles: bool = Field(True, description="Whether to copy template roles on creation")
|
copyTemplateRoles: bool = Field(True, description="Whether to copy template roles on creation")
|
||||||
|
|
@ -778,11 +778,6 @@ def updateFeatureInstance(
|
||||||
detail=routeApiMsg("Failed to update feature instance")
|
detail=routeApiMsg("Failed to update feature instance")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Clear chatbot config cache when config was updated for chatbot instances
|
|
||||||
if "config" in updateData and instance.featureCode == "chatbot":
|
|
||||||
from modules.features.chatbot.config import clear_config_cache
|
|
||||||
clear_config_cache(instanceId)
|
|
||||||
|
|
||||||
logger.info(f"User {context.user.id} updated feature instance {instanceId}: {updateData}")
|
logger.info(f"User {context.user.id} updated feature instance {instanceId}: {updateData}")
|
||||||
|
|
||||||
return updated.model_dump()
|
return updated.model_dump()
|
||||||
|
|
@ -1637,7 +1632,7 @@ def get_feature(
|
||||||
/instances, /my, /templates, etc.
|
/instances, /my, /templates, etc.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
featureCode: Feature code (e.g., 'trustee', 'chatbot')
|
featureCode: Feature code (e.g., 'trustee', 'commcoach')
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Features come from the RBAC Catalog (code-defined, not DB-stored)
|
# Features come from the RBAC Catalog (code-defined, not DB-stored)
|
||||||
|
|
|
||||||
|
|
@ -1,217 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""STT Benchmark route — compare Speech-to-Text v1 (latest_long) vs v2 (Chirp 2).
|
|
||||||
|
|
||||||
Sysadmin-only page for evaluating STT model quality and latency.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict
|
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends, Request, UploadFile, File, Form
|
|
||||||
from modules.auth import limiter, getCurrentUser
|
|
||||||
from modules.datamodels.datamodelUam import User
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
router = APIRouter(
|
|
||||||
prefix="/api/admin/stt-benchmark",
|
|
||||||
tags=["Admin STT Benchmark"],
|
|
||||||
responses={401: {"description": "Unauthorized"}, 403: {"description": "Forbidden"}},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _requireSysAdmin(currentUser: User = Depends(getCurrentUser)) -> User:
    """Dependency that lets only sysadmin or platform-admin users through.

    Returns the user unchanged when either ``isSysAdmin`` or
    ``isPlatformAdmin`` is truthy; otherwise raises HTTP 403.
    """
    adminFlags = ("isSysAdmin", "isPlatformAdmin")
    if any(getattr(currentUser, flagName, False) for flagName in adminFlags):
        return currentUser
    raise HTTPException(status_code=403, detail="SysAdmin required")
|
|
||||||
|
|
||||||
|
|
||||||
def _getCredentials():
    """Build Google service-account credentials from the configured secret.

    The secret ``Connector_GoogleSpeech_API_KEY_SECRET`` is parsed as JSON
    service-account info. Raises HTTP 500 when the secret is missing or still
    carries a ``YOUR_`` placeholder prefix.
    """
    rawKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
    keyUnset = not rawKey or rawKey.startswith("YOUR_")
    if keyUnset:
        raise HTTPException(status_code=500, detail="Google Speech API key not configured")

    # Imported lazily so the Google auth package is only loaded when needed.
    from google.oauth2 import service_account
    accountInfo = json.loads(rawKey)
    return service_account.Credentials.from_service_account_info(accountInfo)
|
|
||||||
|
|
||||||
|
|
||||||
def _runV1(audioBytes: bytes, language: str, model: str) -> Dict[str, Any]:
    """Run a Speech-to-Text v1 recognition and report results with latency.

    Args:
        audioBytes: Raw audio content sent inline to the API.
        language: Language code passed as ``language_code``.
        model: v1 model name.

    Returns:
        Dict with ``api``, ``model``, ``latencyMs``, the flattened list of
        alternatives (``results``) and the number of API results
        (``resultCount``).
    """
    from google.cloud import speech

    client = speech.SpeechClient(credentials=_getCredentials())

    recognitionConfig = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        language_code=language,
        model=model,
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
        enable_word_confidence=True,
        max_alternatives=3,
        use_enhanced=True,
    )
    recognitionAudio = speech.RecognitionAudio(content=audioBytes)

    # Only the recognize call itself is timed.
    startedAt = time.perf_counter()
    response = client.recognize(config=recognitionConfig, audio=recognitionAudio)
    latencySeconds = time.perf_counter() - startedAt

    # Flatten every alternative of every result into one list.
    alternatives = [
        {
            "transcript": alt.transcript,
            "confidence": round(alt.confidence, 4),
            "words": len(alt.words) if alt.words else 0,
        }
        for result in response.results
        for alt in result.alternatives
    ]

    return {
        "api": "v1",
        "model": model,
        "latencyMs": round(latencySeconds * 1000, 1),
        "results": alternatives,
        "resultCount": len(response.results),
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _runV2(audioBytes: bytes, language: str, model: str, location: str) -> Dict[str, Any]:
    """Run a Speech-to-Text v2 recognition (Chirp 2) and report results with latency.

    Args:
        audioBytes: Raw audio content sent inline to the API.
        language: Language code passed as the only entry of ``language_codes``.
        model: v2 model name.
        location: Region used for both the API endpoint and the recognizer path.

    Returns:
        Dict with ``api``, ``model``, ``location``, ``latencyMs``, the
        flattened list of alternatives (``results``) and the number of API
        results (``resultCount``).
    """
    from google.cloud.speech_v2 import SpeechClient
    from google.cloud.speech_v2.types import cloud_speech

    credentials = _getCredentials()
    # The project id is taken from the same service-account JSON as the credentials.
    accountInfo = json.loads(APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET"))
    projectId = accountInfo.get("project_id", "")

    endpoint = f"{location}-speech.googleapis.com"
    client = SpeechClient(
        credentials=credentials,
        client_options={"api_endpoint": endpoint},
    )

    features = cloud_speech.RecognitionFeatures(
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
        enable_word_confidence=True,
    )
    recognitionConfig = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=[language],
        model=model,
        features=features,
    )

    recognizeRequest = cloud_speech.RecognizeRequest(
        recognizer=f"projects/{projectId}/locations/{location}/recognizers/_",
        config=recognitionConfig,
        content=audioBytes,
    )

    # Only the recognize call itself is timed.
    startedAt = time.perf_counter()
    response = client.recognize(request=recognizeRequest)
    latencySeconds = time.perf_counter() - startedAt

    # Flatten every alternative of every result into one list.
    alternatives = [
        {
            "transcript": alt.transcript,
            "confidence": round(alt.confidence, 4),
            "words": len(alt.words) if alt.words else 0,
        }
        for result in response.results
        for alt in result.alternatives
    ]

    return {
        "api": "v2",
        "model": model,
        "location": location,
        "latencyMs": round(latencySeconds * 1000, 1),
        "results": alternatives,
        "resultCount": len(getattr(response, "results", [])),
    }
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/run")
@limiter.limit("10/minute")
async def runBenchmark(
    request: Request,
    file: UploadFile = File(...),
    language: str = Form(default="de-DE"),
    v1Model: str = Form(default="latest_long"),
    v2Model: str = Form(default="chirp_2"),
    v2Location: str = Form(default="europe-west4"),
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Upload audio and compare v1 vs v2 STT results.

    Args:
        request: Incoming HTTP request. Not used in the body; presumably
            required by the rate-limiter decorator -- confirm.
        file: Uploaded audio file (100 bytes to 10 MB).
        language: Language code forwarded to both recognizers.
        v1Model: Model name for the v1 run.
        v2Model: Model name for the v2 run.
        v2Location: Region for the v2 run.
        currentUser: Admin user resolved by the ``_requireSysAdmin`` dependency.

    Returns:
        Dict with file metadata and one entry per API version. Each entry is
        either that run's result dict or ``{"error": <message>}`` when the
        run failed.

    Raises:
        HTTPException: 400 if the upload is larger than 10 MB or smaller than
            100 bytes.
    """
    # Local import keeps this block self-contained.
    import asyncio

    maxBytes = 10 * 1024 * 1024
    # Read at most one byte past the limit: enough to detect an oversized
    # upload without copying an arbitrarily large file into memory.
    audioBytes = await file.read(maxBytes + 1)
    if len(audioBytes) > maxBytes:
        raise HTTPException(status_code=400, detail="Audio file too large (max 10 MB)")
    if len(audioBytes) < 100:
        raise HTTPException(status_code=400, detail="Audio file too small")

    logger.info("STT benchmark: %s, %d bytes, language=%s, v1=%s, v2=%s@%s",
                file.filename, len(audioBytes), language, v1Model, v2Model, v2Location)

    # The recognizers make blocking network calls; run them in the default
    # executor so the event loop stays responsive. They still run one after
    # the other so the two latency measurements do not compete.
    loop = asyncio.get_running_loop()

    v1Result = None
    v1Error = None
    try:
        v1Result = await loop.run_in_executor(None, _runV1, audioBytes, language, v1Model)
    except Exception as e:
        # Best effort: a failing v1 run must not prevent the v2 run.
        v1Error = str(e)
        logger.warning("STT v1 benchmark failed: %s", e)

    v2Result = None
    v2Error = None
    try:
        v2Result = await loop.run_in_executor(
            None, _runV2, audioBytes, language, v2Model, v2Location
        )
    except Exception as e:
        v2Error = str(e)
        logger.warning("STT v2 benchmark failed: %s", e)

    return {
        "filename": file.filename,
        "fileSizeBytes": len(audioBytes),
        "language": language,
        "v1": v1Result or {"error": v1Error},
        "v2": v2Result or {"error": v2Error},
    }
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/models")
@limiter.limit("30/minute")
async def getAvailableModels(
    request: Request,
    currentUser: User = Depends(_requireSysAdmin),
) -> Dict[str, Any]:
    """Return the static option lists for the benchmark UI.

    The response holds ``v1Models``, ``v2Models``, ``locations`` and
    ``languages``, each a list of ``{"value", "label"}`` entries.
    """
    def _asOptions(pairs):
        # Turn (value, label) tuples into the option dicts returned to the caller.
        return [{"value": value, "label": label} for value, label in pairs]

    v1Models = _asOptions([
        ("latest_long", "latest_long (default)"),
        ("latest_short", "latest_short"),
        ("phone_call", "phone_call"),
        ("video", "video"),
        ("command_and_search", "command_and_search"),
    ])
    v2Models = _asOptions([
        ("chirp_2", "Chirp 2 (recommended)"),
        ("chirp", "Chirp (original)"),
        ("long", "long"),
        ("short", "short"),
    ])
    locations = _asOptions([
        ("europe-west4", "Europe West (NL)"),
        ("us-central1", "US Central"),
        ("asia-southeast1", "Asia Southeast"),
    ])
    languages = _asOptions([
        ("de-DE", "Deutsch (DE)"),
        ("de-CH", "Deutsch (CH)"),
        ("en-US", "English (US)"),
        ("en-GB", "English (GB)"),
        ("fr-FR", "Francais (FR)"),
        ("it-IT", "Italiano (IT)"),
    ])

    return {
        "v1Models": v1Models,
        "v2Models": v2Models,
        "locations": locations,
        "languages": languages,
    }
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
"""DataSource auxiliary endpoints: settings (ragLimits) and cost estimate.
|
"""DataSource auxiliary endpoints: settings (ragLimits) and cost estimate.
|
||||||
|
|
||||||
Flag toggles (neutralize / scope / ragIndexEnabled) have moved to the
|
Flag toggles (neutralize / ragIndexEnabled) have moved to the
|
||||||
generic UDB router (`POST /api/udb/node/{key}/flag/{flag}`); see
|
generic UDB router (`POST /api/udb/node/{key}/flag/{flag}`); see
|
||||||
`modules/routes/routeUdb.py` and the wiki UDB reference page.
|
`modules/routes/routeUdb.py` and the wiki UDB reference page.
|
||||||
"""
|
"""
|
||||||
|
|
@ -127,9 +127,8 @@ def _updateDataSourceSettings(
|
||||||
Currently supports `ragLimits` only. Unknown top-level keys in the body are
|
Currently supports `ragLimits` only. Unknown top-level keys in the body are
|
||||||
rejected to avoid silently storing garbage that no consumer reads.
|
rejected to avoid silently storing garbage that no consumer reads.
|
||||||
|
|
||||||
DataSource: owner-only (or sysadmin). For mandate/feature scopes the
|
DataSource: owner-only (or sysadmin). FeatureDataSource requires
|
||||||
mandateAdmin also passes. FeatureDataSource has no userId/scope; for
|
a feature-admin role on the FDS's featureInstanceId.
|
||||||
those we require a feature-admin role on the FDS's featureInstanceId.
|
|
||||||
"""
|
"""
|
||||||
if not isinstance(settings, dict):
|
if not isinstance(settings, dict):
|
||||||
raise HTTPException(status_code=400, detail="settings must be an object")
|
raise HTTPException(status_code=400, detail="settings must be an object")
|
||||||
|
|
@ -148,13 +147,7 @@ def _updateDataSourceSettings(
|
||||||
if model is DataSource:
|
if model is DataSource:
|
||||||
ownerId = str(rec.get("userId") or "")
|
ownerId = str(rec.get("userId") or "")
|
||||||
if ownerId and ownerId != currentUserId and not context.isSysAdmin:
|
if ownerId and ownerId != currentUserId and not context.isSysAdmin:
|
||||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import getEffectiveFlag
|
raise HTTPException(status_code=403, detail="Not allowed to modify this DataSource's settings")
|
||||||
connectionId = rec.get("connectionId", "")
|
|
||||||
allDs = rootIf.db.getRecordset(DataSource, recordFilter={"connectionId": connectionId})
|
|
||||||
scope = str(getEffectiveFlag(rec, "scope", allDs, mode="walk"))
|
|
||||||
isMandateAdmin = getattr(context, "isMandateAdmin", False)
|
|
||||||
if scope == "personal" or not isMandateAdmin:
|
|
||||||
raise HTTPException(status_code=403, detail="Not allowed to modify this DataSource's settings")
|
|
||||||
else:
|
else:
|
||||||
from modules.serviceCenter.services.serviceKnowledge.udbNodes import _isFeatureAdmin
|
from modules.serviceCenter.services.serviceKnowledge.udbNodes import _isFeatureAdmin
|
||||||
featureInstanceId = str(rec.get("featureInstanceId") or "")
|
featureInstanceId = str(rec.get("featureInstanceId") or "")
|
||||||
|
|
|
||||||
|
|
@ -2234,13 +2234,13 @@ async def get_bzo_information(
|
||||||
"""
|
"""
|
||||||
Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde.
|
Extract BZO information from PDF documents for a specific Bauzone in a Gemeinde.
|
||||||
|
|
||||||
Uses a langgraph workflow to extract content from BZO PDF documents for the
|
Uses the BZO extraction pipeline to extract content from BZO PDF documents for the
|
||||||
specified Gemeinde, then uses AI to search for relevant information specific
|
specified Gemeinde, then uses AI to search for relevant information specific
|
||||||
to the specified Bauzone.
|
to the specified Bauzone.
|
||||||
|
|
||||||
The workflow:
|
The workflow:
|
||||||
1. Finds BZO documents for the Gemeinde (by name or ID)
|
1. Finds BZO documents for the Gemeinde (by name or ID)
|
||||||
2. Extracts content from PDFs using langgraph workflow
|
2. Extracts content from PDFs using the BZO extraction pipeline
|
||||||
3. Filters rules, zones, and articles by Bauzone
|
3. Filters rules, zones, and articles by Bauzone
|
||||||
4. Uses AI to generate a summary and find relevant information
|
4. Uses AI to generate a summary and find relevant information
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -114,9 +114,6 @@ def _getFeatureUiObjects(featureCode: str) -> List[Dict[str, Any]]:
|
||||||
elif featureCode == "neutralization":
|
elif featureCode == "neutralization":
|
||||||
from modules.features.neutralization.mainNeutralization import UI_OBJECTS
|
from modules.features.neutralization.mainNeutralization import UI_OBJECTS
|
||||||
return UI_OBJECTS
|
return UI_OBJECTS
|
||||||
elif featureCode == "chatbot":
|
|
||||||
from modules.features.chatbot.mainChatbot import UI_OBJECTS
|
|
||||||
return UI_OBJECTS
|
|
||||||
elif featureCode == "commcoach":
|
elif featureCode == "commcoach":
|
||||||
from modules.features.commcoach.mainCommcoach import UI_OBJECTS
|
from modules.features.commcoach.mainCommcoach import UI_OBJECTS
|
||||||
return UI_OBJECTS
|
return UI_OBJECTS
|
||||||
|
|
|
||||||
|
|
@ -50,9 +50,6 @@ router = APIRouter(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
_VALID_SCOPES = {"personal", "featureInstance", "mandate", "global"}
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# POST /api/udb/tree/children
|
# POST /api/udb/tree/children
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -98,7 +95,6 @@ class _UdbFlagBody(BaseModel):
|
||||||
the flag (see `_extractFlagValue` for the mapping). `value` is typed
|
the flag (see `_extractFlagValue` for the mapping). `value` is typed
|
||||||
as Any because the legal type depends on the flag:
|
as Any because the legal type depends on the flag:
|
||||||
- neutralize/ragIndexEnabled : bool | null (null = inherit)
|
- neutralize/ragIndexEnabled : bool | null (null = inherit)
|
||||||
- scope : str | null (one of _VALID_SCOPES, null = inherit)
|
|
||||||
"""
|
"""
|
||||||
value: Any = Field(default=None, description="New flag value or null to reset to inherit.")
|
value: Any = Field(default=None, description="New flag value or null to reset to inherit.")
|
||||||
|
|
||||||
|
|
@ -108,7 +104,7 @@ class _UdbFlagBody(BaseModel):
|
||||||
async def _udbNodeFlag(
|
async def _udbNodeFlag(
|
||||||
request: Request,
|
request: Request,
|
||||||
nodeKey: str = Path(..., description="Tree key of the node to modify"),
|
nodeKey: str = Path(..., description="Tree key of the node to modify"),
|
||||||
flag: str = Path(..., description="One of: neutralize | scope | ragIndexEnabled"),
|
flag: str = Path(..., description="One of: neutralize | ragIndexEnabled"),
|
||||||
body: _UdbFlagBody = Body(default_factory=_UdbFlagBody),
|
body: _UdbFlagBody = Body(default_factory=_UdbFlagBody),
|
||||||
context: RequestContext = Depends(getRequestContext),
|
context: RequestContext = Depends(getRequestContext),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
|
|
@ -122,7 +118,7 @@ async def _udbNodeFlag(
|
||||||
RBAC: `node.canEdit(context, rootIf)` decides; the route never
|
RBAC: `node.canEdit(context, rootIf)` decides; the route never
|
||||||
re-implements ownership rules.
|
re-implements ownership rules.
|
||||||
"""
|
"""
|
||||||
if flag not in ("neutralize", "scope", "ragIndexEnabled"):
|
if flag not in ("neutralize", "ragIndexEnabled"):
|
||||||
raise HTTPException(status_code=400, detail=f"Unknown flag: {flag}")
|
raise HTTPException(status_code=400, detail=f"Unknown flag: {flag}")
|
||||||
|
|
||||||
value = _validateFlagValue(flag, body.value, context)
|
value = _validateFlagValue(flag, body.value, context)
|
||||||
|
|
@ -188,15 +184,6 @@ def _validateFlagValue(flag: str, value: Any, context: RequestContext) -> Any:
|
||||||
"""
|
"""
|
||||||
if value is None:
|
if value is None:
|
||||||
return None
|
return None
|
||||||
if flag == "scope":
|
|
||||||
if not isinstance(value, str) or value not in _VALID_SCOPES:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"Invalid scope: {value!r}. Must be one of {sorted(_VALID_SCOPES)}",
|
|
||||||
)
|
|
||||||
if value == "global" and not context.isSysAdmin:
|
|
||||||
raise HTTPException(status_code=403, detail=routeApiMsg("Only sysadmins can set global scope"))
|
|
||||||
return value
|
|
||||||
# neutralize / ragIndexEnabled
|
# neutralize / ragIndexEnabled
|
||||||
if isinstance(value, bool):
|
if isinstance(value, bool):
|
||||||
return value
|
return value
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ def checkUiAccess(
|
||||||
Args:
|
Args:
|
||||||
RbacInstance: RbacClass instance
|
RbacInstance: RbacClass instance
|
||||||
currentUser: Current user object
|
currentUser: Current user object
|
||||||
uiPath: UI path (e.g., "playground.voice.settings", "chatbot.search")
|
uiPath: UI path (e.g., "playground.voice.settings", "workspace.search")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if user has view permission for the UI element, False otherwise
|
True if user has view permission for the UI element, False otherwise
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
"""Streaming core service for SSE event management."""
|
"""Streaming core service for SSE event management."""
|
||||||
|
|
||||||
from .eventManager import EventManager, get_event_manager
|
from .eventManager import EventManager, get_event_manager
|
||||||
from .helpers import ChatStreamingHelper
|
|
||||||
from .mainServiceStreaming import StreamingService
|
from .mainServiceStreaming import StreamingService
|
||||||
|
|
||||||
__all__ = ["EventManager", "get_event_manager", "ChatStreamingHelper", "StreamingService"]
|
__all__ = ["EventManager", "get_event_manager", "StreamingService"]
|
||||||
|
|
|
||||||
|
|
@ -1,242 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""Streaming helper utilities for chat message processing and normalization."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any, Dict, List, Literal, Mapping, Optional
|
|
||||||
|
|
||||||
from langchain_core.messages import (
|
|
||||||
AIMessage,
|
|
||||||
BaseMessage,
|
|
||||||
HumanMessage,
|
|
||||||
SystemMessage,
|
|
||||||
ToolMessage,
|
|
||||||
)
|
|
||||||
|
|
||||||
Role = Literal["user", "assistant", "system", "tool"]
|
|
||||||
|
|
||||||
|
|
||||||
class ChatStreamingHelper:
|
|
||||||
"""Pure helper methods for streaming and message normalization.
|
|
||||||
|
|
||||||
This class provides static utility methods for converting between different
|
|
||||||
message formats, extracting content, and normalizing message structures
|
|
||||||
for streaming chat applications.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def role_from_message(*, msg: BaseMessage) -> Role:
|
|
||||||
"""Extract the role from a BaseMessage instance.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
msg: The BaseMessage instance to extract the role from.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The role as a string literal: "user", "assistant", "system", or "tool".
|
|
||||||
Defaults to "assistant" if the message type is not recognized.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> from langchain_core.messages import HumanMessage
|
|
||||||
>>> msg = HumanMessage(content="Hello")
|
|
||||||
>>> ChatStreamingHelper.role_from_message(msg=msg)
|
|
||||||
'user'
|
|
||||||
"""
|
|
||||||
if isinstance(msg, HumanMessage):
|
|
||||||
return "user"
|
|
||||||
if isinstance(msg, AIMessage):
|
|
||||||
return "assistant"
|
|
||||||
if isinstance(msg, SystemMessage):
|
|
||||||
return "system"
|
|
||||||
if isinstance(msg, ToolMessage):
|
|
||||||
return "tool"
|
|
||||||
return getattr(msg, "role", "assistant")
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def flatten_content(*, content: Any) -> str:
|
|
||||||
"""Convert complex content structures to plain text.
|
|
||||||
|
|
||||||
This method handles various content formats including strings, lists of
|
|
||||||
content parts, and dictionaries with text fields. It's designed to
|
|
||||||
normalize content from different message sources into a consistent
|
|
||||||
plain text format.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: The content to flatten. Can be:
|
|
||||||
- str: Returned as-is after stripping whitespace
|
|
||||||
- list: Each item processed and joined with newlines
|
|
||||||
- dict: Text extracted from "text" or "content" fields
|
|
||||||
- None: Returns empty string
|
|
||||||
- Any other type: Converted to string
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The flattened content as a plain text string with whitespace stripped.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> content = [{"type": "text", "text": "Hello"}, {"type": "text", "text": "world"}]
|
|
||||||
>>> ChatStreamingHelper.flatten_content(content=content)
|
|
||||||
'Hello\nworld'
|
|
||||||
|
|
||||||
>>> content = {"text": "Simple message"}
|
|
||||||
>>> ChatStreamingHelper.flatten_content(content=content)
|
|
||||||
'Simple message'
|
|
||||||
"""
|
|
||||||
if content is None:
|
|
||||||
return ""
|
|
||||||
if isinstance(content, str):
|
|
||||||
return content.strip()
|
|
||||||
if isinstance(content, list):
|
|
||||||
parts: List[str] = []
|
|
||||||
for part in content:
|
|
||||||
if isinstance(part, dict):
|
|
||||||
if "text" in part and isinstance(part["text"], str):
|
|
||||||
parts.append(part["text"])
|
|
||||||
elif part.get("type") == "text" and isinstance(
|
|
||||||
part.get("text"), str
|
|
||||||
):
|
|
||||||
parts.append(part["text"])
|
|
||||||
elif "content" in part and isinstance(part["content"], str):
|
|
||||||
parts.append(part["content"])
|
|
||||||
else:
|
|
||||||
# Fallback for unknown dictionary structures
|
|
||||||
val = part.get("value")
|
|
||||||
if isinstance(val, str):
|
|
||||||
parts.append(val)
|
|
||||||
else:
|
|
||||||
parts.append(str(part))
|
|
||||||
return "\n".join(p.strip() for p in parts if p is not None)
|
|
||||||
if isinstance(content, dict):
|
|
||||||
if "text" in content and isinstance(content["text"], str):
|
|
||||||
return content["text"].strip()
|
|
||||||
if "content" in content and isinstance(content["content"], str):
|
|
||||||
return content["content"].strip()
|
|
||||||
return str(content).strip()
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def message_to_dict(*, msg: BaseMessage) -> Dict[str, Any]:
|
|
||||||
"""Convert a BaseMessage instance to a dictionary for streaming output.
|
|
||||||
|
|
||||||
This method normalizes BaseMessage instances into a consistent dictionary
|
|
||||||
format suitable for JSON serialization and streaming to clients.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
msg: The BaseMessage instance to convert.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A dictionary containing:
|
|
||||||
- "role": The message role (user, assistant, system, tool)
|
|
||||||
- "content": The flattened message content as plain text
|
|
||||||
- "tool_calls": Tool calls if present (optional)
|
|
||||||
- "name": Message name if present (optional)
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> from langchain_core.messages import HumanMessage
|
|
||||||
>>> msg = HumanMessage(content="Hello there")
|
|
||||||
>>> result = ChatStreamingHelper.message_to_dict(msg=msg)
|
|
||||||
>>> result["role"]
|
|
||||||
'user'
|
|
||||||
>>> result["content"]
|
|
||||||
'Hello there'
|
|
||||||
"""
|
|
||||||
payload: Dict[str, Any] = {
|
|
||||||
"role": ChatStreamingHelper.role_from_message(msg=msg),
|
|
||||||
"content": ChatStreamingHelper.flatten_content(
|
|
||||||
content=getattr(msg, "content", "")
|
|
||||||
),
|
|
||||||
}
|
|
||||||
tool_calls = getattr(msg, "tool_calls", None)
|
|
||||||
if tool_calls:
|
|
||||||
payload["tool_calls"] = tool_calls
|
|
||||||
name = getattr(msg, "name", None)
|
|
||||||
if name:
|
|
||||||
payload["name"] = name
|
|
||||||
return payload
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def dict_message_to_dict(*, obj: Mapping[str, Any]) -> Dict[str, Any]:
|
|
||||||
"""Convert a dictionary-shaped message to a normalized dictionary.
|
|
||||||
|
|
||||||
This method handles messages that come from serialized state and are
|
|
||||||
represented as dictionaries rather than BaseMessage instances. It
|
|
||||||
normalizes various dictionary formats into a consistent structure.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
obj: The dictionary-shaped message to convert. Expected to contain
|
|
||||||
fields like "role", "type", "content", "text", etc.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A normalized dictionary containing:
|
|
||||||
- "role": The message role (user, assistant, system, tool)
|
|
||||||
- "content": The flattened message content as plain text
|
|
||||||
- "tool_calls": Tool calls if present (optional)
|
|
||||||
- "name": Message name if present (optional)
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> obj = {"type": "human", "content": "Hello"}
|
|
||||||
>>> result = ChatStreamingHelper.dict_message_to_dict(obj=obj)
|
|
||||||
>>> result["role"]
|
|
||||||
'user'
|
|
||||||
>>> result["content"]
|
|
||||||
'Hello'
|
|
||||||
"""
|
|
||||||
role: Optional[str] = obj.get("role")
|
|
||||||
if not role:
|
|
||||||
# Handle alternative type field mappings
|
|
||||||
typ = obj.get("type")
|
|
||||||
if typ in ("human", "user"):
|
|
||||||
role = "user"
|
|
||||||
elif typ in ("ai", "assistant"):
|
|
||||||
role = "assistant"
|
|
||||||
elif typ in ("system",):
|
|
||||||
role = "system"
|
|
||||||
elif typ in ("tool", "function"):
|
|
||||||
role = "tool"
|
|
||||||
|
|
||||||
content = obj.get("content")
|
|
||||||
if content is None and "text" in obj:
|
|
||||||
content = obj["text"]
|
|
||||||
|
|
||||||
out: Dict[str, Any] = {
|
|
||||||
"role": role or "assistant",
|
|
||||||
"content": ChatStreamingHelper.flatten_content(content=content),
|
|
||||||
}
|
|
||||||
if "tool_calls" in obj:
|
|
||||||
out["tool_calls"] = obj["tool_calls"]
|
|
||||||
if obj.get("name"):
|
|
||||||
out["name"] = obj["name"]
|
|
||||||
return out
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def extract_messages_from_output(*, output_obj: Any) -> List[Any]:
|
|
||||||
"""Extract messages from LangGraph output objects.
|
|
||||||
|
|
||||||
This method handles various output formats from LangGraph execution,
|
|
||||||
extracting the messages list from different possible structures.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
output_obj: The output object from LangGraph execution. Can be:
|
|
||||||
- An object with a "messages" attribute
|
|
||||||
- A dictionary with a "messages" key
|
|
||||||
- Any other object (returns empty list)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of extracted messages, or an empty list if no messages
|
|
||||||
are found or if the output object is None.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> output = {"messages": [{"role": "user", "content": "Hello"}]}
|
|
||||||
>>> messages = ChatStreamingHelper.extract_messages_from_output(output_obj=output)
|
|
||||||
>>> len(messages)
|
|
||||||
1
|
|
||||||
"""
|
|
||||||
if output_obj is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Try to parse dicts first
|
|
||||||
if isinstance(output_obj, dict):
|
|
||||||
msgs = output_obj.get("messages")
|
|
||||||
return msgs if isinstance(msgs, list) else []
|
|
||||||
|
|
||||||
# Then try to get messages attribute
|
|
||||||
msgs = getattr(output_obj, "messages", None)
|
|
||||||
return msgs if isinstance(msgs, list) else []
|
|
||||||
|
|
@ -9,7 +9,6 @@ import logging
|
||||||
from typing import Any, Callable
|
from typing import Any, Callable
|
||||||
|
|
||||||
from modules.serviceCenter.core.serviceStreaming.eventManager import EventManager, get_event_manager
|
from modules.serviceCenter.core.serviceStreaming.eventManager import EventManager, get_event_manager
|
||||||
from modules.serviceCenter.core.serviceStreaming.helpers import ChatStreamingHelper
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -25,7 +24,3 @@ class StreamingService:
|
||||||
def getEventManager(self) -> EventManager:
|
def getEventManager(self) -> EventManager:
|
||||||
"""Get the global event manager instance for SSE streaming."""
|
"""Get the global event manager instance for SSE streaming."""
|
||||||
return get_event_manager()
|
return get_event_manager()
|
||||||
|
|
||||||
def getChatStreamingHelper(self):
|
|
||||||
"""Get ChatStreamingHelper utility for message normalization (no legacy import at call site)."""
|
|
||||||
return ChatStreamingHelper
|
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,90 @@ from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _formatMailLine(entry) -> str:
|
||||||
|
"""Format a mail ExternalEntry with sender + date so the agent can decide
|
||||||
|
which messages to download without fetching every body."""
|
||||||
|
meta = entry.metadata or {}
|
||||||
|
sender = meta.get("from") or ""
|
||||||
|
received = (meta.get("receivedDateTime") or meta.get("date") or "")[:24]
|
||||||
|
parts = []
|
||||||
|
if received:
|
||||||
|
parts.append(received)
|
||||||
|
if sender:
|
||||||
|
parts.append(f"from: {sender}")
|
||||||
|
metaStr = f" [{', '.join(parts)}]" if parts else ""
|
||||||
|
return f"- ✉️ {entry.name}{metaStr} path: {entry.path}"
|
||||||
|
|
||||||
|
|
||||||
|
def _formatContactLine(entry) -> str:
|
||||||
|
"""Format a contact ExternalEntry with email/phone/company inline so the
|
||||||
|
agent does not have to download a .vcf for every contact. Handles the
|
||||||
|
differing metadata keys across MSFT, Google and Infomaniak adapters."""
|
||||||
|
meta = entry.metadata or {}
|
||||||
|
# email: MSFT 'emailAddresses' (list), Google 'emails' (list), Infomaniak 'email' (str)
|
||||||
|
emails = meta.get("emailAddresses") or meta.get("emails") or []
|
||||||
|
if isinstance(emails, str):
|
||||||
|
emails = [emails]
|
||||||
|
email = next((e for e in emails if e), None) or meta.get("email") or ""
|
||||||
|
# phone: MSFT 'businessPhones'/'mobilePhone', Google 'phones', Infomaniak 'phone'
|
||||||
|
phones = meta.get("phones") or meta.get("businessPhones") or []
|
||||||
|
if isinstance(phones, str):
|
||||||
|
phones = [phones]
|
||||||
|
phone = next((p for p in phones if p), None) or meta.get("mobilePhone") or meta.get("phone") or ""
|
||||||
|
company = meta.get("companyName") or meta.get("organization") or ""
|
||||||
|
parts = []
|
||||||
|
if email:
|
||||||
|
parts.append(email)
|
||||||
|
if phone:
|
||||||
|
parts.append(phone)
|
||||||
|
if company:
|
||||||
|
parts.append(company)
|
||||||
|
metaStr = f" [{', '.join(parts)}]" if parts else ""
|
||||||
|
return f"- 👤 {entry.name}{metaStr} path: {entry.path}"
|
||||||
|
|
||||||
|
|
||||||
|
def _formatTaskLine(entry) -> str:
|
||||||
|
"""Format a ClickUp task with status/assignee/due-date inline so the agent
|
||||||
|
can answer task questions without downloading every task JSON."""
|
||||||
|
meta = entry.metadata or {}
|
||||||
|
task = meta.get("task") or {}
|
||||||
|
parts = []
|
||||||
|
status = ((task.get("status") or {}).get("status")) if isinstance(task.get("status"), dict) else task.get("status")
|
||||||
|
if status:
|
||||||
|
parts.append(f"status: {status}")
|
||||||
|
assignees = [a.get("username") or a.get("email") for a in (task.get("assignees") or []) if a]
|
||||||
|
assignees = [a for a in assignees if a]
|
||||||
|
if assignees:
|
||||||
|
parts.append(f"assignee: {', '.join(assignees)}")
|
||||||
|
dueMs = task.get("due_date")
|
||||||
|
if dueMs:
|
||||||
|
try:
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
due = datetime.fromtimestamp(int(dueMs) / 1000, tz=timezone.utc).strftime("%Y-%m-%d")
|
||||||
|
parts.append(f"due: {due}")
|
||||||
|
except (TypeError, ValueError, OverflowError):
|
||||||
|
pass
|
||||||
|
metaStr = f" [{', '.join(parts)}]" if parts else ""
|
||||||
|
return f"- ☑️ {entry.name}{metaStr} path: {entry.path}"
|
||||||
|
|
||||||
|
|
||||||
|
def _buildCountLine(entries, limit) -> str:
|
||||||
|
"""Build a summary count line, including total estimate if available."""
|
||||||
|
realCount = sum(1 for e in entries if not (e.path or "").endswith("/_count"))
|
||||||
|
line = f"\n\n({realCount} entries returned"
|
||||||
|
if limit is not None:
|
||||||
|
line += f", limit={limit}"
|
||||||
|
for e in entries:
|
||||||
|
if (e.path or "").endswith("/_count"):
|
||||||
|
meta = e.metadata or {}
|
||||||
|
total = meta.get("totalEstimate") or meta.get("totalCount")
|
||||||
|
if total:
|
||||||
|
line += f", ~{total} total in source"
|
||||||
|
break
|
||||||
|
line += ")"
|
||||||
|
return line
|
||||||
|
|
||||||
|
|
||||||
def _registerDataSourceTools(registry: ToolRegistry, services):
|
def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
"""Auto-extracted from registerCoreTools."""
|
"""Auto-extracted from registerCoreTools."""
|
||||||
|
|
||||||
|
|
@ -81,6 +165,8 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
|
|
||||||
_MAIL_SERVICES = {"outlook", "gmail"}
|
_MAIL_SERVICES = {"outlook", "gmail"}
|
||||||
_CALENDAR_SERVICES = {"calendar", "calendarFolder"}
|
_CALENDAR_SERVICES = {"calendar", "calendarFolder"}
|
||||||
|
_CONTACT_SERVICES = {"contact", "contactFolder"}
|
||||||
|
_CLICKUP_SERVICES = {"clickup", "clickupList"}
|
||||||
|
|
||||||
async def _browseDataSource(args: Dict[str, Any], context: Dict[str, Any]):
|
async def _browseDataSource(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
dsId = args.get("dataSourceId", "")
|
dsId = args.get("dataSourceId", "")
|
||||||
|
|
@ -118,6 +204,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data="Empty directory.")
|
return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data="Empty directory.")
|
||||||
lines = []
|
lines = []
|
||||||
isCalendar = service in _CALENDAR_SERVICES
|
isCalendar = service in _CALENDAR_SERVICES
|
||||||
|
isMail = service in _MAIL_SERVICES
|
||||||
|
isContact = service in _CONTACT_SERVICES
|
||||||
|
isClickup = service in _CLICKUP_SERVICES
|
||||||
for e in entries:
|
for e in entries:
|
||||||
prefix = "[DIR]" if e.isFolder else "[FILE]"
|
prefix = "[DIR]" if e.isFolder else "[FILE]"
|
||||||
sizeInfo = f" ({e.size} bytes)" if e.size else ""
|
sizeInfo = f" ({e.size} bytes)" if e.size else ""
|
||||||
|
|
@ -127,18 +216,24 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
loc = e.metadata.get("location") or ""
|
loc = e.metadata.get("location") or ""
|
||||||
locStr = f" 📍 {loc}" if loc else ""
|
locStr = f" 📍 {loc}" if loc else ""
|
||||||
lines.append(f"- 📅 {start} – {end} {e.name}{locStr}")
|
lines.append(f"- 📅 {start} – {end} {e.name}{locStr}")
|
||||||
|
elif isMail and not e.isFolder:
|
||||||
|
lines.append(_formatMailLine(e))
|
||||||
|
elif isContact and not e.isFolder:
|
||||||
|
lines.append(_formatContactLine(e))
|
||||||
|
elif isClickup and not e.isFolder and (e.metadata or {}).get("cuType") == "task":
|
||||||
|
lines.append(_formatTaskLine(e))
|
||||||
else:
|
else:
|
||||||
lines.append(f"- {prefix} {e.name}{sizeInfo} path: {e.path}")
|
lines.append(f"- {prefix} {e.name}{sizeInfo} path: {e.path}")
|
||||||
result = "\n".join(lines)
|
result = "\n".join(lines)
|
||||||
countLine = f"\n\n({len(entries)} entries returned"
|
result += _buildCountLine(entries, limit)
|
||||||
if limit is not None:
|
|
||||||
countLine += f", limit={limit}"
|
|
||||||
countLine += ")"
|
|
||||||
result += countLine
|
|
||||||
if service in _MAIL_SERVICES:
|
if service in _MAIL_SERVICES:
|
||||||
result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID."
|
result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID."
|
||||||
if isCalendar and not any(e.isFolder for e in entries):
|
if isCalendar and not any(e.isFolder for e in entries):
|
||||||
result += "\n\nThese are calendar event summaries with date/time. You do NOT need to download individual events — this listing already contains subject, start, end, and location. Use the filter parameter with a date range (e.g. '2026-06') for specific periods."
|
result += "\n\nThese are calendar event summaries with date/time. You do NOT need to download individual events — this listing already contains subject, start, end, and location. Use the filter parameter with a date range (e.g. '2026-06') for specific periods."
|
||||||
|
if isContact and not any(e.isFolder for e in entries):
|
||||||
|
result += "\n\nThese are contacts with name, email, phone and company shown inline. You do NOT need to download a vCard for each contact — only download when you need the full record."
|
||||||
|
if isClickup and any((e.metadata or {}).get("cuType") == "task" for e in entries):
|
||||||
|
result += "\n\nThese are ClickUp tasks with status, assignee and due-date shown inline. Only download a task (JSON) when you need its full description, comments or custom fields."
|
||||||
return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data=result)
|
return ToolResult(toolCallId="", toolName="browseDataSource", success=True, data=result)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return ToolResult(toolCallId="", toolName="browseDataSource", success=False, error=str(e))
|
return ToolResult(toolCallId="", toolName="browseDataSource", success=False, error=str(e))
|
||||||
|
|
@ -173,6 +268,9 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
if not entries:
|
if not entries:
|
||||||
return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data="No results found.")
|
return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data="No results found.")
|
||||||
isCalendar = service in _CALENDAR_SERVICES
|
isCalendar = service in _CALENDAR_SERVICES
|
||||||
|
isMail = service in _MAIL_SERVICES
|
||||||
|
isContact = service in _CONTACT_SERVICES
|
||||||
|
isClickup = service in _CLICKUP_SERVICES
|
||||||
lines = []
|
lines = []
|
||||||
for e in entries:
|
for e in entries:
|
||||||
if isCalendar and e.metadata:
|
if isCalendar and e.metadata:
|
||||||
|
|
@ -181,18 +279,24 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
loc = e.metadata.get("location") or ""
|
loc = e.metadata.get("location") or ""
|
||||||
locStr = f" 📍 {loc}" if loc else ""
|
locStr = f" 📍 {loc}" if loc else ""
|
||||||
lines.append(f"- 📅 {start} – {end} {e.name}{locStr}")
|
lines.append(f"- 📅 {start} – {end} {e.name}{locStr}")
|
||||||
|
elif isMail and not e.isFolder:
|
||||||
|
lines.append(_formatMailLine(e))
|
||||||
|
elif isContact and not e.isFolder:
|
||||||
|
lines.append(_formatContactLine(e))
|
||||||
|
elif isClickup and not e.isFolder and (e.metadata or {}).get("cuType") == "task":
|
||||||
|
lines.append(_formatTaskLine(e))
|
||||||
else:
|
else:
|
||||||
lines.append(f"- {e.name} (path: {e.path})")
|
lines.append(f"- {e.name} (path: {e.path})")
|
||||||
result = "\n".join(lines)
|
result = "\n".join(lines)
|
||||||
countLine = f"\n\n({len(entries)} entries returned"
|
result += _buildCountLine(entries, limit)
|
||||||
if limit is not None:
|
|
||||||
countLine += f", limit={limit}"
|
|
||||||
countLine += ")"
|
|
||||||
result += countLine
|
|
||||||
if service in _MAIL_SERVICES:
|
if service in _MAIL_SERVICES:
|
||||||
result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID."
|
result += "\n\nIMPORTANT: These are email subjects only. To read the full email content, use downloadFromDataSource with the path, then readFile on the returned file ID."
|
||||||
if isCalendar:
|
if isCalendar:
|
||||||
result += "\n\nThese are calendar event summaries. You do NOT need to download individual events — subject, start, end, and location are shown above. For date-specific queries, use a date range as query (e.g. '2026-06')."
|
result += "\n\nThese are calendar event summaries. You do NOT need to download individual events — subject, start, end, and location are shown above. For date-specific queries, use a date range as query (e.g. '2026-06')."
|
||||||
|
if isContact:
|
||||||
|
result += "\n\nThese are contacts with name, email, phone and company shown inline. You do NOT need to download a vCard for each contact — only download when you need the full record."
|
||||||
|
if isClickup:
|
||||||
|
result += "\n\nThese are ClickUp tasks with status, assignee and due-date shown inline. Only download a task (JSON) when you need its full description, comments or custom fields."
|
||||||
return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data=result)
|
return ToolResult(toolCallId="", toolName="searchDataSource", success=True, data=result)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return ToolResult(toolCallId="", toolName="searchDataSource", success=False, error=str(e))
|
return ToolResult(toolCallId="", toolName="searchDataSource", success=False, error=str(e))
|
||||||
|
|
@ -295,15 +399,20 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
registry.register(
|
registry.register(
|
||||||
"browseDataSource", _browseDataSource,
|
"browseDataSource", _browseDataSource,
|
||||||
description=(
|
description=(
|
||||||
"Browse files, folders, or emails in a data source. Accepts either:\n"
|
"List the contents of a folder/directory in a data source. Accepts either:\n"
|
||||||
"- dataSourceId (for attached data sources shown in the prompt), OR\n"
|
"- dataSourceId (for attached data sources shown in the prompt), OR\n"
|
||||||
"- connectionId + service (for direct connection access via listConnections).\n"
|
"- connectionId + service (for direct connection access via listConnections).\n"
|
||||||
"\n"
|
"\n"
|
||||||
"DEFAULT BEHAVIOUR: omit `limit` to get the connector's full default page. "
|
"WHEN TO USE: Use browse to enumerate a directory's contents, or to get the "
|
||||||
"For mail folders (Outlook/Gmail) the default returns up to 100 newest "
|
"newest items of a mail/calendar folder. For TARGETED queries (find emails "
|
||||||
"messages -- DO NOT pass a smaller limit just to be safe; users almost "
|
"from a person, files about a topic, events in a date range), prefer "
|
||||||
"always want the full default page or explicitly more. Only set `limit` "
|
"searchDataSource -- it queries the source server-side and avoids listing/"
|
||||||
"when the user asks for a specific number (e.g. 'show me the latest 5 mails')."
|
"downloading large amounts of irrelevant data.\n"
|
||||||
|
"\n"
|
||||||
|
"For calendar folders, pass a date range via `filter` (e.g. '2026-06' or "
|
||||||
|
"'2026-06-01 2026-06-30') so only that period is fetched. "
|
||||||
|
"Omit `limit` for the connector default; set it only when the user asks for a "
|
||||||
|
"specific count or you need MORE after hitting the default."
|
||||||
),
|
),
|
||||||
parameters={
|
parameters={
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
@ -334,8 +443,21 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
registry.register(
|
registry.register(
|
||||||
"searchDataSource", _searchDataSource,
|
"searchDataSource", _searchDataSource,
|
||||||
description=(
|
description=(
|
||||||
"Search for files within a data source. Accepts either dataSourceId OR connectionId+service. "
|
"PRIMARY tool for finding specific data in a source. The query runs "
|
||||||
"Use the `limit` parameter to control how many hits are returned."
|
"server-side, so this is far more efficient than browsing + downloading -- "
|
||||||
|
"ALWAYS prefer search when the user is looking for something specific "
|
||||||
|
"(a topic, a sender, a date range, a keyword). NEVER browse and download an "
|
||||||
|
"entire large source to find a few items; search first, then download ONLY "
|
||||||
|
"the matching results.\n"
|
||||||
|
"\n"
|
||||||
|
"Per-service query syntax:\n"
|
||||||
|
"- Outlook (KQL): 'from:alice subject:budget', 'received>=2026-05-01'.\n"
|
||||||
|
"- Gmail: 'from:alice after:2026/05/01 before:2026/06/01 budget'.\n"
|
||||||
|
"- SharePoint/OneDrive: free-text, searches file names AND content.\n"
|
||||||
|
"- Google Drive: searches file names and content.\n"
|
||||||
|
"- Calendar: pass a date range (e.g. '2026-06' or '2026-06-01 2026-06-30').\n"
|
||||||
|
"- ClickUp: task name/description keywords.\n"
|
||||||
|
"Accepts either dataSourceId OR connectionId+service."
|
||||||
),
|
),
|
||||||
parameters={
|
parameters={
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
@ -344,7 +466,7 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
|
||||||
"connectionId": {"type": "string", "description": "UserConnection ID (alternative to dataSourceId)"},
|
"connectionId": {"type": "string", "description": "UserConnection ID (alternative to dataSourceId)"},
|
||||||
"service": {"type": "string", "description": "Service name (alternative to dataSourceId)"},
|
"service": {"type": "string", "description": "Service name (alternative to dataSourceId)"},
|
||||||
"path": {"type": "string", "description": "Scope path (used with connectionId+service)"},
|
"path": {"type": "string", "description": "Scope path (used with connectionId+service)"},
|
||||||
"query": {"type": "string", "description": "Search query"},
|
"query": {"type": "string", "description": "Search query (use the per-service syntax in the tool description)"},
|
||||||
"limit": {
|
"limit": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Maximum number of search results (default ~100, max 1000).",
|
"description": "Maximum number of search results (default ~100, max 1000).",
|
||||||
|
|
|
||||||
|
|
@ -157,6 +157,28 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
|
||||||
success=False, error=f"No data tables available for feature '{featureCode}'",
|
success=False, error=f"No data tables available for feature '{featureCode}'",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# A2: build the per-table type/inheritance-aware neutralization policy.
|
||||||
|
# tableActive = effective (own or inherited) table-level neutralize flag;
|
||||||
|
# explicitFields = fields whose neutralize flag is set explicitly.
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import resolveEffectiveForFds
|
||||||
|
neutralizePolicy: Dict[str, Dict[str, Any]] = {}
|
||||||
|
for tblObj in selectedTables:
|
||||||
|
tn = tblObj.get("meta", {}).get("table", "") if isinstance(tblObj, dict) else ""
|
||||||
|
if not tn:
|
||||||
|
continue
|
||||||
|
eff = resolveEffectiveForFds(featureInstanceId, tn, None, _fdsAll, mode="walk")
|
||||||
|
tableActive = eff.get("effectiveNeutralize") is True
|
||||||
|
explicitFields = set(neutralizeFieldsPerTable.get(tn, []))
|
||||||
|
if tableActive or explicitFields:
|
||||||
|
neutralizePolicy[tn] = {"tableActive": tableActive, "explicitFields": explicitFields}
|
||||||
|
|
||||||
|
neutralizationService = services.getService("neutralization") if hasattr(services, "getService") else None
|
||||||
|
if neutralizationService is not None and not getattr(neutralizationService, "interfaceDbComponent", None):
|
||||||
|
try:
|
||||||
|
neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
cacheKey = f"{featureInstanceId}:{hashlib.md5(question.encode()).hexdigest()}"
|
cacheKey = f"{featureInstanceId}:{hashlib.md5(question.encode()).hexdigest()}"
|
||||||
if cacheKey in _featureQueryCache:
|
if cacheKey in _featureQueryCache:
|
||||||
cachedAt, cachedResult = _featureQueryCache[cacheKey]
|
cachedAt, cachedResult = _featureQueryCache[cacheKey]
|
||||||
|
|
@ -202,7 +224,8 @@ def _registerFeatureSubAgentTools(registry: ToolRegistry, services):
|
||||||
instanceLabel=instanceLabel,
|
instanceLabel=instanceLabel,
|
||||||
tableFilters=tableFilters,
|
tableFilters=tableFilters,
|
||||||
requestLang=requestLang,
|
requestLang=requestLang,
|
||||||
neutralizeFields=neutralizeFieldsPerTable if neutralizeFieldsPerTable else None,
|
neutralizePolicy=neutralizePolicy if neutralizePolicy else None,
|
||||||
|
neutralizationService=neutralizationService,
|
||||||
maxRounds=parentMaxRounds,
|
maxRounds=parentMaxRounds,
|
||||||
maxCostCHF=parentMaxCostCHF,
|
maxCostCHF=parentMaxCostCHF,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -115,6 +115,31 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
logger.warning(f"renderDocument: knowledge service unavailable: {e}")
|
logger.warning(f"renderDocument: knowledge service unavailable: {e}")
|
||||||
resolvedImages = 0
|
resolvedImages = 0
|
||||||
|
|
||||||
|
# Large-document path: for binary doc formats (pdf/docx) we keep block
|
||||||
|
# images as fileId references and let the renderer fetch the bytes lazily
|
||||||
|
# (one image at a time) instead of embedding every image's base64 in the
|
||||||
|
# JSON. Inline images and other formats keep the eager pre-resolution.
|
||||||
|
lazyBlockImages = outputFormat.strip().lower() in ("pdf", "docx", "doc")
|
||||||
|
|
||||||
|
def _imageBytesResolver(fileId: str):
|
||||||
|
"""Lazy resolver passed to the renderer: fileId -> raw image bytes."""
|
||||||
|
if not fileId:
|
||||||
|
return None
|
||||||
|
if knowledgeService:
|
||||||
|
try:
|
||||||
|
chunks = knowledgeService._knowledgeDb.getContentChunks(fileId)
|
||||||
|
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
|
||||||
|
if imageChunks and imageChunks[0].get("data"):
|
||||||
|
import base64 as _b64
|
||||||
|
return _b64.b64decode(imageChunks[0]["data"])
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"renderDocument: lazy knowledge image fetch failed for {fileId}: {e}")
|
||||||
|
try:
|
||||||
|
return services.chat.getFileData(fileId)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"renderDocument: lazy file image fetch failed for {fileId}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
|
def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
|
||||||
"""Resolve a single image reference dict to base64Data in-place."""
|
"""Resolve a single image reference dict to base64Data in-place."""
|
||||||
nonlocal resolvedImages
|
nonlocal resolvedImages
|
||||||
|
|
@ -153,6 +178,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
cType = section.get("content_type")
|
cType = section.get("content_type")
|
||||||
# Block-level image sections
|
# Block-level image sections
|
||||||
if cType == "image":
|
if cType == "image":
|
||||||
|
# For pdf/docx, defer to lazy renderer-side resolution: keep
|
||||||
|
# the fileId reference, do not embed base64 into the JSON.
|
||||||
|
if lazyBlockImages:
|
||||||
|
continue
|
||||||
for element in section.get("elements", []):
|
for element in section.get("elements", []):
|
||||||
contentObj = element.get("content", {})
|
contentObj = element.get("content", {})
|
||||||
_resolveImageRef(contentObj)
|
_resolveImageRef(contentObj)
|
||||||
|
|
@ -195,6 +224,8 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
title=title,
|
title=title,
|
||||||
userPrompt=content,
|
userPrompt=content,
|
||||||
style=args.get("style"),
|
style=args.get("style"),
|
||||||
|
documentTheme=args.get("documentTheme"),
|
||||||
|
imageResolver=_imageBytesResolver if lazyBlockImages else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not documents:
|
if not documents:
|
||||||
|
|
@ -262,6 +293,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
|
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
|
||||||
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
|
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
|
||||||
"Images:  in the markdown. "
|
"Images:  in the markdown. "
|
||||||
|
"Layout primitives (PDF/DOCX): a fenced ```cover_page block with `title:`/`subtitle:`/`author:`/`date:`/`logo: file:ID` "
|
||||||
|
"lines renders a centered title page (e.g. legal filing / report front page); a fenced ```image_grid block with an "
|
||||||
|
"optional `columns: N` line followed by image refs (`` or `file:ID`, one per line) renders an N-column "
|
||||||
|
"image arrangement (marketing layouts). "
|
||||||
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
|
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
|
||||||
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
|
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
|
||||||
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
|
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
|
||||||
|
|
@ -280,6 +315,17 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
|
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
|
||||||
"title": {"type": "string", "description": "Document title", "default": "Document"},
|
"title": {"type": "string", "description": "Document title", "default": "Document"},
|
||||||
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
|
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
|
||||||
|
"documentTheme": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["general", "finance", "legal", "technical", "hr", "marketing"],
|
||||||
|
"description": (
|
||||||
|
"Named style preset applied by the renderer (colors, fonts, spacing). "
|
||||||
|
"Pick the one that matches the document purpose: 'legal' for serif/justified "
|
||||||
|
"legal filings, 'marketing' for bold image-friendly layouts, 'finance', "
|
||||||
|
"'technical', 'hr', or 'general' (default). The explicit 'style' object, if "
|
||||||
|
"provided, overrides individual preset keys."
|
||||||
|
),
|
||||||
|
},
|
||||||
"style": {
|
"style": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": (
|
"description": (
|
||||||
|
|
@ -840,6 +886,88 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return ToolResult(toolCallId="", toolName="neutralizeData", success=False, error=str(e))
|
return ToolResult(toolCallId="", toolName="neutralizeData", success=False, error=str(e))
|
||||||
|
|
||||||
|
async def _revealDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
|
"""De-neutralize (reveal) placeholder text back to original values for a one-time download.
|
||||||
|
|
||||||
|
PRIVACY: The cleartext is delivered to the user's browser as a transient download
|
||||||
|
only (SSE side-event). It is NEVER saved as a file, indexed into the knowledge base,
|
||||||
|
or written back into chat history. The persisted tool result contains only a
|
||||||
|
confirmation -- not the revealed cleartext. Resolution uses ONLY the private local
|
||||||
|
placeholder mapping (no external LLM).
|
||||||
|
"""
|
||||||
|
import base64 as _b64
|
||||||
|
import re as _re
|
||||||
|
text = args.get("text", "")
|
||||||
|
fileId = (args.get("fileId") or "").strip()
|
||||||
|
fileName = (args.get("fileName") or "").strip()
|
||||||
|
if not isinstance(text, str):
|
||||||
|
text = str(text) if text is not None else ""
|
||||||
|
if not text and not fileId:
|
||||||
|
return ToolResult(toolCallId="", toolName="revealDocument", success=False,
|
||||||
|
error="text or fileId is required")
|
||||||
|
try:
|
||||||
|
neutralizationService = services.getService("neutralization") if hasattr(services, "getService") else None
|
||||||
|
if not neutralizationService or not hasattr(neutralizationService, "resolveText"):
|
||||||
|
return ToolResult(toolCallId="", toolName="revealDocument", success=False,
|
||||||
|
error="Neutralization service not available")
|
||||||
|
if not getattr(neutralizationService, "interfaceDbComponent", None):
|
||||||
|
neutralizationService.interfaceDbComponent = services.chat.interfaceDbComponent
|
||||||
|
|
||||||
|
if fileId and not text:
|
||||||
|
dbMgmt = services.chat.interfaceDbComponent
|
||||||
|
fileRow = dbMgmt.getFile(fileId)
|
||||||
|
if not fileRow:
|
||||||
|
return ToolResult(toolCallId="", toolName="revealDocument", success=False,
|
||||||
|
error=f"fileId not found: {fileId}")
|
||||||
|
rawBytes = dbMgmt.getFileData(fileId)
|
||||||
|
if not rawBytes:
|
||||||
|
return ToolResult(toolCallId="", toolName="revealDocument", success=False,
|
||||||
|
error="File data not accessible")
|
||||||
|
decoded = None
|
||||||
|
for encoding in ("utf-8", "utf-8-sig", "latin-1"):
|
||||||
|
try:
|
||||||
|
decoded = rawBytes.decode(encoding)
|
||||||
|
break
|
||||||
|
except (UnicodeDecodeError, ValueError):
|
||||||
|
continue
|
||||||
|
if decoded is None:
|
||||||
|
return ToolResult(toolCallId="", toolName="revealDocument", success=False,
|
||||||
|
error="File is binary or could not be decoded as text; reveal only supports text content")
|
||||||
|
text = decoded
|
||||||
|
if not fileName:
|
||||||
|
info = fileRow if isinstance(fileRow, dict) else None
|
||||||
|
fileName = (info.get("fileName") if info else None) or f"{fileId}.txt"
|
||||||
|
|
||||||
|
# Resolve placeholders locally (private mapping, no LLM). Count for the audit message.
|
||||||
|
placeholderCount = len(_re.findall(r'\[[a-z]+\.[a-f0-9-]{36}\]', text))
|
||||||
|
revealed = neutralizationService.resolveText(text)
|
||||||
|
|
||||||
|
if not fileName:
|
||||||
|
fileName = "revealed.txt"
|
||||||
|
mimeType = "text/markdown" if fileName.lower().endswith((".md", ".markdown")) else "text/plain"
|
||||||
|
contentB64 = _b64.b64encode(revealed.encode("utf-8")).decode("ascii")
|
||||||
|
|
||||||
|
return ToolResult(
|
||||||
|
toolCallId="", toolName="revealDocument", success=True,
|
||||||
|
data=(
|
||||||
|
f"Revealed {placeholderCount} placeholder(s) and prepared '{fileName}' for "
|
||||||
|
f"download in the chat. The cleartext was NOT stored, indexed, or kept in history."
|
||||||
|
),
|
||||||
|
sideEvents=[{
|
||||||
|
"type": "revealDownload",
|
||||||
|
"data": {
|
||||||
|
"content": contentB64,
|
||||||
|
"encoding": "base64",
|
||||||
|
"fileName": fileName,
|
||||||
|
"mimeType": mimeType,
|
||||||
|
"placeholderCount": placeholderCount,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"revealDocument failed: {e}")
|
||||||
|
return ToolResult(toolCallId="", toolName="revealDocument", success=False, error=str(e))
|
||||||
|
|
||||||
async def _executeCode(args: Dict[str, Any], context: Dict[str, Any]):
|
async def _executeCode(args: Dict[str, Any], context: Dict[str, Any]):
|
||||||
code = args.get("code", "")
|
code = args.get("code", "")
|
||||||
language = args.get("language", "python")
|
language = args.get("language", "python")
|
||||||
|
|
@ -899,6 +1027,28 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
||||||
readOnly=True
|
readOnly=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
registry.register(
|
||||||
|
"revealDocument", _revealDocument,
|
||||||
|
description=(
|
||||||
|
"De-neutralize (reveal) a neutralized text/document by replacing placeholders like "
|
||||||
|
"[name.<uuid>] with their original values, using ONLY the private local mapping (no "
|
||||||
|
"external LLM). The cleartext is delivered to the user as a transient, one-time download "
|
||||||
|
"in the chat -- it is NEVER saved, indexed, or written to chat history. Use ONLY when the "
|
||||||
|
"user explicitly asks to download the real/original (de-anonymized) version of a document. "
|
||||||
|
"Provide either 'fileId' (a stored neutralized text file) or inline 'text'."
|
||||||
|
),
|
||||||
|
parameters={
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"fileId": {"type": "string", "description": "ID of a stored text file containing placeholders to reveal"},
|
||||||
|
"text": {"type": "string", "description": "Inline placeholder text to reveal (alternative to fileId)"},
|
||||||
|
"fileName": {"type": "string", "description": "Optional download file name (e.g. 'contract-original.md')"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
readOnly=True,
|
||||||
|
displayLabel="preparing de-anonymized download",
|
||||||
|
)
|
||||||
|
|
||||||
from modules.serviceCenter.services.serviceAgent.sandboxExecutor import SANDBOX_ALLOWED_MODULES
|
from modules.serviceCenter.services.serviceAgent.sandboxExecutor import SANDBOX_ALLOWED_MODULES
|
||||||
moduleList = ", ".join(sorted(SANDBOX_ALLOWED_MODULES | {"io"}))
|
moduleList = ", ".join(sorted(SANDBOX_ALLOWED_MODULES | {"io"}))
|
||||||
registry.register(
|
registry.register(
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ class AgentEventTypeEnum(str, Enum):
|
||||||
FILE_EDIT_REJECTED = "fileEditRejected"
|
FILE_EDIT_REJECTED = "fileEditRejected"
|
||||||
DATA_SOURCE_ACCESS = "dataSourceAccess"
|
DATA_SOURCE_ACCESS = "dataSourceAccess"
|
||||||
VOICE_RESPONSE = "voiceResponse"
|
VOICE_RESPONSE = "voiceResponse"
|
||||||
|
REVEAL_DOWNLOAD = "revealDownload"
|
||||||
FINAL = "final"
|
FINAL = "final"
|
||||||
ERROR = "error"
|
ERROR = "error"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,8 @@ async def runFeatureDataAgent(
|
||||||
tableFilters: Optional[Dict[str, Dict[str, str]]] = None,
|
tableFilters: Optional[Dict[str, Dict[str, str]]] = None,
|
||||||
requestLang: Optional[str] = None,
|
requestLang: Optional[str] = None,
|
||||||
neutralizeFields: Optional[Dict[str, List[str]]] = None,
|
neutralizeFields: Optional[Dict[str, List[str]]] = None,
|
||||||
|
neutralizePolicy: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||||
|
neutralizationService: Optional[Any] = None,
|
||||||
maxRounds: Optional[int] = None,
|
maxRounds: Optional[int] = None,
|
||||||
maxCostCHF: Optional[float] = None,
|
maxCostCHF: Optional[float] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
|
@ -74,8 +76,13 @@ async def runFeatureDataAgent(
|
||||||
instanceLabel: Human-readable instance name for context.
|
instanceLabel: Human-readable instance name for context.
|
||||||
tableFilters: Per-table record filters from FeatureDataSource.recordFilter.
|
tableFilters: Per-table record filters from FeatureDataSource.recordFilter.
|
||||||
requestLang: ISO 639-1 code for resolving multilingual table labels in the schema prompt.
|
requestLang: ISO 639-1 code for resolving multilingual table labels in the schema prompt.
|
||||||
neutralizeFields: Per-table list of field names to mask with placeholders
|
neutralizeFields: LEGACY per-table list of field names for whole-value masking.
|
||||||
before returning data to the AI.
|
neutralizePolicy: Per-table type/inheritance-aware neutralization policy
|
||||||
|
({"tableActive": bool, "explicitFields": set}) applied via the provider's
|
||||||
|
finalizeRowsAsync (A2 rules: strings substring-neutralized when effective,
|
||||||
|
binary dropped, other scalars only when explicit).
|
||||||
|
neutralizationService: Mandate/instance-scoped NeutralizationService used for
|
||||||
|
substring neutralization of string cells.
|
||||||
maxRounds: Inherited from the parent agent's configured `maxRounds`
|
maxRounds: Inherited from the parent agent's configured `maxRounds`
|
||||||
(workspace user setting `maxAgentRounds` -> `AgentConfig.maxRounds`).
|
(workspace user setting `maxAgentRounds` -> `AgentConfig.maxRounds`).
|
||||||
Falls back to the legacy 8-round default when not provided so direct
|
Falls back to the legacy 8-round default when not provided so direct
|
||||||
|
|
@ -87,7 +94,12 @@ async def runFeatureDataAgent(
|
||||||
Plain-text answer produced by the sub-agent.
|
Plain-text answer produced by the sub-agent.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
provider = FeatureDataProvider(dbConnector, neutralizeFields=neutralizeFields)
|
provider = FeatureDataProvider(
|
||||||
|
dbConnector,
|
||||||
|
neutralizeFields=neutralizeFields,
|
||||||
|
neutralizePolicy=neutralizePolicy,
|
||||||
|
neutralizationService=neutralizationService,
|
||||||
|
)
|
||||||
validator = _buildValidatorForFeature(featureCode)
|
validator = _buildValidatorForFeature(featureCode)
|
||||||
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator)
|
registry = _buildSubAgentTools(provider, featureInstanceId, mandateId, tableFilters or {}, validator=validator)
|
||||||
|
|
||||||
|
|
@ -207,6 +219,8 @@ def _buildSubAgentTools(
|
||||||
offset=offset,
|
offset=offset,
|
||||||
extraFilters=_recordFilterToList(tableName),
|
extraFilters=_recordFilterToList(tableName),
|
||||||
)
|
)
|
||||||
|
if hasattr(provider, "finalizeRowsAsync") and "rows" in result:
|
||||||
|
result["rows"] = await provider.finalizeRowsAsync(tableName, result["rows"])
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
toolCallId="", toolName="browseTable",
|
toolCallId="", toolName="browseTable",
|
||||||
success="error" not in result,
|
success="error" not in result,
|
||||||
|
|
@ -237,6 +251,8 @@ def _buildSubAgentTools(
|
||||||
offset=offset,
|
offset=offset,
|
||||||
extraFilters=_recordFilterToList(tableName),
|
extraFilters=_recordFilterToList(tableName),
|
||||||
)
|
)
|
||||||
|
if hasattr(provider, "finalizeRowsAsync") and "rows" in result:
|
||||||
|
result["rows"] = await provider.finalizeRowsAsync(tableName, result["rows"])
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
toolCallId="", toolName="queryTable",
|
toolCallId="", toolName="queryTable",
|
||||||
success="error" not in result,
|
success="error" not in result,
|
||||||
|
|
@ -271,6 +287,8 @@ def _buildSubAgentTools(
|
||||||
groupBy=groupBy,
|
groupBy=groupBy,
|
||||||
extraFilters=combinedFilters or None,
|
extraFilters=combinedFilters or None,
|
||||||
)
|
)
|
||||||
|
if hasattr(provider, "finalizeRowsAsync") and "rows" in result:
|
||||||
|
result["rows"] = await provider.finalizeRowsAsync(tableName, result["rows"])
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
toolCallId="", toolName="aggregateTable",
|
toolCallId="", toolName="aggregateTable",
|
||||||
success="error" not in result,
|
success="error" not in result,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ feature table. All queries are automatically filtered by featureInstanceId
|
||||||
and mandateId so data isolation is guaranteed.
|
and mandateId so data isolation is guaranteed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
|
|
@ -62,18 +63,36 @@ _ALLOWED_AGGREGATES = {"SUM", "COUNT", "AVG", "MIN", "MAX"}
|
||||||
class FeatureDataProvider:
|
class FeatureDataProvider:
|
||||||
"""Reads feature-instance data from the DB using DATA_OBJECTS metadata."""
|
"""Reads feature-instance data from the DB using DATA_OBJECTS metadata."""
|
||||||
|
|
||||||
def __init__(self, dbConnector, neutralizeFields: Optional[Dict[str, List[str]]] = None):
|
def __init__(
|
||||||
|
self,
|
||||||
|
dbConnector,
|
||||||
|
neutralizeFields: Optional[Dict[str, List[str]]] = None,
|
||||||
|
neutralizePolicy: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||||
|
neutralizationService: Optional[Any] = None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
dbConnector: A connectorDbPostgre.DatabaseConnector with an open connection.
|
dbConnector: A connectorDbPostgre.DatabaseConnector with an open connection.
|
||||||
neutralizeFields: Per-table field names whose values must be replaced
|
neutralizeFields: LEGACY per-table field names whose values are replaced
|
||||||
with placeholders before returning to the AI, e.g.
|
with a whole-value placeholder ``[NEUT.<field>.<hash>]``. Kept for
|
||||||
``{"TrusteePosition": ["firstName", "lastName", "address"]}``.
|
backward compatibility; superseded by ``neutralizePolicy``.
|
||||||
|
neutralizePolicy: Per-table type/inheritance-aware policy, e.g.
|
||||||
|
``{"TrusteePosition": {"tableActive": True, "explicitFields": {"iban"}}}``.
|
||||||
|
* ``tableActive`` -- effective (own/inherited) table-level neutralize flag.
|
||||||
|
* ``explicitFields`` -- fields whose neutralize flag is set EXPLICITLY.
|
||||||
|
Applied via :meth:`finalizeRowsAsync` following the A2 rules:
|
||||||
|
strings substring-neutralized when effective (explicit or inherited),
|
||||||
|
binary dropped, other scalars only when explicit.
|
||||||
|
neutralizationService: The mandate/instance-scoped NeutralizationService
|
||||||
|
used for substring neutralization of string cells (reuses the standard
|
||||||
|
neutralization engine; no external LLM is introduced here).
|
||||||
"""
|
"""
|
||||||
self._db = dbConnector
|
self._db = dbConnector
|
||||||
self._neutralizeFields: Dict[str, Set[str]] = {
|
self._neutralizeFields: Dict[str, Set[str]] = {
|
||||||
tbl: set(fields) for tbl, fields in (neutralizeFields or {}).items()
|
tbl: set(fields) for tbl, fields in (neutralizeFields or {}).items()
|
||||||
}
|
}
|
||||||
|
self._neutralizePolicy: Dict[str, Dict[str, Any]] = neutralizePolicy or {}
|
||||||
|
self._neutralizer = neutralizationService
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# public API (called by FeatureDataAgent tools)
|
# public API (called by FeatureDataAgent tools)
|
||||||
|
|
@ -108,12 +127,27 @@ class FeatureDataProvider:
|
||||||
logger.warning(f"getActualColumns({tableName}) failed: {e}")
|
logger.warning(f"getActualColumns({tableName}) failed: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _applyFieldNeutralization(self, tableName: str, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
async def finalizeRowsAsync(self, tableName: str, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
"""Neutralize sensitive field values in query results before they reach the AI."""
|
"""Make raw DB rows AI-safe: apply the field-neutralization policy and JSON-serialize.
|
||||||
fieldsToNeut = self._neutralizeFields.get(tableName)
|
|
||||||
if not fieldsToNeut:
|
The query methods (``browseTable``/``queryTable``/``aggregateTable``) return RAW
|
||||||
return rows
|
rows so this step can see the real Python types (bytes vs str vs scalar).
|
||||||
return [_neutralizeRowFields(row, fieldsToNeut) for row in rows]
|
|
||||||
|
* When a rich ``neutralizePolicy`` exists for the table, the A2 type/inheritance
|
||||||
|
rules apply (see :func:`_neutralizeAndSerializeRows`).
|
||||||
|
* Otherwise the legacy whole-value behavior is preserved (``neutralizeFields``).
|
||||||
|
* With no neutralization at all, rows are just JSON-serialized.
|
||||||
|
|
||||||
|
Always returns JSON-serializable rows.
|
||||||
|
"""
|
||||||
|
policy = self._neutralizePolicy.get(tableName)
|
||||||
|
if policy:
|
||||||
|
return await _neutralizeAndSerializeRows(rows, policy, self._neutralizer)
|
||||||
|
serialized = [_serializeRow(dict(r)) for r in rows]
|
||||||
|
legacyFields = self._neutralizeFields.get(tableName)
|
||||||
|
if legacyFields:
|
||||||
|
serialized = [_neutralizeRowFields(row, legacyFields) for row in serialized]
|
||||||
|
return serialized
|
||||||
|
|
||||||
def browseTable(
|
def browseTable(
|
||||||
self,
|
self,
|
||||||
|
|
@ -162,9 +196,10 @@ class FeatureDataProvider:
|
||||||
f'ORDER BY "id" LIMIT %s OFFSET %s'
|
f'ORDER BY "id" LIMIT %s OFFSET %s'
|
||||||
)
|
)
|
||||||
cur.execute(dataSql, allParams + [limit, offset])
|
cur.execute(dataSql, allParams + [limit, offset])
|
||||||
rows = [_serializeRow(dict(r)) for r in cur.fetchall()]
|
# Return RAW rows; neutralization + JSON-serialization happen in
|
||||||
|
# finalizeRowsAsync (needs the real Python types to apply A2 rules).
|
||||||
|
rows = [dict(r) for r in cur.fetchall()]
|
||||||
|
|
||||||
rows = self._applyFieldNeutralization(tableName, rows)
|
|
||||||
result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
|
result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
|
||||||
_debugQueryLog("browseTable", tableName, {
|
_debugQueryLog("browseTable", tableName, {
|
||||||
"fields": fields, "limit": limit, "offset": offset,
|
"fields": fields, "limit": limit, "offset": offset,
|
||||||
|
|
@ -226,9 +261,8 @@ class FeatureDataProvider:
|
||||||
f'FROM "{tableName}" WHERE {fullWhere}'
|
f'FROM "{tableName}" WHERE {fullWhere}'
|
||||||
)
|
)
|
||||||
cur.execute(sql, allParams)
|
cur.execute(sql, allParams)
|
||||||
rows = [_serializeRow(dict(r)) for r in cur.fetchall()]
|
rows = [dict(r) for r in cur.fetchall()]
|
||||||
|
|
||||||
rows = self._applyFieldNeutralization(tableName, rows)
|
|
||||||
result = {
|
result = {
|
||||||
"rows": rows,
|
"rows": rows,
|
||||||
"aggregate": aggregate,
|
"aggregate": aggregate,
|
||||||
|
|
@ -300,9 +334,8 @@ class FeatureDataProvider:
|
||||||
f'WHERE {fullWhere} {orderClause} LIMIT %s OFFSET %s'
|
f'WHERE {fullWhere} {orderClause} LIMIT %s OFFSET %s'
|
||||||
)
|
)
|
||||||
cur.execute(dataSql, allParams + [limit, offset])
|
cur.execute(dataSql, allParams + [limit, offset])
|
||||||
rows = [_serializeRow(dict(r)) for r in cur.fetchall()]
|
rows = [dict(r) for r in cur.fetchall()]
|
||||||
|
|
||||||
rows = self._applyFieldNeutralization(tableName, rows)
|
|
||||||
result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
|
result = {"rows": rows, "total": total, "limit": limit, "offset": offset}
|
||||||
_debugQueryLog("queryTable", tableName, {
|
_debugQueryLog("queryTable", tableName, {
|
||||||
"filters": filters, "fields": fields, "orderBy": orderBy,
|
"filters": filters, "fields": fields, "orderBy": orderBy,
|
||||||
|
|
@ -437,3 +470,142 @@ def _neutralizeRowFields(row: Dict[str, Any], fieldsToNeutralize: Set[str]) -> D
|
||||||
shortHash = hashlib.sha256(str(val).encode()).hexdigest()[:8]
|
shortHash = hashlib.sha256(str(val).encode()).hexdigest()[:8]
|
||||||
row[field] = f"[{_PLACEHOLDER_PREFIX}.{field}.{shortHash}]"
|
row[field] = f"[{_PLACEHOLDER_PREFIX}.{field}.{shortHash}]"
|
||||||
return row
|
return row
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# A2: type / inheritance-aware field neutralization for source data
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Rules (see wiki neutralization.md Failsafe 5/6):
|
||||||
|
# 1. STRING (incl. JSON/markdown/code -- anything textual): substring-neutralize
|
||||||
|
# via the private NeutralizationService whenever neutralize is EFFECTIVE for the
|
||||||
|
# field (explicit OR inherited). The placeholders stay embedded in the text so the
|
||||||
|
# record remains usable; the field name is passed as a type hint.
|
||||||
|
# 2. BINARY (bytes): never neutralized -- the column is DROPPED when neutralization
|
||||||
|
# applies to the table/field.
|
||||||
|
# 3. OTHER SCALARS (number/float/int/date/bool): neutralized (whole-value placeholder)
|
||||||
|
# ONLY when the field flag is set EXPLICITLY -- never via inheritance.
|
||||||
|
|
||||||
|
# Upper bound on neutralizer calls running at the same time; used as the size of the
# asyncio.Semaphore created in _neutralizeAndSerializeRows.
_NEUT_CONCURRENCY = 4
|
||||||
|
|
||||||
|
|
||||||
|
def _isStructuralField(key: str) -> bool:
    """Report whether a column is an identifier / system column rather than content.

    Such columns are references, not PII content, so callers serialize them but never
    neutralize them. A column counts as structural when its name:

    * starts with ``_`` or ``sys``,
    * is exactly ``id`` or ends with ``Id`` / ``_id``, or
    * is one of the listed reference / audit column names.

    Args:
        key: Column name as it appears in the row dict.

    Returns:
        True when the column must be excluded from neutralization.
    """
    referenceColumns = ("mandateId", "featureInstanceId", "instanceId", "createdBy", "updatedBy")
    return (
        key.startswith(("_", "sys"))
        or key == "id"
        or key.endswith(("Id", "_id"))
        or key in referenceColumns
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _isTextValue(value: Any) -> bool:
    """Return True for values handled as neutralizable text.

    Plain strings qualify, and so do ``dict`` / ``list`` values (JSON-like data).
    """
    return isinstance(value, (str, dict, list))
|
||||||
|
|
||||||
|
|
||||||
|
async def _neutralizeOneText(fieldName: str, text: str, neutralizer: Any) -> Optional[str]:
    """Substring-neutralize one text value, using the field name as a type hint.

    The text is sent to ``neutralizer.processTextAsync`` prefixed with
    ``"<fieldName>: "``; the prefix is removed again from the returned text.

    Args:
        fieldName: Column name, prepended as a hint for the engine.
        text: Raw cell text to neutralize.
        neutralizer: Object providing an awaitable ``processTextAsync(text)`` that
            returns a dict with a ``neutralized_text`` string.

    Returns:
        The neutralized text without the hint prefix, or ``"[REDACTED]"`` when the
        call raises, the result is not a dict carrying a string, or the returned
        text no longer starts with the hint. The raw value is never returned.
    """
    redacted = "[REDACTED]"
    hint = f"{fieldName}: "

    try:
        response = await neutralizer.processTextAsync(hint + text)
    except Exception as e:  # noqa: BLE001 - neutralization must fail closed
        logger.warning("field neutralization failed for '%s': %s", fieldName, e)
        return redacted

    neutralizedText = response.get("neutralized_text") if isinstance(response, dict) else None
    if not isinstance(neutralizedText, str):
        return redacted

    if not neutralizedText.startswith(hint):
        # Engine altered the hint prefix (rare) -- fail closed rather than leak.
        logger.warning("field neutralization prefix mismatch for '%s'; redacting", fieldName)
        return redacted

    return neutralizedText[len(hint):]
|
||||||
|
|
||||||
|
|
||||||
|
async def _neutralizeAndSerializeRows(
    rows: List[Dict[str, Any]],
    policy: Dict[str, Any],
    neutralizer: Any,
) -> List[Dict[str, Any]]:
    """Apply the A2 field-neutralization rules to raw rows and serialize them.

    Each cell is handled by the first matching rule:

    * ``None`` is passed through.
    * Structural columns (``_isStructuralField``) are serialized but never neutralized.
    * Binary values are dropped when neutralization applies to the table or the
      field; otherwise they become a ``<binary N bytes>`` marker.
    * Text values (``str``; ``dict`` / ``list`` are JSON-encoded first) are
      substring-neutralized through ``neutralizer`` when neutralization is effective
      for the field (explicit flag OR table-level). Empty strings are left as-is.
    * Any other scalar becomes a whole-value placeholder only when the field is
      flagged explicitly; otherwise it is serialized unchanged (``isoformat()`` for
      values that provide it).

    Args:
        rows: Raw result rows (column name -> Python value). Not mutated.
        policy: ``tableActive`` (truthy enables table-level neutralization) and
            ``explicitFields`` (iterable of explicitly flagged column names).
        neutralizer: Object exposing ``processTextAsync``. When it is missing, every
            cell that requires neutralization is redacted instead.

    Returns:
        A new list of row dicts with the rules above applied.
    """
    tableActive = bool(policy.get("tableActive"))
    explicitFields: Set[str] = set(policy.get("explicitFields") or [])

    outRows: List[Dict[str, Any]] = []
    # (fieldName, originalText) -> neutralizedText (dedup across the whole result set)
    pending: Dict[tuple, Optional[str]] = {}
    # (rowIdx, fieldName, originalText) for every cell awaiting a neutralized value
    cellRefs: List[tuple] = []

    for row in rows:
        out: Dict[str, Any] = {}
        for key, value in row.items():
            fieldExplicit = key in explicitFields
            fieldEffective = fieldExplicit or tableActive

            if value is None:
                out[key] = None
                continue

            # Identifiers / system columns: serialize but never neutralize.
            if _isStructuralField(key):
                if hasattr(value, "isoformat"):
                    out[key] = value.isoformat()
                elif isinstance(value, (bytes, bytearray)):
                    out[key] = f"<binary {len(value)} bytes>"
                else:
                    out[key] = value
                continue

            if isinstance(value, (bytes, bytearray)):
                # Rule 2: binary is dropped when neutralization applies; else legacy marker.
                if not fieldEffective:
                    out[key] = f"<binary {len(value)} bytes>"
                continue

            if _isTextValue(value):
                textVal = value if isinstance(value, str) else json.dumps(value, ensure_ascii=False, default=str)
                if fieldEffective and textVal != "":
                    pending.setdefault((key, textVal), None)
                    cellRefs.append((len(outRows), key, textVal))
                # Raw text is only a stand-in here; flagged cells are overwritten below.
                out[key] = textVal
                continue

            # Rule 3: other scalars (number/float/int/date/bool) -- explicit only.
            if fieldExplicit:
                shortHash = hashlib.sha256(str(value).encode()).hexdigest()[:8]
                out[key] = f"[{_PLACEHOLDER_PREFIX}.{key}.{shortHash}]"
            else:
                out[key] = value.isoformat() if hasattr(value, "isoformat") else value
        outRows.append(out)

    if not cellRefs:
        return outRows

    if neutralizer is None or not hasattr(neutralizer, "processTextAsync"):
        # Fail-safe: neutralization required but no engine -> redact the affected cells.
        for rowIdx, key, _origText in cellRefs:
            outRows[rowIdx][key] = "[REDACTED]"
        return outRows

    sem = asyncio.Semaphore(_NEUT_CONCURRENCY)

    async def _resolvePair(fieldName: str, origText: str) -> None:
        # One engine call per distinct (field, text) pair, bounded by the semaphore.
        async with sem:
            pending[(fieldName, origText)] = await _neutralizeOneText(fieldName, origText, neutralizer)

    await asyncio.gather(*[
        _resolvePair(fieldName, origText) for (fieldName, origText) in pending
    ])

    for rowIdx, key, origText in cellRefs:
        neutralized = pending.get((key, origText))
        # Fail closed: a missing result must never leave the raw text in the output.
        outRows[rowIdx][key] = neutralized if neutralized is not None else "[REDACTED]"
    return outRows
|
||||||
|
|
|
||||||
|
|
@ -231,6 +231,22 @@ def _registerDefaultToolboxes() -> None:
|
||||||
"trustee_refreshAccountingData",
|
"trustee_refreshAccountingData",
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
|
ToolboxDefinition(
|
||||||
|
id="neutralization",
|
||||||
|
label="Neutralization / Reveal",
|
||||||
|
description=(
|
||||||
|
"Privacy-sensitive de-neutralization. NOT active by default - must be "
|
||||||
|
"explicitly requested. Contains revealDocument, which resolves "
|
||||||
|
"neutralization placeholders ([type.uuid]) back to cleartext using ONLY "
|
||||||
|
"the local mapping (no external LLM) and returns the result as a "
|
||||||
|
"transient one-time download. Cleartext is never saved, indexed, or kept "
|
||||||
|
"in the chat history."
|
||||||
|
),
|
||||||
|
isDefault=False,
|
||||||
|
tools=[
|
||||||
|
"revealDocument",
|
||||||
|
],
|
||||||
|
),
|
||||||
]
|
]
|
||||||
for tb in defaults:
|
for tb in defaults:
|
||||||
_toolboxRegistry.registerToolbox(tb)
|
_toolboxRegistry.registerToolbox(tb)
|
||||||
|
|
|
||||||
|
|
@ -160,8 +160,11 @@ class AiService:
|
||||||
3. billingCallback on aiObjects: records one billing transaction per model call
|
3. billingCallback on aiObjects: records one billing transaction per model call
|
||||||
with exact provider + model name (set before AI call, invoked by _callWithModel)
|
with exact provider + model name (set before AI call, invoked by _callWithModel)
|
||||||
|
|
||||||
NEUTRALIZATION: If enabled, prompt text is neutralized before the AI call
|
NEUTRALIZATION: If enabled, prompt text is neutralized before the AI call.
|
||||||
and placeholders in the response are rehydrated afterwards.
|
The response is persisted exactly as returned by the model (placeholders are
|
||||||
|
NOT rehydrated/re-saved with cleartext -- that would defeat neutralization).
|
||||||
|
De-neutralization for download is an explicit, transient action via the
|
||||||
|
agent's `revealDocument` tool (no save/index).
|
||||||
"""
|
"""
|
||||||
await self.ensureAiObjectsInitialized()
|
await self.ensureAiObjectsInitialized()
|
||||||
|
|
||||||
|
|
@ -241,7 +244,9 @@ class AiService:
|
||||||
"""Streaming variant of callAi. Yields str deltas during generation, then final AiCallResponse.
|
"""Streaming variant of callAi. Yields str deltas during generation, then final AiCallResponse.
|
||||||
|
|
||||||
NEUTRALIZATION: If enabled, prompt text is neutralized before streaming.
|
NEUTRALIZATION: If enabled, prompt text is neutralized before streaming.
|
||||||
Rehydration happens on the final AiCallResponse (not on individual str deltas).
|
The streamed/persisted response keeps placeholders as returned by the model
|
||||||
|
(no cleartext re-hydration into storage). Use the agent's `revealDocument`
|
||||||
|
tool for an explicit, transient de-neutralization for download.
|
||||||
"""
|
"""
|
||||||
await self.ensureAiObjectsInitialized()
|
await self.ensureAiObjectsInitialized()
|
||||||
|
|
||||||
|
|
@ -623,7 +628,7 @@ detectedIntent-Werte:
|
||||||
return basePrompt
|
return basePrompt
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# NEUTRALIZATION: Centralized prompt neutralization / response rehydration
|
# NEUTRALIZATION: Centralized prompt neutralization (no response rehydration)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
async def _hasNeutralizationModel(self) -> bool:
|
async def _hasNeutralizationModel(self) -> bool:
|
||||||
|
|
@ -920,20 +925,6 @@ detectedIntent-Werte:
|
||||||
logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}")
|
logger.info(f"_neutralizeRequest complete: neutralized={_wasNeutralized}, excluded={len(excludedDocs)}")
|
||||||
return request, _wasNeutralized, excludedDocs
|
return request, _wasNeutralized, excludedDocs
|
||||||
|
|
||||||
def _rehydrateResponse(self, responseText: str) -> str:
|
|
||||||
"""Replace neutralization placeholders with original values in AI response."""
|
|
||||||
if not responseText:
|
|
||||||
return responseText
|
|
||||||
try:
|
|
||||||
neutralSvc = self._get_service("neutralization")
|
|
||||||
if not neutralSvc or not hasattr(neutralSvc, 'resolveText'):
|
|
||||||
return responseText
|
|
||||||
resolved = neutralSvc.resolveText(responseText)
|
|
||||||
return resolved if resolved else responseText
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Response rehydration failed: {e}")
|
|
||||||
return responseText
|
|
||||||
|
|
||||||
def _preflightBillingCheck(self) -> None:
|
def _preflightBillingCheck(self) -> None:
|
||||||
"""
|
"""
|
||||||
Pre-flight billing validation - like a 0 CHF credit card authorization check.
|
Pre-flight billing validation - like a 0 CHF credit card authorization check.
|
||||||
|
|
@ -1689,7 +1680,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
language: str,
|
language: str,
|
||||||
title: str,
|
title: str,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
parentOperationId: str
|
parentOperationId: str,
|
||||||
|
documentTheme: Optional[str] = None
|
||||||
) -> List[RenderedDocument]:
|
) -> List[RenderedDocument]:
|
||||||
"""
|
"""
|
||||||
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
|
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
|
||||||
|
|
@ -1741,7 +1733,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
title,
|
title,
|
||||||
userPrompt,
|
userPrompt,
|
||||||
self,
|
self,
|
||||||
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
parentOperationId=renderOperationId, # Parent-Referenz für ChatLog-Hierarchie
|
||||||
|
documentTheme=documentTheme
|
||||||
)
|
)
|
||||||
|
|
||||||
# ChatLog abschließen
|
# ChatLog abschließen
|
||||||
|
|
@ -1783,7 +1776,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
outputFormat: Optional[str] = None,
|
outputFormat: Optional[str] = None,
|
||||||
title: Optional[str] = None,
|
title: Optional[str] = None,
|
||||||
parentOperationId: Optional[str] = None,
|
parentOperationId: Optional[str] = None,
|
||||||
generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection)
|
generationIntent: Optional[str] = None, # NEW: Explicit intent from action (skips detection)
|
||||||
|
documentTheme: Optional[str] = None # Named style preset for document rendering
|
||||||
) -> AiResponse:
|
) -> AiResponse:
|
||||||
"""
|
"""
|
||||||
Unified AI content generation with explicit intent requirement.
|
Unified AI content generation with explicit intent requirement.
|
||||||
|
|
@ -1802,6 +1796,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||||
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
|
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
|
||||||
NO auto-detection - actions must explicitly specify intent.
|
NO auto-detection - actions must explicitly specify intent.
|
||||||
|
documentTheme: Optional named style preset (general/finance/legal/technical/
|
||||||
|
hr/marketing) forwarded to the renderer for document generation.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
AiResponse with content, metadata, and optional documents
|
AiResponse with content, metadata, and optional documents
|
||||||
|
|
@ -1872,7 +1868,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
contentParts=contentParts,
|
contentParts=contentParts,
|
||||||
outputFormat=outputFormat,
|
outputFormat=outputFormat,
|
||||||
title=title,
|
title=title,
|
||||||
parentOperationId=parentOperationId
|
parentOperationId=parentOperationId,
|
||||||
|
documentTheme=documentTheme
|
||||||
)
|
)
|
||||||
|
|
||||||
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
|
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
|
||||||
|
|
@ -2088,7 +2085,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
contentParts: Optional[List[ContentPart]],
|
contentParts: Optional[List[ContentPart]],
|
||||||
outputFormat: str,
|
outputFormat: str,
|
||||||
title: str,
|
title: str,
|
||||||
parentOperationId: Optional[str]
|
parentOperationId: Optional[str],
|
||||||
|
documentTheme: Optional[str] = None
|
||||||
) -> AiResponse:
|
) -> AiResponse:
|
||||||
"""Handle document generation using document generation path."""
|
"""Handle document generation using document generation path."""
|
||||||
from modules.serviceCenter.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
|
from modules.serviceCenter.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
|
||||||
|
|
@ -2105,7 +2103,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
contentParts=contentParts,
|
contentParts=contentParts,
|
||||||
outputFormat=outputFormat,
|
outputFormat=outputFormat,
|
||||||
title=title or "Generated Document",
|
title=title or "Generated Document",
|
||||||
parentOperationId=parentOperationId
|
parentOperationId=parentOperationId,
|
||||||
|
documentTheme=documentTheme
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ from modules.interfaces.interfaceDbBilling import getInterface as getBillingInte
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Markup percentage for internal pricing (+50% für Infrastruktur und Platform Service + 50% für Währungsrisiko ==> Faktor 2.0)
|
# Markup percentage on the AI base price (400% ==> factor 5.0: infrastructure, platform service, currency risk)
|
||||||
BILLING_MARKUP_PERCENT = 400
|
BILLING_MARKUP_PERCENT = 400
|
||||||
|
|
||||||
# Singleton cache
|
# Singleton cache
|
||||||
|
|
@ -150,7 +150,7 @@ class BillingService:
|
||||||
if basePriceCHF <= 0:
|
if basePriceCHF <= 0:
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
# Apply markup (50% = multiply by 1.5)
|
# Apply markup (400% = multiply by 5.0)
|
||||||
markup_multiplier = 1 + (BILLING_MARKUP_PERCENT / 100)
|
markup_multiplier = 1 + (BILLING_MARKUP_PERCENT / 100)
|
||||||
return round(basePriceCHF * markup_multiplier, 6)
|
return round(basePriceCHF * markup_multiplier, 6)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -383,7 +383,7 @@ class GenerationService:
|
||||||
'workflowId': 'unknown'
|
'workflowId': 'unknown'
|
||||||
}
|
}
|
||||||
|
|
||||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]:
|
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None, documentTheme: Optional[str] = None, imageResolver=None) -> List[RenderedDocument]:
|
||||||
"""
|
"""
|
||||||
Render extracted JSON content to the specified output format.
|
Render extracted JSON content to the specified output format.
|
||||||
Processes EACH document separately and calls renderer for each.
|
Processes EACH document separately and calls renderer for each.
|
||||||
|
|
@ -401,13 +401,20 @@ class GenerationService:
|
||||||
aiService: AI service instance for generation prompt creation
|
aiService: AI service instance for generation prompt creation
|
||||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||||
style: Optional style overrides (deep-merged with DEFAULT_STYLE)
|
style: Optional style overrides (deep-merged with DEFAULT_STYLE)
|
||||||
|
documentTheme: Optional named theme preset (general/finance/legal/
|
||||||
|
technical/hr/marketing). Resolved as DEFAULT_STYLE <- preset <- style,
|
||||||
|
so an explicit ``style`` override always wins.
|
||||||
|
imageResolver: Optional callable ``fileId -> bytes`` for lazy, on-demand
|
||||||
|
resolution of block images that carry only a ``fileId`` (no embedded
|
||||||
|
base64). Lets large documents avoid holding every image's bytes in
|
||||||
|
the JSON simultaneously. When None, images must be pre-embedded.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of RenderedDocument objects.
|
List of RenderedDocument objects.
|
||||||
Each RenderedDocument represents one rendered file (main document or supporting file)
|
Each RenderedDocument represents one rendered file (main document or supporting file)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
resolvedStyle = resolveStyle(style)
|
resolvedStyle = resolveStyle(style, documentTheme)
|
||||||
# Validate JSON input
|
# Validate JSON input
|
||||||
if not isinstance(extractedContent, dict):
|
if not isinstance(extractedContent, dict):
|
||||||
raise ValueError("extractedContent must be a JSON dictionary")
|
raise ValueError("extractedContent must be a JSON dictionary")
|
||||||
|
|
@ -451,6 +458,11 @@ class GenerationService:
|
||||||
if not renderer:
|
if not renderer:
|
||||||
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
|
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
|
||||||
continue
|
continue
|
||||||
|
# Provide the per-render lazy image resolver (fileId -> bytes) so
|
||||||
|
# renderers can fetch block images on demand instead of relying on
|
||||||
|
# all bytes being pre-embedded in the document JSON.
|
||||||
|
if imageResolver is not None:
|
||||||
|
renderer._imageResolver = imageResolver
|
||||||
|
|
||||||
# Check output style classification (code/document/image/etc.) from renderer
|
# Check output style classification (code/document/image/etc.) from renderer
|
||||||
from .renderers.registry import getOutputStyle
|
from .renderers.registry import getOutputStyle
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,8 @@ class DocumentGenerationPath:
|
||||||
contentParts: Optional[List[ContentPart]] = None,
|
contentParts: Optional[List[ContentPart]] = None,
|
||||||
outputFormat: str = "txt",
|
outputFormat: str = "txt",
|
||||||
title: Optional[str] = None,
|
title: Optional[str] = None,
|
||||||
parentOperationId: Optional[str] = None
|
parentOperationId: Optional[str] = None,
|
||||||
|
documentTheme: Optional[str] = None
|
||||||
) -> AiResponse:
|
) -> AiResponse:
|
||||||
"""
|
"""
|
||||||
Generate document using existing chapter/section model.
|
Generate document using existing chapter/section model.
|
||||||
|
|
@ -165,7 +166,8 @@ class DocumentGenerationPath:
|
||||||
language, # Global fallback (per-document language extracted from structure in renderReport)
|
language, # Global fallback (per-document language extracted from structure in renderReport)
|
||||||
title or "Generated Document",
|
title or "Generated Document",
|
||||||
userPrompt,
|
userPrompt,
|
||||||
docOperationId
|
docOperationId,
|
||||||
|
documentTheme=documentTheme
|
||||||
)
|
)
|
||||||
|
|
||||||
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,42 @@ class BaseRenderer(ABC):
|
||||||
def __init__(self, services=None):
|
def __init__(self, services=None):
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.services = services # Add services attribute
|
self.services = services # Add services attribute
|
||||||
|
# Optional per-render lazy image resolver: fileId -> raw bytes. Set by
|
||||||
|
# GenerationService.renderReport. When present, block images that only
|
||||||
|
# carry a fileId (no embedded base64Data) are fetched on demand during
|
||||||
|
# rendering, so the document JSON never holds all image bytes at once.
|
||||||
|
self._imageResolver = None
|
||||||
|
|
||||||
|
def _lazyResolveImageBase64(self, *sources) -> str:
    """Resolve a ``fileId`` / ``_fileRef`` found in any of the given dicts to base64.

    The sources are scanned in order; non-dict entries are ignored and the first
    dict carrying a truthy ``fileId`` (or, failing that, ``_fileRef``) wins. The id
    is handed to ``self._imageResolver`` and the returned bytes are base64-encoded.

    Returns:
        The base64 text, or ``""`` when no callable resolver is configured, no id is
        present, the resolver raises or returns nothing, or encoding fails. Nothing
        is written back into the source dicts.
    """
    resolver = getattr(self, "_imageResolver", None)
    if not callable(resolver):
        return ""

    candidateIds = (
        src.get("fileId") or src.get("_fileRef")
        for src in sources
        if isinstance(src, dict)
    )
    fileId = next((candidate for candidate in candidateIds if candidate), "")
    if not fileId:
        return ""

    try:
        payload = resolver(fileId)
    except Exception as e:  # noqa: BLE001 - a single bad image must not abort the doc
        self.logger.warning(f"lazy image resolve failed for fileId={fileId}: {e}")
        return ""
    if not payload:
        return ""

    try:
        encoded = base64.b64encode(payload).decode("ascii")
    except Exception as e:  # noqa: BLE001
        self.logger.warning(f"lazy image encode failed for fileId={fileId}: {e}")
        return ""
    return encoded
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSupportedFormats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
|
|
|
||||||
|
|
@ -158,8 +158,14 @@ class RendererDocx(BaseRenderer):
|
||||||
# Fallback to metadata.title only if title parameter is empty
|
# Fallback to metadata.title only if title parameter is empty
|
||||||
document_title = title if title else metadata.get("title", "Generated Document")
|
document_title = title if title else metadata.get("title", "Generated Document")
|
||||||
|
|
||||||
|
# A cover_page section renders its own title page; skip the standalone
|
||||||
|
# title in that case so the title is not duplicated.
|
||||||
|
hasCoverPage = any(
|
||||||
|
(s.get("content_type") if isinstance(s, dict) else "") == "cover_page"
|
||||||
|
for s in sections
|
||||||
|
)
|
||||||
# Add document title using Title style
|
# Add document title using Title style
|
||||||
if document_title:
|
if document_title and not hasCoverPage:
|
||||||
doc.add_paragraph(document_title, style='Title')
|
doc.add_paragraph(document_title, style='Title')
|
||||||
|
|
||||||
# Process each section in order
|
# Process each section in order
|
||||||
|
|
@ -377,6 +383,10 @@ class RendererDocx(BaseRenderer):
|
||||||
self._renderJsonCodeBlock(doc, element, styles)
|
self._renderJsonCodeBlock(doc, element, styles)
|
||||||
elif element_type == "image":
|
elif element_type == "image":
|
||||||
self._renderJsonImage(doc, element, styles)
|
self._renderJsonImage(doc, element, styles)
|
||||||
|
elif element_type == "cover_page" or section_type == "cover_page":
|
||||||
|
self._renderCoverPage(doc, element, styles)
|
||||||
|
elif element_type == "image_grid" or section_type == "image_grid":
|
||||||
|
self._renderImageGrid(doc, element, styles)
|
||||||
else:
|
else:
|
||||||
# Fallback: if element_type not set, use section_type
|
# Fallback: if element_type not set, use section_type
|
||||||
if section_type == "table":
|
if section_type == "table":
|
||||||
|
|
@ -1030,6 +1040,89 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
|
|
||||||
|
def _imageStreamFromContent(self, content: Dict[str, Any]):
    """Build a ``BytesIO`` with the image bytes described by ``content``.

    The base64 payload is taken from ``base64Data``, then ``logoBase64``; when both
    are empty it is resolved on demand through ``self._lazyResolveImageBase64``.

    Returns:
        A ``BytesIO`` holding the decoded bytes, or None when ``content`` is not a
        dict, no payload can be found, or the payload cannot be decoded.
    """
    if not isinstance(content, dict):
        return None

    encoded = (
        content.get("base64Data")
        or content.get("logoBase64")
        or self._lazyResolveImageBase64(content)
    )
    if not encoded:
        return None

    try:
        stream = io.BytesIO(base64.b64decode(encoded))
    except Exception:
        return None
    return stream
|
||||||
|
|
||||||
|
def _renderCoverPage(self, doc: Document, element: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Render a cover/title page in DOCX, ending with a page break.

    Layout: six empty paragraphs as top spacing, an optional centered logo
    (2 inches wide), the bold 28 pt title, then subtitle (16 pt), author and date
    (12 pt each), all centered, followed by a page break.

    Args:
        doc: Target document.
        element: Cover element; its ``content`` dict is used when present, otherwise
            the element itself. Reads ``title``, ``subtitle``, ``author``, ``date``
            and the image keys understood by ``_imageStreamFromContent``.
        styles: Accepted for signature parity with the other element renderers;
            not used here.

    Errors never propagate: a logo that cannot be embedded is logged and skipped so
    the text and page break are still rendered; any other failure is logged as a
    warning.
    """
    try:
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        content = element.get("content", element) if isinstance(element, dict) else {}
        if not isinstance(content, dict):
            content = {}

        for _ in range(6):
            doc.add_paragraph("")

        logoStream = self._imageStreamFromContent(content)
        if logoStream is not None:
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            try:
                p.add_run().add_picture(logoStream, width=Inches(2.0))
            except Exception as logoError:  # noqa: BLE001 - a bad logo must not drop the cover text
                self.logger.warning(f"cover_page logo failed: {logoError}")

        # str() keeps non-string metadata (e.g. a numeric year) from breaking .strip().
        title = str(content.get("title") or "").strip()
        if title:
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            run = p.add_run(title)
            run.bold = True
            run.font.size = Pt(28)

        for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)):
            val = str(content.get(key) or "").strip()
            if not val:
                continue
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            p.add_run(val).font.size = Pt(sizePt)

        doc.add_page_break()
    except Exception as e:
        self.logger.warning(f"Error rendering cover_page: {e}")
|
||||||
|
|
||||||
|
def _renderImageGrid(self, doc: Document, element: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Render an image grid (N columns) as a DOCX table of pictures.

    Args:
        doc: Target document.
        element: Grid element; its ``content`` dict is used when present, otherwise
            the element itself. Reads ``images`` (list of image dicts) and
            ``columns`` (default 2, minimum 1; invalid values fall back to 2).
        styles: Accepted for signature parity with the other element renderers;
            not used here.

    Images that cannot be resolved are skipped. Nothing is rendered when no image
    remains. A picture that fails to embed is logged and its cell stays empty; any
    other failure is logged as a warning and never propagates.
    """
    try:
        content = element.get("content", element) if isinstance(element, dict) else {}
        if not isinstance(content, dict):
            return

        images = content.get("images") or []
        if not (isinstance(images, list) and images):
            return

        try:
            columns = max(1, int(content.get("columns", 2)))
        except (TypeError, ValueError):
            columns = 2

        streams = []
        for imageSpec in images:
            imageStream = self._imageStreamFromContent(imageSpec)
            if imageStream is not None:
                streams.append(imageStream)
        if not streams:
            return

        # Ceiling division: enough rows to place every resolved image.
        rowCount = -(-len(streams) // columns)
        table = doc.add_table(rows=rowCount, cols=columns)
        # Split a 6.5 inch text width across the columns, never below 1 inch.
        cellWidthInches = max(1.0, 6.5 / columns - 0.1)

        for position, imageStream in enumerate(streams):
            rowIdx, colIdx = divmod(position, columns)
            paragraph = table.cell(rowIdx, colIdx).paragraphs[0]
            try:
                paragraph.add_run().add_picture(imageStream, width=Inches(cellWidthInches))
            except Exception as ie:
                self.logger.warning(f"image_grid cell failed: {ie}")

        doc.add_paragraph("")
    except Exception as e:
        self.logger.warning(f"Error rendering image_grid: {e}")
|
||||||
|
|
||||||
def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON image to DOCX."""
|
"""Render a JSON image to DOCX."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -1056,6 +1149,11 @@ class RendererDocx(BaseRenderer):
|
||||||
if not caption:
|
if not caption:
|
||||||
caption = image_data.get("caption", "")
|
caption = image_data.get("caption", "")
|
||||||
|
|
||||||
|
# Last resort: lazily resolve a fileId reference to bytes on demand
|
||||||
|
# (large-document path - keeps image bytes out of the document JSON).
|
||||||
|
if not base64_data:
|
||||||
|
base64_data = self._lazyResolveImageBase64(content if isinstance(content, dict) else None, image_data)
|
||||||
|
|
||||||
# CRITICAL: Ensure we don't render base64 data as text
|
# CRITICAL: Ensure we don't render base64 data as text
|
||||||
# If base64_data looks like it might be rendered elsewhere, skip it
|
# If base64_data looks like it might be rendered elsewhere, skip it
|
||||||
if not base64_data:
|
if not base64_data:
|
||||||
|
|
|
||||||
|
|
@ -165,6 +165,10 @@ class RendererPdf(BaseRenderer):
|
||||||
|
|
||||||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
|
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
|
||||||
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
||||||
|
# Large-document streaming: image flowables are backed by temp files (reportlab
|
||||||
|
# reads them from disk at build time) so image bytes are not all resident in
|
||||||
|
# memory simultaneously. Collected here, deleted after the build.
|
||||||
|
self._tempImageFiles = []
|
||||||
try:
|
try:
|
||||||
# Get style set from unified style or legacy approach
|
# Get style set from unified style or legacy approach
|
||||||
if unifiedStyle:
|
if unifiedStyle:
|
||||||
|
|
@ -210,7 +214,13 @@ class RendererPdf(BaseRenderer):
|
||||||
document_title = (title or "").strip()
|
document_title = (title or "").strip()
|
||||||
if not document_title and isinstance(metadata, dict):
|
if not document_title and isinstance(metadata, dict):
|
||||||
document_title = (metadata.get("title") or "").strip()
|
document_title = (metadata.get("title") or "").strip()
|
||||||
if document_title:
|
# A cover_page section already renders its own title page; rendering the
|
||||||
|
# standalone document title on top of it would duplicate the title.
|
||||||
|
hasCoverPage = any(
|
||||||
|
(s.get("content_type") if isinstance(s, dict) else "") == "cover_page"
|
||||||
|
for s in sections
|
||||||
|
)
|
||||||
|
if document_title and not hasCoverPage:
|
||||||
story.append(self._paragraphFromInlineMarkdown(document_title, self._createDocumentTitleStyle(styles)))
|
story.append(self._paragraphFromInlineMarkdown(document_title, self._createDocumentTitleStyle(styles)))
|
||||||
|
|
||||||
# Process each section (sections already extracted above)
|
# Process each section (sections already extracted above)
|
||||||
|
|
@ -233,6 +243,18 @@ class RendererPdf(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
|
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
|
||||||
raise Exception(f"PDF generation failed: {str(e)}")
|
raise Exception(f"PDF generation failed: {str(e)}")
|
||||||
|
finally:
|
||||||
|
self._cleanupTempImageFiles()
|
||||||
|
|
||||||
|
def _cleanupTempImageFiles(self) -> None:
|
||||||
|
"""Delete temp image files created for streamed (file-backed) PDF images."""
|
||||||
|
import os
|
||||||
|
for path in getattr(self, "_tempImageFiles", []) or []:
|
||||||
|
try:
|
||||||
|
os.unlink(path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
self._tempImageFiles = []
|
||||||
|
|
||||||
def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None:
|
def _buildPdfWithOverflowGuard(self, doc, story: List[Any], buffer) -> None:
|
||||||
"""Try doc.build(); on 'too large on page' LayoutError, drop the offending
|
"""Try doc.build(); on 'too large on page' LayoutError, drop the offending
|
||||||
|
|
@ -790,6 +812,10 @@ class RendererPdf(BaseRenderer):
|
||||||
all_elements.extend(self._renderJsonCodeBlock(element, styles))
|
all_elements.extend(self._renderJsonCodeBlock(element, styles))
|
||||||
elif element_type == "image":
|
elif element_type == "image":
|
||||||
all_elements.extend(self._renderJsonImage(element, styles))
|
all_elements.extend(self._renderJsonImage(element, styles))
|
||||||
|
elif element_type == "cover_page" or section_type == "cover_page":
|
||||||
|
all_elements.extend(self._renderCoverPage(element, styles))
|
||||||
|
elif element_type == "image_grid" or section_type == "image_grid":
|
||||||
|
all_elements.extend(self._renderImageGrid(element, styles))
|
||||||
else:
|
else:
|
||||||
# Fallback: if element_type not set, use section_type as fallback
|
# Fallback: if element_type not set, use section_type as fallback
|
||||||
if section_type == "table":
|
if section_type == "table":
|
||||||
|
|
@ -1057,6 +1083,83 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _renderCoverPage(self, element: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a cover/title page: centered title, subtitle, author, date, optional logo.

    Content keys: title, subtitle, author, date, logo (fileId/_fileRef) or
    ``logoBase64``. The returned flowables end with a page break so the body
    starts on the next page.

    Args:
        element: The cover_page element; its ``content`` dict (or the element
            itself when it has no ``content`` key) carries the keys above.
        styles: Resolved style dict passed through to the style helpers.

    Returns:
        List of reportlab flowables for the cover page.
    """
    from reportlab.platypus import Spacer, PageBreak

    content = element.get("content", element) if isinstance(element, dict) else {}
    if not isinstance(content, dict):
        content = {}

    def _coverText(key: str) -> str:
        # Coerce to str so a non-string value (e.g. a numeric year in "date")
        # cannot raise on .strip() and abort the whole PDF build.
        return str(content.get(key) or "").strip()

    out: List[Any] = [Spacer(1, 160)]
    # Optional logo at the top of the cover. When present, the flowables
    # returned for the logo replace the initial top spacer.
    logoB64 = self._lazyResolveImageBase64(content) or content.get("logoBase64", "")
    if logoB64:
        out = self._renderJsonImage({"content": {"base64Data": logoB64, "altText": "Logo"}}, styles)
        out.append(Spacer(1, 60))

    title = _coverText("title")
    if title:
        out.append(self._paragraphFromInlineMarkdown(title, self._createDocumentTitleStyle(styles)))
        out.append(Spacer(1, 18))

    for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)):
        val = _coverText(key)
        if not val:
            continue
        # alignment=1 is reportlab's TA_CENTER.
        st = ParagraphStyle(f"cover_{key}", parent=self._createNormalStyle(styles), alignment=1, fontSize=sizePt)
        out.append(Paragraph(self._escapeReportlabXml(val), st))
        out.append(Spacer(1, 8))

    out.append(PageBreak())
    return out
|
||||||
|
|
||||||
|
def _renderImageGrid(self, element: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Arrange the images of an ``image_grid`` element side by side in N columns.

    Content: ``columns`` (int, default 2, minimum 1) and ``images`` (list of
    dicts, each rendered through ``_renderJsonImage``). Only the image
    flowable of each entry is kept, so captions are dropped. The flowables are
    placed in a reportlab Table with no TableStyle applied.

    Returns:
        ``[table, spacer]``, or an empty list when there is nothing to render.
    """
    from reportlab.platypus import Table, Spacer

    payload = element.get("content", element) if isinstance(element, dict) else {}
    if not isinstance(payload, dict):
        return []
    imageSpecs = payload.get("images") or []
    if not (isinstance(imageSpecs, list) and imageSpecs):
        return []
    try:
        columnCount = max(1, int(payload.get("columns", 2)))
    except (TypeError, ValueError):
        columnCount = 2

    from reportlab.platypus import Image as ReportLabImage

    cells: List[Any] = []
    for spec in imageSpecs:
        if not isinstance(spec, dict):
            continue
        flowables = self._renderJsonImage({"content": spec}, styles)
        # Prefer the picture itself; otherwise take whatever was rendered first
        # (e.g. a placeholder), or a tiny spacer if nothing was rendered at all.
        picture = next((f for f in flowables if isinstance(f, ReportLabImage)), None)
        if picture is not None:
            cells.append(picture)
        elif flowables:
            cells.append(flowables[0])
        else:
            cells.append(Spacer(1, 1))
    if not cells:
        return []

    # Fill the last row with spacers so every row has exactly columnCount cells.
    shortfall = -len(cells) % columnCount
    cells.extend(Spacer(1, 1) for _ in range(shortfall))
    gridRows = [cells[start:start + columnCount] for start in range(0, len(cells), columnCount)]

    colW = _PDF_CONTENT_WIDTH_PT / columnCount
    # Shrink any image wider than its column (minus 8pt), keeping the aspect ratio.
    widthLimit = colW - 8
    for flowable in cells:
        if isinstance(flowable, ReportLabImage) and getattr(flowable, "drawWidth", 0) > widthLimit:
            factor = widthLimit / flowable.drawWidth
            flowable.drawWidth *= factor
            flowable.drawHeight *= factor

    grid = Table(gridRows, colWidths=[colW] * columnCount)
    return [grid, Spacer(1, 10)]
|
||||||
|
|
||||||
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON image to PDF elements using reportlab."""
|
"""Render a JSON image to PDF elements using reportlab."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -1094,6 +1197,11 @@ class RendererPdf(BaseRenderer):
|
||||||
if match:
|
if match:
|
||||||
base64_data = match.group(1)
|
base64_data = match.group(1)
|
||||||
|
|
||||||
|
# Last resort: lazily resolve a fileId reference to bytes on demand
|
||||||
|
# (large-document path - keeps image bytes out of the document JSON).
|
||||||
|
if not base64_data:
|
||||||
|
base64_data = self._lazyResolveImageBase64(content if isinstance(content, dict) else None, image_data)
|
||||||
|
|
||||||
if not base64_data:
|
if not base64_data:
|
||||||
self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
|
self.logger.warning(f"No base64 data found for image. Alt text: {alt_text}")
|
||||||
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
|
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
|
||||||
|
|
@ -1183,8 +1291,20 @@ class RendererPdf(BaseRenderer):
|
||||||
imgHeight = 3 * inch # ~216 points, safe for ~751pt available height
|
imgHeight = 3 * inch # ~216 points, safe for ~751pt available height
|
||||||
imageStream.seek(0)
|
imageStream.seek(0)
|
||||||
|
|
||||||
# Create reportlab Image
|
# Create reportlab Image from a TEMP FILE rather than the in-memory
|
||||||
reportlabImage = ReportLabImage(imageStream, width=imgWidth, height=imgHeight)
|
# stream: reportlab reads file-backed images lazily at build time, so
|
||||||
|
# the bytes of all images are not held in memory at once (large-doc path).
|
||||||
|
import tempfile
|
||||||
|
imageStream.seek(0)
|
||||||
|
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".img")
|
||||||
|
try:
|
||||||
|
tmp.write(imageStream.read())
|
||||||
|
finally:
|
||||||
|
tmp.close()
|
||||||
|
if not hasattr(self, "_tempImageFiles") or self._tempImageFiles is None:
|
||||||
|
self._tempImageFiles = []
|
||||||
|
self._tempImageFiles.append(tmp.name)
|
||||||
|
reportlabImage = ReportLabImage(tmp.name, width=imgWidth, height=imgHeight)
|
||||||
|
|
||||||
elements = [reportlabImage]
|
elements = [reportlabImage]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,97 @@ DEFAULT_STYLE: Dict[str, Any] = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Theme presets (A3): named, purpose-specific style overrides that are
|
||||||
|
# deep-merged onto DEFAULT_STYLE. A preset only declares the keys it changes;
|
||||||
|
# everything else inherits the default. Explicit per-call `style` overrides
|
||||||
|
# always win over the preset.
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
THEME_PRESETS: Dict[str, Dict[str, Any]] = {
|
||||||
|
# "general" intentionally empty -> identical to DEFAULT_STYLE.
|
||||||
|
"general": {},
|
||||||
|
"finance": {
|
||||||
|
"fonts": {"primary": "Calibri"},
|
||||||
|
"colors": {"primary": "#0B3D2E", "secondary": "#14532D", "accent": "#047857"},
|
||||||
|
"documentTitle": {"color": "#0B3D2E", "align": "left"},
|
||||||
|
"headings": {
|
||||||
|
"h1": {"color": "#0B3D2E"},
|
||||||
|
"h2": {"color": "#0B3D2E"},
|
||||||
|
"h3": {"color": "#14532D"},
|
||||||
|
"h4": {"color": "#14532D"},
|
||||||
|
},
|
||||||
|
"table": {"headerBg": "#0B3D2E", "rowBandingEven": "#ECFDF5"},
|
||||||
|
},
|
||||||
|
"legal": {
|
||||||
|
# Serif, sober, single-column, justified body, no logo banner.
|
||||||
|
"fonts": {"primary": "Times New Roman"},
|
||||||
|
"colors": {"primary": "#1A1A1A", "secondary": "#333333", "accent": "#5A5A5A"},
|
||||||
|
"documentTitle": {"color": "#1A1A1A", "align": "center", "sizePt": 20},
|
||||||
|
"headings": {
|
||||||
|
"h1": {"color": "#1A1A1A", "sizePt": 16},
|
||||||
|
"h2": {"color": "#1A1A1A", "sizePt": 14},
|
||||||
|
"h3": {"color": "#333333", "sizePt": 12},
|
||||||
|
"h4": {"color": "#333333", "sizePt": 11},
|
||||||
|
},
|
||||||
|
"paragraph": {"sizePt": 11, "lineSpacing": 1.5, "color": "#1A1A1A", "align": "justify"},
|
||||||
|
"table": {"headerBg": "#333333", "rowBandingEven": "#F5F5F5", "borderColor": "#999999"},
|
||||||
|
"page": {"showPageNumbers": True},
|
||||||
|
},
|
||||||
|
"technical": {
|
||||||
|
"fonts": {"primary": "Arial", "monospace": "Consolas"},
|
||||||
|
"colors": {"primary": "#0F172A", "secondary": "#1E293B", "accent": "#2563EB"},
|
||||||
|
"documentTitle": {"color": "#0F172A", "align": "left"},
|
||||||
|
"headings": {
|
||||||
|
"h1": {"color": "#0F172A"},
|
||||||
|
"h2": {"color": "#1E293B"},
|
||||||
|
"h3": {"color": "#1E293B"},
|
||||||
|
"h4": {"color": "#334155"},
|
||||||
|
},
|
||||||
|
"paragraph": {"sizePt": 10, "lineSpacing": 1.2},
|
||||||
|
"codeBlock": {"fontSizePt": 9, "background": "#0F172A"},
|
||||||
|
"table": {"headerBg": "#1E293B", "rowBandingEven": "#EEF2FF"},
|
||||||
|
},
|
||||||
|
"hr": {
|
||||||
|
"fonts": {"primary": "Calibri"},
|
||||||
|
"colors": {"primary": "#5B21B6", "secondary": "#6D28D9", "accent": "#9333EA"},
|
||||||
|
"documentTitle": {"color": "#5B21B6", "align": "center"},
|
||||||
|
"headings": {
|
||||||
|
"h1": {"color": "#5B21B6"},
|
||||||
|
"h2": {"color": "#6D28D9"},
|
||||||
|
"h3": {"color": "#7C3AED"},
|
||||||
|
"h4": {"color": "#7C3AED"},
|
||||||
|
},
|
||||||
|
"table": {"headerBg": "#5B21B6", "rowBandingEven": "#F5F3FF"},
|
||||||
|
},
|
||||||
|
"marketing": {
|
||||||
|
# Bold, image-friendly, generous spacing, larger title.
|
||||||
|
"fonts": {"primary": "Verdana"},
|
||||||
|
"colors": {"primary": "#BE123C", "secondary": "#E11D48", "accent": "#F59E0B"},
|
||||||
|
"documentTitle": {"color": "#BE123C", "sizePt": 34, "align": "center", "spaceAfterPt": 24},
|
||||||
|
"headings": {
|
||||||
|
"h1": {"color": "#BE123C", "sizePt": 24},
|
||||||
|
"h2": {"color": "#E11D48", "sizePt": 19},
|
||||||
|
"h3": {"color": "#E11D48", "sizePt": 15},
|
||||||
|
"h4": {"color": "#9F1239", "sizePt": 13},
|
||||||
|
},
|
||||||
|
"paragraph": {"sizePt": 12, "lineSpacing": 1.3},
|
||||||
|
"image": {"defaultWidthPt": 540, "maxWidthPt": 900, "alignment": "center"},
|
||||||
|
"table": {"headerBg": "#BE123C", "rowBandingEven": "#FFF1F2"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def resolveTheme(themeName: str | None) -> Dict[str, Any]:
|
||||||
|
"""Return the partial style override for a named theme preset.
|
||||||
|
|
||||||
|
Unknown / empty names fall back to ``{}`` (i.e. plain DEFAULT_STYLE).
|
||||||
|
The lookup is case-insensitive.
|
||||||
|
"""
|
||||||
|
if not themeName:
|
||||||
|
return {}
|
||||||
|
return dict(THEME_PRESETS.get(str(themeName).strip().lower(), {}))
|
||||||
|
|
||||||
|
|
||||||
def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
|
def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Recursively merge override into base. Both dicts left unchanged; returns new dict."""
|
"""Recursively merge override into base. Both dicts left unchanged; returns new dict."""
|
||||||
result = {}
|
result = {}
|
||||||
|
|
@ -76,8 +167,17 @@ def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def resolveStyle(agentStyle: dict | None, documentTheme: str | None = None) -> Dict[str, Any]:
    """Build the effective style by layering overrides onto ``DEFAULT_STYLE``.

    Layers are applied from lowest to highest precedence: the platform
    defaults, then the preset named by ``documentTheme``, then the explicit
    per-call ``agentStyle``. Empty layers are skipped, so with neither a theme
    nor an override the result is a copy of the defaults.
    """
    effective = dict(DEFAULT_STYLE)
    # Later layers win: the theme preset first, the explicit override last.
    for layer in (resolveTheme(documentTheme), agentStyle):
        if layer:
            effective = _deepMerge(effective, layer)
    return effective
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,68 @@ def _parseInlineRuns(text: str) -> list:
|
||||||
return runs if runs else [{"type": "text", "value": text}]
|
return runs if runs else [{"type": "text", "value": text}]
|
||||||
|
|
||||||
|
|
||||||
|
def _imageRefToDict(token: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Parse one image reference line into an image content dict.
|
||||||
|
|
||||||
|
Accepts markdown image syntax ```` / ````
|
||||||
|
or a bare ``file:ID`` / URL. Returns None for blank lines.
|
||||||
|
"""
|
||||||
|
token = (token or "").strip()
|
||||||
|
if not token:
|
||||||
|
return None
|
||||||
|
m = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", token)
|
||||||
|
if m:
|
||||||
|
alt = (m.group(1) or "").strip() or "Image"
|
||||||
|
src = (m.group(2) or "").strip()
|
||||||
|
widthStr = m.group(3)
|
||||||
|
else:
|
||||||
|
alt, src, widthStr = "Image", token, None
|
||||||
|
fileId = src[5:] if src.startswith("file:") else ""
|
||||||
|
out: Dict[str, Any] = {"altText": alt, "base64Data": "", "_fileRef": fileId, "_srcUrl": src if not fileId else ""}
|
||||||
|
if widthStr:
|
||||||
|
out["widthPt"] = int(widthStr)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _parseCoverPageBlock(codeLines: List[str]) -> Dict[str, Any]:
|
||||||
|
"""Parse a ```cover_page fenced block of ``key: value`` lines.
|
||||||
|
|
||||||
|
Recognized keys: title, subtitle, author, date, logo (``file:ID`` or URL).
|
||||||
|
"""
|
||||||
|
content: Dict[str, Any] = {}
|
||||||
|
for raw in codeLines:
|
||||||
|
if ":" not in raw:
|
||||||
|
continue
|
||||||
|
key, _, value = raw.partition(":")
|
||||||
|
key = key.strip().lower()
|
||||||
|
value = value.strip()
|
||||||
|
if not value:
|
||||||
|
continue
|
||||||
|
if key in ("title", "subtitle", "author", "date"):
|
||||||
|
content[key] = value
|
||||||
|
elif key == "logo":
|
||||||
|
content["_fileRef"] = value[5:] if value.startswith("file:") else ""
|
||||||
|
if not content["_fileRef"]:
|
||||||
|
content["_srcUrl"] = value
|
||||||
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def _parseImageGridBlock(codeLines: List[str]) -> Dict[str, Any]:
|
||||||
|
"""Parse a ```image_grid fenced block: optional ``columns: N`` plus image refs."""
|
||||||
|
columns = 2
|
||||||
|
images: List[Dict[str, Any]] = []
|
||||||
|
for raw in codeLines:
|
||||||
|
stripped = raw.strip()
|
||||||
|
m = re.match(r"^columns\s*:\s*(\d+)\s*$", stripped, re.IGNORECASE)
|
||||||
|
if m:
|
||||||
|
columns = max(1, int(m.group(1)))
|
||||||
|
continue
|
||||||
|
img = _imageRefToDict(stripped)
|
||||||
|
if img:
|
||||||
|
images.append(img)
|
||||||
|
return {"columns": columns, "images": images}
|
||||||
|
|
||||||
|
|
||||||
def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
|
def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Convert markdown content to the standard document JSON format with Inline-Run model.
|
Convert markdown content to the standard document JSON format with Inline-Run model.
|
||||||
|
|
@ -178,6 +240,19 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
|
||||||
codeLines.append(lines[i])
|
codeLines.append(lines[i])
|
||||||
i += 1
|
i += 1
|
||||||
i += 1
|
i += 1
|
||||||
|
# Layout primitives are authored as fenced blocks with a special "language".
|
||||||
|
if lang == "cover_page":
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "cover_page", "order": order,
|
||||||
|
"elements": [{"content": _parseCoverPageBlock(codeLines)}],
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
if lang == "image_grid":
|
||||||
|
sections.append({
|
||||||
|
"id": _nextId(), "content_type": "image_grid", "order": order,
|
||||||
|
"elements": [{"content": _parseImageGridBlock(codeLines)}],
|
||||||
|
})
|
||||||
|
continue
|
||||||
sections.append({
|
sections.append({
|
||||||
"id": _nextId(), "content_type": "code_block", "order": order,
|
"id": _nextId(), "content_type": "code_block", "order": order,
|
||||||
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
|
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ The UDB shows three logical hierarchies as a single user-facing tree:
|
||||||
For every visible node the UI needs:
|
For every visible node the UI needs:
|
||||||
- a stable `key` (used both for expand-state and as parent reference)
|
- a stable `key` (used both for expand-state and as parent reference)
|
||||||
- a `kind`, `label`, optional `icon`
|
- a `kind`, `label`, optional `icon`
|
||||||
- effective values for all three flags (neutralize, scope, ragIndexEnabled)
|
- effective values for flags (neutralize, ragIndexEnabled)
|
||||||
- whether a backing DB record exists (`dataSourceId` + `modelType`)
|
- whether a backing DB record exists (`dataSourceId` + `modelType`)
|
||||||
- whether the node has children to expand
|
- whether the node has children to expand
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
"""Cascade-inherit semantics for DataSource flags (neutralize, ragIndexEnabled, scope).
|
"""Cascade-inherit semantics for DataSource flags (neutralize, ragIndexEnabled).
|
||||||
|
|
||||||
Three-state flags allow tree elements to either set an explicit value or
|
Three-state flags allow tree elements to either set an explicit value or
|
||||||
inherit the value from their nearest ancestor in the path hierarchy.
|
inherit the value from their nearest ancestor in the path hierarchy.
|
||||||
|
|
@ -19,7 +19,8 @@ Path-traversal rules:
|
||||||
- Sub-elements have paths like `/folder1/sub`. Their parent path is the
|
- Sub-elements have paths like `/folder1/sub`. Their parent path is the
|
||||||
longest prefix path that exists as a DataSource record (string-based).
|
longest prefix path that exists as a DataSource record (string-based).
|
||||||
- If no ancestor with an explicit value exists, the default is `False`
|
- If no ancestor with an explicit value exists, the default is `False`
|
||||||
(or `'personal'` for scope) — matching the legacy behavior of NULL = inherit.
|
— matching the legacy behavior of NULL = inherit.
|
||||||
|
(scope was removed from DataSource in 2026-06 for privacy reasons.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -27,7 +28,7 @@ from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_INHERITABLE_FLAGS = ("neutralize", "ragIndexEnabled", "scope")
|
_INHERITABLE_FLAGS = ("neutralize", "ragIndexEnabled")
|
||||||
_INHERITABLE_FDS_FLAGS = ("neutralize", "ragIndexEnabled")
|
_INHERITABLE_FDS_FLAGS = ("neutralize", "ragIndexEnabled")
|
||||||
|
|
||||||
# Connection-root DataSources carry the authority as their sourceType
|
# Connection-root DataSources carry the authority as their sourceType
|
||||||
|
|
@ -56,8 +57,6 @@ def _normalisePath(path: Optional[str]) -> str:
|
||||||
|
|
||||||
|
|
||||||
def _flagDefault(flag: str) -> Any:
|
def _flagDefault(flag: str) -> Any:
|
||||||
if flag == "scope":
|
|
||||||
return "personal"
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -545,7 +544,8 @@ def resolveEffectiveForPath(
|
||||||
"""Resolve effective flags for ANY (connectionId, sourceType, path) tuple.
|
"""Resolve effective flags for ANY (connectionId, sourceType, path) tuple.
|
||||||
|
|
||||||
Works whether or not a DataSource record exists for this exact path.
|
Works whether or not a DataSource record exists for this exact path.
|
||||||
Returns dict with effectiveNeutralize, effectiveScope, effectiveRagIndexEnabled.
|
Returns dict with effectiveNeutralize, effectiveRagIndexEnabled.
|
||||||
|
(effectiveScope removed 2026-06 — personal sources have no scope.)
|
||||||
"""
|
"""
|
||||||
normPath = _normalisePath(path)
|
normPath = _normalisePath(path)
|
||||||
exactRecord = None
|
exactRecord = None
|
||||||
|
|
@ -561,7 +561,6 @@ def resolveEffectiveForPath(
|
||||||
if exactRecord:
|
if exactRecord:
|
||||||
return {
|
return {
|
||||||
"effectiveNeutralize": getEffectiveFlag(exactRecord, "neutralize", allDs, mode=mode),
|
"effectiveNeutralize": getEffectiveFlag(exactRecord, "neutralize", allDs, mode=mode),
|
||||||
"effectiveScope": getEffectiveFlag(exactRecord, "scope", allDs, mode=mode),
|
|
||||||
"effectiveRagIndexEnabled": getEffectiveFlag(exactRecord, "ragIndexEnabled", allDs, mode=mode),
|
"effectiveRagIndexEnabled": getEffectiveFlag(exactRecord, "ragIndexEnabled", allDs, mode=mode),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -571,12 +570,10 @@ def resolveEffectiveForPath(
|
||||||
"sourceType": sourceType,
|
"sourceType": sourceType,
|
||||||
"path": normPath,
|
"path": normPath,
|
||||||
"neutralize": None,
|
"neutralize": None,
|
||||||
"scope": None,
|
|
||||||
"ragIndexEnabled": None,
|
"ragIndexEnabled": None,
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
"effectiveNeutralize": getEffectiveFlag(virtualRec, "neutralize", allDs, mode=mode),
|
"effectiveNeutralize": getEffectiveFlag(virtualRec, "neutralize", allDs, mode=mode),
|
||||||
"effectiveScope": getEffectiveFlag(virtualRec, "scope", allDs, mode=mode),
|
|
||||||
"effectiveRagIndexEnabled": getEffectiveFlag(virtualRec, "ragIndexEnabled", allDs, mode=mode),
|
"effectiveRagIndexEnabled": getEffectiveFlag(virtualRec, "ragIndexEnabled", allDs, mode=mode),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@ def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list]
|
||||||
over all DataSources whose effective value resolves to True, including
|
over all DataSources whose effective value resolves to True, including
|
||||||
inherited ones.
|
inherited ones.
|
||||||
|
|
||||||
Returned dicts carry **resolved** flags (`neutralize`, `scope`) so the
|
Returned dicts carry **resolved** flags (`neutralize`) so the
|
||||||
downstream walkers can keep reading `ds.get("neutralize")` directly
|
downstream walkers can keep reading `ds.get("neutralize")` directly
|
||||||
without having to know about the inheritance chain.
|
without having to know about the inheritance chain.
|
||||||
|
|
||||||
|
|
@ -185,7 +185,6 @@ def _loadRagEnabledDataSources(connectionId: str, dataSourceIds: Optional[list]
|
||||||
continue
|
continue
|
||||||
dsCopy = dict(ds) if isinstance(ds, dict) else {**ds.__dict__}
|
dsCopy = dict(ds) if isinstance(ds, dict) else {**ds.__dict__}
|
||||||
dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs)
|
dsCopy["neutralize"] = getEffectiveFlag(ds, "neutralize", allDs)
|
||||||
dsCopy["scope"] = getEffectiveFlag(ds, "scope", allDs)
|
|
||||||
dsCopy["ragIndexEnabled"] = True
|
dsCopy["ragIndexEnabled"] = True
|
||||||
|
|
||||||
if connectionRootLimits:
|
if connectionRootLimits:
|
||||||
|
|
|
||||||
|
|
@ -152,7 +152,6 @@ async def _featureBootstrapHandler(
|
||||||
|
|
||||||
try:
|
try:
|
||||||
dbConnector = _getFeatureDbConnector(featureCode)
|
dbConnector = _getFeatureDbConnector(featureCode)
|
||||||
provider = FeatureDataProvider(dbConnector)
|
|
||||||
|
|
||||||
rootUser = getRootUser()
|
rootUser = getRootUser()
|
||||||
ctx = ServiceCenterContext(
|
ctx = ServiceCenterContext(
|
||||||
|
|
@ -162,6 +161,22 @@ async def _featureBootstrapHandler(
|
||||||
)
|
)
|
||||||
knowledgeService = getService("knowledge", ctx)
|
knowledgeService = getService("knowledge", ctx)
|
||||||
|
|
||||||
|
# A2: index the SAME neutralized text the query path returns (parity).
|
||||||
|
neutralizationService = getService("neutralization", ctx)
|
||||||
|
neutralizePolicy = None
|
||||||
|
if effectiveNeutralize or neutralizeFields:
|
||||||
|
neutralizePolicy = {
|
||||||
|
tableName: {
|
||||||
|
"tableActive": bool(effectiveNeutralize),
|
||||||
|
"explicitFields": set(neutralizeFields),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
provider = FeatureDataProvider(
|
||||||
|
dbConnector,
|
||||||
|
neutralizePolicy=neutralizePolicy,
|
||||||
|
neutralizationService=neutralizationService,
|
||||||
|
)
|
||||||
|
|
||||||
extraFilters = [
|
extraFilters = [
|
||||||
{"field": k, "op": "=", "value": v}
|
{"field": k, "op": "=", "value": v}
|
||||||
for k, v in recordFilter.items()
|
for k, v in recordFilter.items()
|
||||||
|
|
@ -186,12 +201,16 @@ async def _featureBootstrapHandler(
|
||||||
if not rows:
|
if not rows:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Apply the A2 field-neutralization policy + JSON-serialize (same as
|
||||||
|
# the sub-agent query path) before building the embedding text.
|
||||||
|
rows = await provider.finalizeRowsAsync(tableName, rows)
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
rowId = row.get("id", "")
|
rowId = row.get("id", "")
|
||||||
if not rowId:
|
if not rowId:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
textContent = _serializeRowToText(row, neutralizeFields if effectiveNeutralize else None)
|
textContent = _serializeRowToText(row)
|
||||||
if not textContent.strip():
|
if not textContent.strip():
|
||||||
fdsSkipped += 1
|
fdsSkipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -77,10 +77,11 @@ class UdbNode(ABC):
|
||||||
def supportsFlag(self, flag: str) -> bool:
    """Tell whether this node carries a value for ``flag`` at all.

    The base implementation accepts ``neutralize`` and ``ragIndexEnabled``.
    Subclasses override to restrict further (e.g. fdsField only has
    neutralize). Scope was removed from DataSource nodes (privacy, 2026-06)
    and never existed on FDS nodes; only Files (folder-files) retain scope.
    """
    supportedFlags = ("neutralize", "ragIndexEnabled")
    return flag in supportedFlags
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def canEdit(self, context: Any, rootIf: Any) -> bool:
|
def canEdit(self, context: Any, rootIf: Any) -> bool:
|
||||||
|
|
@ -185,7 +186,7 @@ class SyntheticContainerNode(UdbNode):
|
||||||
"dataSourceId": None,
|
"dataSourceId": None,
|
||||||
"modelType": None,
|
"modelType": None,
|
||||||
"effectiveNeutralize": self.getEffectiveFlag("neutralize", allDs, allFds, "aggregate"),
|
"effectiveNeutralize": self.getEffectiveFlag("neutralize", allDs, allFds, "aggregate"),
|
||||||
"effectiveScope": self.getEffectiveFlag("scope", allDs, allFds, "aggregate") or "personal",
|
"effectiveScope": "personal",
|
||||||
"effectiveRagIndexEnabled": self.getEffectiveFlag("ragIndexEnabled", allDs, allFds, "aggregate"),
|
"effectiveRagIndexEnabled": self.getEffectiveFlag("ragIndexEnabled", allDs, allFds, "aggregate"),
|
||||||
"supportsRag": False,
|
"supportsRag": False,
|
||||||
"canBeAdded": False,
|
"canBeAdded": False,
|
||||||
|
|
@ -248,12 +249,14 @@ class _DataSourceFamilyNode(UdbNode):
|
||||||
return _isConnectionOwner(rootIf, str(context.user.id), self.connectionId)
|
return _isConnectionOwner(rootIf, str(context.user.id), self.connectionId)
|
||||||
|
|
||||||
def getEffectiveFlag(self, flag, allDs, allFds, mode="aggregate") -> Any:
|
def getEffectiveFlag(self, flag, allDs, allFds, mode="aggregate") -> Any:
|
||||||
|
if not self.supportsFlag(flag):
|
||||||
|
return False
|
||||||
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
from modules.serviceCenter.services.serviceKnowledge._inheritFlags import (
|
||||||
resolveEffectiveForPath,
|
resolveEffectiveForPath,
|
||||||
)
|
)
|
||||||
out = resolveEffectiveForPath(self.connectionId, self.sourceType, self.path, allDs, mode=mode)
|
out = resolveEffectiveForPath(self.connectionId, self.sourceType, self.path, allDs, mode=mode)
|
||||||
key = "effective" + flag[0].upper() + flag[1:]
|
key = "effective" + flag[0].upper() + flag[1:]
|
||||||
return out.get(key, False if flag != "scope" else "personal")
|
return out.get(key, False)
|
||||||
|
|
||||||
def setFlag(self, flag, value, rootIf) -> List[str]:
|
def setFlag(self, flag, value, rootIf) -> List[str]:
|
||||||
from modules.datamodels.datamodelDataSource import DataSource
|
from modules.datamodels.datamodelDataSource import DataSource
|
||||||
|
|
@ -371,7 +374,7 @@ def _dsDict(node: _DataSourceFamilyNode, allDs: List[Dict[str, Any]]) -> Dict[st
|
||||||
"dataSourceId": node.rec.get("id") if node.rec else None,
|
"dataSourceId": node.rec.get("id") if node.rec else None,
|
||||||
"modelType": "DataSource" if node.rec else None,
|
"modelType": "DataSource" if node.rec else None,
|
||||||
"effectiveNeutralize": node.getEffectiveFlag("neutralize", allDs, [], "aggregate"),
|
"effectiveNeutralize": node.getEffectiveFlag("neutralize", allDs, [], "aggregate"),
|
||||||
"effectiveScope": node.getEffectiveFlag("scope", allDs, [], "aggregate"),
|
"effectiveScope": "personal",
|
||||||
"effectiveRagIndexEnabled": node.getEffectiveFlag("ragIndexEnabled", allDs, [], "aggregate"),
|
"effectiveRagIndexEnabled": node.getEffectiveFlag("ragIndexEnabled", allDs, [], "aggregate"),
|
||||||
"supportsRag": True,
|
"supportsRag": True,
|
||||||
"canBeAdded": node.rec is None,
|
"canBeAdded": node.rec is None,
|
||||||
|
|
@ -900,8 +903,6 @@ def _aggregateFromChildren(node: UdbNode, flag: str,
|
||||||
"""
|
"""
|
||||||
children = node.getLogicalChildren(allDs, allFds, None, None)
|
children = node.getLogicalChildren(allDs, allFds, None, None)
|
||||||
if not children:
|
if not children:
|
||||||
if flag == "scope":
|
|
||||||
return "personal"
|
|
||||||
return False
|
return False
|
||||||
seen = set()
|
seen = set()
|
||||||
last: Any = None
|
last: Any = None
|
||||||
|
|
@ -917,7 +918,7 @@ def _aggregateFromChildren(node: UdbNode, flag: str,
|
||||||
if len(seen) > 1:
|
if len(seen) > 1:
|
||||||
return "mixed"
|
return "mixed"
|
||||||
if not seen:
|
if not seen:
|
||||||
return "personal" if flag == "scope" else False
|
return False
|
||||||
return last
|
return last
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -575,9 +575,6 @@ def _deleteUserDataFromFeatureDatabases(userId: str, currentUser) -> Dict[str, A
|
||||||
elif featureCode == "realestate":
|
elif featureCode == "realestate":
|
||||||
from modules.features.realestate.interfaceFeatureRealEstate import getInterface as getRealEstateInterface
|
from modules.features.realestate.interfaceFeatureRealEstate import getInterface as getRealEstateInterface
|
||||||
featureInterface = getRealEstateInterface(currentUser)
|
featureInterface = getRealEstateInterface(currentUser)
|
||||||
elif featureCode == "chatbot":
|
|
||||||
from modules.features.chatbot.interfaceFeatureChatbot import getInterface as getChatbotInterface
|
|
||||||
featureInterface = getChatbotInterface(currentUser)
|
|
||||||
elif featureCode == "neutralization":
|
elif featureCode == "neutralization":
|
||||||
from modules.features.neutralization.interfaceFeatureNeutralizer import getInterface as getNeutralizerInterface
|
from modules.features.neutralization.interfaceFeatureNeutralizer import getInterface as getNeutralizerInterface
|
||||||
featureInterface = getNeutralizerInterface(currentUser)
|
featureInterface = getNeutralizerInterface(currentUser)
|
||||||
|
|
|
||||||
|
|
@ -440,7 +440,6 @@ def _registerFeatureUiLabels():
|
||||||
"modules.features.workspace.mainWorkspace",
|
"modules.features.workspace.mainWorkspace",
|
||||||
"modules.features.realEstate.mainRealEstate",
|
"modules.features.realEstate.mainRealEstate",
|
||||||
"modules.features.neutralization.mainNeutralization",
|
"modules.features.neutralization.mainNeutralization",
|
||||||
"modules.features.chatbot.mainChatbot",
|
|
||||||
)
|
)
|
||||||
added = 0
|
added = 0
|
||||||
for modPath in _featureModulePaths:
|
for modPath in _featureModulePaths:
|
||||||
|
|
@ -481,7 +480,6 @@ def _registerRbacLabels():
|
||||||
"modules.features.workspace.mainWorkspace",
|
"modules.features.workspace.mainWorkspace",
|
||||||
"modules.features.realEstate.mainRealEstate",
|
"modules.features.realEstate.mainRealEstate",
|
||||||
"modules.features.neutralization.mainNeutralization",
|
"modules.features.neutralization.mainNeutralization",
|
||||||
"modules.features.chatbot.mainChatbot",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
added = 0
|
added = 0
|
||||||
|
|
|
||||||
|
|
@ -330,16 +330,6 @@ NAVIGATION_SECTIONS = [
|
||||||
"adminOnly": True,
|
"adminOnly": True,
|
||||||
"sysAdminOnly": True,
|
"sysAdminOnly": True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "admin-stt-benchmark",
|
|
||||||
"objectKey": "ui.admin.sttBenchmark",
|
|
||||||
"label": t("STT Benchmark"),
|
|
||||||
"icon": "FaMicrophone",
|
|
||||||
"path": "/admin/stt-benchmark",
|
|
||||||
"order": 92,
|
|
||||||
"adminOnly": True,
|
|
||||||
"sysAdminOnly": True,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "admin-languages",
|
"id": "admin-languages",
|
||||||
"objectKey": "ui.admin.languages",
|
"objectKey": "ui.admin.languages",
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
return ActionResult.isFailure(error="prompt is required")
|
return ActionResult.isFailure(error="prompt is required")
|
||||||
|
|
||||||
documentType = parameters.get("documentType")
|
documentType = parameters.get("documentType")
|
||||||
|
documentTheme = parameters.get("documentTheme") or None
|
||||||
# Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers.
|
# Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers.
|
||||||
resultType = parameters.get("outputFormat") or parameters.get("resultType")
|
resultType = parameters.get("outputFormat") or parameters.get("resultType")
|
||||||
if isinstance(resultType, str):
|
if isinstance(resultType, str):
|
||||||
|
|
@ -82,7 +83,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
outputFormat=resultType, # Can be None - AI determines from prompt
|
outputFormat=resultType, # Can be None - AI determines from prompt
|
||||||
title=title,
|
title=title,
|
||||||
parentOperationId=parentOperationId,
|
parentOperationId=parentOperationId,
|
||||||
generationIntent="document" # NEW: Explicit intent, skips detection
|
generationIntent="document", # NEW: Explicit intent, skips detection
|
||||||
|
documentTheme=documentTheme # Named style preset for the renderer
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert AiResponse to ActionResult
|
# Convert AiResponse to ActionResult
|
||||||
|
|
|
||||||
|
|
@ -89,10 +89,10 @@ class MethodAi(MethodBase):
|
||||||
name="documentTheme",
|
name="documentTheme",
|
||||||
type="str",
|
type="str",
|
||||||
frontendType=FrontendType.SELECT,
|
frontendType=FrontendType.SELECT,
|
||||||
frontendOptions=["general", "finance", "legal", "technical", "hr"],
|
frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"],
|
||||||
required=False,
|
required=False,
|
||||||
default="general",
|
default="general",
|
||||||
description="Style hint for the document renderer (e.g. finance, legal). Used by the AI agent to choose colors and layout."
|
description="Named style preset for the document renderer (general/finance/legal/technical/hr/marketing). The agent forwards it to the renderDocument tool's documentTheme."
|
||||||
),
|
),
|
||||||
"resultType": WorkflowActionParameter(
|
"resultType": WorkflowActionParameter(
|
||||||
name="resultType",
|
name="resultType",
|
||||||
|
|
@ -118,7 +118,7 @@ class MethodAi(MethodBase):
|
||||||
frontendType=FrontendType.CHECKBOX,
|
frontendType=FrontendType.CHECKBOX,
|
||||||
required=False,
|
required=False,
|
||||||
default=False,
|
default=False,
|
||||||
description="If true, uses fast simple AI call without document generation pipeline. Use for chatbot responses and simple text generation."
|
description="If true, uses fast simple AI call without document generation pipeline. Use for conversational responses and simple text generation."
|
||||||
),
|
),
|
||||||
"contentParts": WorkflowActionParameter(
|
"contentParts": WorkflowActionParameter(
|
||||||
name="contentParts",
|
name="contentParts",
|
||||||
|
|
@ -385,6 +385,15 @@ class MethodAi(MethodBase):
|
||||||
required=False,
|
required=False,
|
||||||
description="Type of document (content hint for the model); used as title fallback when title is empty."
|
description="Type of document (content hint for the model); used as title fallback when title is empty."
|
||||||
),
|
),
|
||||||
|
"documentTheme": WorkflowActionParameter(
|
||||||
|
name="documentTheme",
|
||||||
|
type="str",
|
||||||
|
frontendType=FrontendType.SELECT,
|
||||||
|
frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"],
|
||||||
|
required=False,
|
||||||
|
default="general",
|
||||||
|
description="Named style preset applied by the renderer (colors, fonts, spacing): general, finance, legal, technical, hr, marketing."
|
||||||
|
),
|
||||||
"resultType": WorkflowActionParameter(
|
"resultType": WorkflowActionParameter(
|
||||||
name="resultType",
|
name="resultType",
|
||||||
type="str",
|
type="str",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
|
|
||||||
from .methodChatbot import MethodChatbot
|
|
||||||
|
|
||||||
__all__ = ['MethodChatbot']
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
|
|
||||||
|
|
@ -1,157 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
|
|
||||||
"""
|
|
||||||
Query Database action for Chatbot operations.
|
|
||||||
Executes SQL queries via the preprocessor connector.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
from typing import Dict, Any
|
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
||||||
from modules.connectors.connectorPreprocessor import PreprocessorConnector
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def queryDatabase(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
||||||
"""
|
|
||||||
Execute a SQL query via the preprocessor connector.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- sqlQuery (str, required): SQL SELECT query to execute. Can also be extracted from analysis_result document if provided in documentList.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Init progress logger
|
|
||||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
|
||||||
operationId = f"chatbot_query_db_{workflowId}_{int(time.time())}"
|
|
||||||
|
|
||||||
# Start progress tracking
|
|
||||||
parentOperationId = parameters.get('parentOperationId')
|
|
||||||
self.services.chat.progressLogStart(
|
|
||||||
operationId,
|
|
||||||
"Database Query",
|
|
||||||
"Executing SQL Query",
|
|
||||||
"Preprocessing API",
|
|
||||||
parentOperationId=parentOperationId
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get SQL query from parameters or extract from documentList
|
|
||||||
sqlQuery = parameters.get("sqlQuery")
|
|
||||||
|
|
||||||
# If sqlQuery not provided, try to extract from documentList (analysis_result)
|
|
||||||
if not sqlQuery:
|
|
||||||
documentListParam = parameters.get("documentList")
|
|
||||||
if documentListParam:
|
|
||||||
# Get documents from previous task
|
|
||||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
|
||||||
if isinstance(documentListParam, str):
|
|
||||||
docList = DocumentReferenceList.from_string_list([documentListParam])
|
|
||||||
elif isinstance(documentListParam, list):
|
|
||||||
docList = DocumentReferenceList.from_string_list(documentListParam)
|
|
||||||
else:
|
|
||||||
docList = documentListParam
|
|
||||||
|
|
||||||
# Get documents from workflow
|
|
||||||
documents = self.services.chat.getChatDocumentsFromDocumentList(docList)
|
|
||||||
|
|
||||||
# Try to extract SQL query from JSON document
|
|
||||||
for doc in documents:
|
|
||||||
try:
|
|
||||||
# ChatDocument objects have fileId - get file data from database
|
|
||||||
if hasattr(doc, 'fileId') and doc.fileId:
|
|
||||||
# Get file data from database
|
|
||||||
fileData = self.services.interfaceDbComponent.getFileData(doc.fileId)
|
|
||||||
if fileData:
|
|
||||||
# Decode bytes if needed
|
|
||||||
if isinstance(fileData, bytes):
|
|
||||||
docData = fileData.decode('utf-8')
|
|
||||||
else:
|
|
||||||
docData = str(fileData)
|
|
||||||
|
|
||||||
# Try to parse as JSON
|
|
||||||
analysisData = json.loads(docData)
|
|
||||||
sqlQuery = analysisData.get("sqlQuery")
|
|
||||||
|
|
||||||
if sqlQuery:
|
|
||||||
logger.info(f"Extracted SQL query from analysis_result document: {sqlQuery[:100]}...")
|
|
||||||
break
|
|
||||||
except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as e:
|
|
||||||
logger.debug(f"Could not parse document as JSON: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not sqlQuery:
|
|
||||||
return ActionResult.isFailure(error="SQL query is required. Provide sqlQuery parameter or analysis_result document with sqlQuery field.")
|
|
||||||
|
|
||||||
# Update progress
|
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Validating query")
|
|
||||||
|
|
||||||
# Validate: only SELECT queries allowed
|
|
||||||
sqlNormalized = sqlQuery.strip().upper()
|
|
||||||
if not sqlNormalized.startswith("SELECT"):
|
|
||||||
return ActionResult.isFailure(error="Only SELECT queries are allowed")
|
|
||||||
forbiddenKeywords = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "EXEC", "EXECUTE"]
|
|
||||||
for kw in forbiddenKeywords:
|
|
||||||
if f" {kw} " in f" {sqlNormalized} " or sqlNormalized.startswith(f"{kw} "):
|
|
||||||
return ActionResult.isFailure(error=f"Forbidden SQL keyword detected: {kw}")
|
|
||||||
|
|
||||||
# Initialize connector
|
|
||||||
connector = PreprocessorConnector()
|
|
||||||
|
|
||||||
# Update progress
|
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.5, "Executing query")
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await connector.executeQuery(sqlQuery)
|
|
||||||
except Exception:
|
|
||||||
await connector.close()
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Update progress
|
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Formatting results")
|
|
||||||
|
|
||||||
# Generate meaningful filename
|
|
||||||
meaningful_name = self._generateMeaningfulFileName(
|
|
||||||
base_name="database_query",
|
|
||||||
extension="txt",
|
|
||||||
action_name="queryDatabase"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create validation metadata
|
|
||||||
validationMetadata = self._createValidationMetadata(
|
|
||||||
"queryDatabase",
|
|
||||||
sqlQuery=sqlQuery[:200] if len(sqlQuery) > 200 else sqlQuery, # Truncate for metadata
|
|
||||||
resultLength=len(result)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create action document
|
|
||||||
document = ActionDocument(
|
|
||||||
documentName=meaningful_name,
|
|
||||||
documentData=result,
|
|
||||||
mimeType="text/plain",
|
|
||||||
validationMetadata=validationMetadata
|
|
||||||
)
|
|
||||||
|
|
||||||
# Complete progress tracking
|
|
||||||
self.services.chat.progressLogFinish(operationId, True)
|
|
||||||
|
|
||||||
# Close connector
|
|
||||||
await connector.close()
|
|
||||||
|
|
||||||
return ActionResult.isSuccess(documents=[document])
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error executing database query: {str(e)}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.services.chat.progressLogFinish(operationId, False)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error=str(e)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
@ -1,55 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase
|
|
||||||
from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
|
|
||||||
from modules.shared.frontendTypes import FrontendType
|
|
||||||
|
|
||||||
# Import actions
|
|
||||||
from .actions.queryDatabase import queryDatabase
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class MethodChatbot(MethodBase):
|
|
||||||
"""Chatbot operations methods."""
|
|
||||||
|
|
||||||
def __init__(self, services):
|
|
||||||
super().__init__(services)
|
|
||||||
self.name = "chatbot"
|
|
||||||
self.description = "Chatbot operations"
|
|
||||||
|
|
||||||
# RBAC-Integration: Action-Definitionen mit actionId
|
|
||||||
self._actions = {
|
|
||||||
"queryDatabase": WorkflowActionDefinition(
|
|
||||||
actionId="chatbot.queryDatabase",
|
|
||||||
description="Execute a SQL SELECT query via the preprocessor connector. Returns formatted query results.",
|
|
||||||
dynamicMode=False,
|
|
||||||
outputType="QueryResult",
|
|
||||||
parameters={
|
|
||||||
"sqlQuery": WorkflowActionParameter(
|
|
||||||
name="sqlQuery",
|
|
||||||
type="str",
|
|
||||||
uiHint="textarea",
|
|
||||||
frontendType=FrontendType.TEXTAREA,
|
|
||||||
required=False,
|
|
||||||
description="SQL SELECT query to execute. If not provided, will attempt to extract from analysis_result document in documentList."
|
|
||||||
),
|
|
||||||
"documentList": WorkflowActionParameter(
|
|
||||||
name="documentList",
|
|
||||||
type="DocumentList",
|
|
||||||
frontendType=FrontendType.DOCUMENT_REFERENCE,
|
|
||||||
required=False,
|
|
||||||
description="Document reference(s) containing analysis_result with sqlQuery field. Used if sqlQuery parameter is not provided."
|
|
||||||
)
|
|
||||||
},
|
|
||||||
execute=queryDatabase.__get__(self, self.__class__)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Validate actions after definition
|
|
||||||
self._validateActions()
|
|
||||||
|
|
||||||
# Register actions as methods (optional, für direkten Zugriff)
|
|
||||||
self.queryDatabase = queryDatabase.__get__(self, self.__class__)
|
|
||||||
|
|
||||||
|
|
@ -81,7 +81,7 @@ pytest>=8.0.0
|
||||||
pytest-asyncio>=0.21.0
|
pytest-asyncio>=0.21.0
|
||||||
|
|
||||||
## Configuration Validation
|
## Configuration Validation
|
||||||
jsonschema>=4.0.0 # Required for chatbot workflow config validation
|
jsonschema>=4.0.0 # Required for feature/workflow config validation
|
||||||
|
|
||||||
## Streaming JSON parser (memory-safe import of large DB exports)
|
## Streaming JSON parser (memory-safe import of large DB exports)
|
||||||
ijson>=3.2.0
|
ijson>=3.2.0
|
||||||
|
|
@ -123,11 +123,4 @@ extract-msg>=0.55.0
|
||||||
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
|
pyproj>=3.6.0 # For coordinate transformations (EPSG:2056 <-> EPSG:4326)
|
||||||
shapely>=2.0.0 # For geometric operations (intersections, area calculations)
|
shapely>=2.0.0 # For geometric operations (intersections, area calculations)
|
||||||
geopandas>=0.14.0 # For reading and querying GeoPackage files
|
geopandas>=0.14.0 # For reading and querying GeoPackage files
|
||||||
fiona>=1.9.0 # Required by geopandas for reading GeoPackage files
|
fiona>=1.9.0 # Required by geopandas for reading GeoPackage files
|
||||||
|
|
||||||
## LangChain & LangGraph for chatbot workflow
|
|
||||||
langchain==1.2.8
|
|
||||||
langchain-core==1.2.8
|
|
||||||
langgraph==1.0.7
|
|
||||||
langchain-tavily==0.2.17
|
|
||||||
nest-asyncio>=1.6.0 # For running async code in sync context (LangGraph compatibility)
|
|
||||||
|
|
@ -11,7 +11,6 @@ erstellt: <dateiname>_structure.json
|
||||||
Datenbanken:
|
Datenbanken:
|
||||||
- poweron_app (User, Mandate, RBAC, Features, etc.)
|
- poweron_app (User, Mandate, RBAC, Features, etc.)
|
||||||
- poweron_chat (Chat-Konversationen und Nachrichten)
|
- poweron_chat (Chat-Konversationen und Nachrichten)
|
||||||
- poweron_chatbot (Chatbot-Feature: Konversationen, Nachrichten, Logs)
|
|
||||||
- poweron_management (Workflows, Prompts, Connections, etc.)
|
- poweron_management (Workflows, Prompts, Connections, etc.)
|
||||||
- poweron_realestate (Real Estate Daten)
|
- poweron_realestate (Real Estate Daten)
|
||||||
- poweron_trustee (Trustee Daten)
|
- poweron_trustee (Trustee Daten)
|
||||||
|
|
@ -104,7 +103,6 @@ ALL_DATABASES = [
|
||||||
"poweron_app",
|
"poweron_app",
|
||||||
"poweron_billing",
|
"poweron_billing",
|
||||||
"poweron_chat",
|
"poweron_chat",
|
||||||
"poweron_chatbot",
|
|
||||||
"poweron_commcoach",
|
"poweron_commcoach",
|
||||||
"poweron_knowledge",
|
"poweron_knowledge",
|
||||||
"poweron_management",
|
"poweron_management",
|
||||||
|
|
@ -122,7 +120,6 @@ ALL_DATABASES = [
|
||||||
DATABASE_CONFIG = {
|
DATABASE_CONFIG = {
|
||||||
"poweron_app": "DB_APP",
|
"poweron_app": "DB_APP",
|
||||||
"poweron_chat": "DB_CHAT",
|
"poweron_chat": "DB_CHAT",
|
||||||
"poweron_chatbot": "DB_CHATBOT",
|
|
||||||
"poweron_management": "DB_MANAGEMENT",
|
"poweron_management": "DB_MANAGEMENT",
|
||||||
"poweron_realestate": "DB_REALESTATE",
|
"poweron_realestate": "DB_REALESTATE",
|
||||||
"poweron_trustee": "DB_TRUSTEE",
|
"poweron_trustee": "DB_TRUSTEE",
|
||||||
|
|
@ -772,7 +769,6 @@ def main():
|
||||||
Datenbanken:
|
Datenbanken:
|
||||||
poweron_app - User, Mandate, RBAC, Features
|
poweron_app - User, Mandate, RBAC, Features
|
||||||
poweron_chat - Chat-Konversationen
|
poweron_chat - Chat-Konversationen
|
||||||
poweron_chatbot - Chatbot-Feature
|
|
||||||
poweron_management - Workflows, Prompts, Connections
|
poweron_management - Workflows, Prompts, Connections
|
||||||
poweron_realestate - Real Estate Daten
|
poweron_realestate - Real Estate Daten
|
||||||
poweron_trustee - Trustee Daten
|
poweron_trustee - Trustee Daten
|
||||||
|
|
@ -781,7 +777,7 @@ Beispiele:
|
||||||
python script_db_export_migration.py
|
python script_db_export_migration.py
|
||||||
python script_db_export_migration.py --pretty
|
python script_db_export_migration.py --pretty
|
||||||
python script_db_export_migration.py -o backup.json --pretty
|
python script_db_export_migration.py -o backup.json --pretty
|
||||||
python script_db_export_migration.py --db poweron_app,poweron_chat,poweron_chatbot
|
python script_db_export_migration.py --db poweron_app,poweron_chat,poweron_management
|
||||||
python script_db_export_migration.py --exclude Token,AuthEvent --include-meta
|
python script_db_export_migration.py --exclude Token,AuthEvent --include-meta
|
||||||
python script_db_export_migration.py --summary
|
python script_db_export_migration.py --summary
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -1,101 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Initialize poweron_chatbot database for the Chatbot feature.
|
|
||||||
|
|
||||||
Creates the poweron_chatbot database if it does not exist.
|
|
||||||
Uses DB_CHATBOT_* config (falls back to DB_*).
|
|
||||||
Tables (ChatbotConversation, ChatbotMessage, ChatbotDocument, ChatbotLog) are
|
|
||||||
auto-created by the connector on first use.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python script_db_init_chatbot.py [--dry-run]
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
scriptPath = Path(__file__).resolve()
|
|
||||||
gatewayPath = scriptPath.parent.parent
|
|
||||||
sys.path.insert(0, str(gatewayPath))
|
|
||||||
os.chdir(str(gatewayPath))
|
|
||||||
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
import psycopg2
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
DB_NAME = "poweron_chatbot"
|
|
||||||
CONFIG_PREFIX = "DB_CHATBOT"
|
|
||||||
|
|
||||||
|
|
||||||
def _get_config():
|
|
||||||
"""Get DB config: DB_CHATBOT_* with fallback to DB_*."""
|
|
||||||
host = APP_CONFIG.get(f"{CONFIG_PREFIX}_HOST") or APP_CONFIG.get("DB_HOST", "localhost")
|
|
||||||
port = int(APP_CONFIG.get(f"{CONFIG_PREFIX}_PORT") or APP_CONFIG.get("DB_PORT", "5432"))
|
|
||||||
user = APP_CONFIG.get(f"{CONFIG_PREFIX}_USER") or APP_CONFIG.get("DB_USER")
|
|
||||||
password = (
|
|
||||||
APP_CONFIG.get(f"{CONFIG_PREFIX}_PASSWORD_SECRET")
|
|
||||||
or APP_CONFIG.get(f"{CONFIG_PREFIX}_PASSWORD")
|
|
||||||
or APP_CONFIG.get("DB_PASSWORD_SECRET")
|
|
||||||
or APP_CONFIG.get("DB_PASSWORD")
|
|
||||||
)
|
|
||||||
return {"host": host, "port": port, "user": user, "password": password}
|
|
||||||
|
|
||||||
|
|
||||||
def init_chatbot_db(dry_run: bool = False) -> bool:
|
|
||||||
"""Create poweron_chatbot database if it does not exist."""
|
|
||||||
config = _get_config()
|
|
||||||
if not config["user"] or not config["password"]:
|
|
||||||
logger.error("DB_USER and DB_PASSWORD (or DB_CHATBOT_*) required")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
conn = psycopg2.connect(
|
|
||||||
host=config["host"],
|
|
||||||
port=config["port"],
|
|
||||||
database="postgres",
|
|
||||||
user=config["user"],
|
|
||||||
password=config["password"],
|
|
||||||
)
|
|
||||||
conn.autocommit = True
|
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.execute(
|
|
||||||
"SELECT 1 FROM pg_database WHERE datname = %s",
|
|
||||||
(DB_NAME,),
|
|
||||||
)
|
|
||||||
exists = cur.fetchone() is not None
|
|
||||||
|
|
||||||
if exists:
|
|
||||||
logger.info(f"Database {DB_NAME} already exists")
|
|
||||||
else:
|
|
||||||
if dry_run:
|
|
||||||
logger.info(f"[DRY-RUN] Would create database {DB_NAME}")
|
|
||||||
else:
|
|
||||||
cur.execute(f'CREATE DATABASE "{DB_NAME}"')
|
|
||||||
logger.info(f"Created database {DB_NAME}")
|
|
||||||
|
|
||||||
conn.close()
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to init {DB_NAME}: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description="Initialize poweron_chatbot database")
|
|
||||||
parser.add_argument("--dry-run", action="store_true", help="Do not create, only report")
|
|
||||||
args = parser.parse_args()
|
|
||||||
ok = init_chatbot_db(dry_run=args.dry_run)
|
|
||||||
sys.exit(0 if ok else 1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -47,7 +47,6 @@ ALL_DATABASES = [
|
||||||
"poweron_billing",
|
"poweron_billing",
|
||||||
"poweron_workspace",
|
"poweron_workspace",
|
||||||
"poweron_graphicaleditor",
|
"poweron_graphicaleditor",
|
||||||
"poweron_chatbot",
|
|
||||||
"poweron_trustee",
|
"poweron_trustee",
|
||||||
"poweron_commcoach",
|
"poweron_commcoach",
|
||||||
"poweron_neutralization",
|
"poweron_neutralization",
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,5 @@ pytest tests/demo/test_demo_uc1_trustee.py -v
|
||||||
| `test_demo_bootstrap.py` | Idempotent load/remove, mandates, user, features, RMA, neutralization |
|
| `test_demo_bootstrap.py` | Idempotent load/remove, mandates, user, features, RMA, neutralization |
|
||||||
| `test_demo_uc1_trustee.py` | Trustee instances, RMA config, system workflow templates |
|
| `test_demo_uc1_trustee.py` | Trustee instances, RMA config, system workflow templates |
|
||||||
| `test_demo_uc2_realestate.py` | Workspace instances for agent demo |
|
| `test_demo_uc2_realestate.py` | Workspace instances for agent demo |
|
||||||
| `test_demo_uc3_chatbot.py` | Chatbot instance, knowledge-base files |
|
|
||||||
| `test_demo_uc4_i18n.py` | i18n readiness, Spanish not pre-installed |
|
| `test_demo_uc4_i18n.py` | i18n readiness, Spanish not pre-installed |
|
||||||
| `test_demo_neutralization.py` | Neutralization config enabled, test PDF exists |
|
| `test_demo_neutralization.py` | Neutralization config enabled, test PDF exists |
|
||||||
|
|
|
||||||
|
|
@ -60,19 +60,6 @@ class TestDemoBootstrap:
|
||||||
instances = _getFeatureInstances(db, mid, featureCode)
|
instances = _getFeatureInstances(db, mid, featureCode)
|
||||||
assert len(instances) >= 1, f"Feature '{featureCode}' missing in Alpina Treuhand AG"
|
assert len(instances) >= 1, f"Feature '{featureCode}' missing in Alpina Treuhand AG"
|
||||||
|
|
||||||
def test_alpinaNoChatbot(self, db, mandateAlpina):
|
|
||||||
"""Alpina should NOT have a chatbot instance."""
|
|
||||||
mid = mandateAlpina.get("id")
|
|
||||||
instances = _getFeatureInstances(db, mid, "chatbot")
|
|
||||||
assert len(instances) == 0, "Alpina Treuhand should not have chatbot"
|
|
||||||
|
|
||||||
def test_happylifeNoChatbot(self, db, mandateHappylife):
|
|
||||||
"""HappyLife also should NOT have a chatbot instance — chatbot was
|
|
||||||
removed from the InvestorDemo on 2026-04-20 (see changelog)."""
|
|
||||||
mid = mandateHappylife.get("id")
|
|
||||||
instances = _getFeatureInstances(db, mid, "chatbot")
|
|
||||||
assert len(instances) == 0, "HappyLife should no longer have chatbot (removed 2026-04-20)"
|
|
||||||
|
|
||||||
|
|
||||||
class TestDemoBootstrapRma:
|
class TestDemoBootstrapRma:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,39 +0,0 @@
|
||||||
"""
|
|
||||||
T-UC3: Knowledge Chatbot.
|
|
||||||
|
|
||||||
The chatbot feature instance was removed from the InvestorDemo on
|
|
||||||
2026-04-20 (see changelog) — neither HappyLife nor Alpina bootstrap a
|
|
||||||
chatbot today; the actual RAG demo runs via workspace. We still verify
|
|
||||||
the knowledge-base demo files are present and that the bootstrap does
|
|
||||||
NOT (re)create chatbot instances in either mandate.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from pathlib import Path
|
|
||||||
from tests.demo.conftest import _getFeatureInstances
|
|
||||||
|
|
||||||
|
|
||||||
class TestChatbotSetup:
|
|
||||||
|
|
||||||
def test_chatbotNotInHappylife(self, db, mandateHappylife):
|
|
||||||
"""HappyLife should NOT have a chatbot instance (removed 2026-04-20)."""
|
|
||||||
mid = mandateHappylife.get("id")
|
|
||||||
instances = _getFeatureInstances(db, mid, "chatbot")
|
|
||||||
assert len(instances) == 0, "HappyLife should no longer bootstrap a chatbot instance"
|
|
||||||
|
|
||||||
def test_chatbotNotInAlpina(self, db, mandateAlpina):
|
|
||||||
"""Alpina should NOT have a chatbot instance."""
|
|
||||||
mid = mandateAlpina.get("id")
|
|
||||||
instances = _getFeatureInstances(db, mid, "chatbot")
|
|
||||||
assert len(instances) == 0, "Alpina should not have chatbot"
|
|
||||||
|
|
||||||
|
|
||||||
class TestKnowledgeBaseFiles:
|
|
||||||
|
|
||||||
def test_knowledgeBaseFilesExist(self):
|
|
||||||
"""Knowledge-base documents must exist in demoData."""
|
|
||||||
kbDir = Path(__file__).resolve().parent.parent.parent / "demoData" / "knowledge-base"
|
|
||||||
assert kbDir.exists(), f"knowledge-base dir not found at {kbDir}"
|
|
||||||
files = list(kbDir.iterdir())
|
|
||||||
docs = [f for f in files if f.suffix in (".md", ".html", ".pdf", ".docx", ".txt")]
|
|
||||||
assert len(docs) >= 3, f"Expected at least 3 knowledge-base docs, found {len(docs)}: {[f.name for f in docs]}"
|
|
||||||
154
tests/serviceGeneration/test_large_document_render.py
Normal file
154
tests/serviceGeneration/test_large_document_render.py
Normal file
|
|
@ -0,0 +1,154 @@
|
||||||
|
# Copyright (c) 2026 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""A3 / AC15: lazy file-reference image resolution for large documents.
|
||||||
|
|
||||||
|
Block images may carry only a ``fileId`` (no embedded base64). The renderer
|
||||||
|
fetches the bytes on demand via an injected ``_imageResolver``, so the document
|
||||||
|
JSON never holds every image's bytes simultaneously. These tests exercise the
|
||||||
|
PDF and DOCX block-image paths directly (real reportlab / python-docx), without
|
||||||
|
the full GenerationService/services plumbing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import io
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import RendererPdf
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.renderers.rendererDocx import RendererDocx
|
||||||
|
|
||||||
|
|
||||||
|
def _tinyPng() -> bytes:
|
||||||
|
from PIL import Image as PILImage
|
||||||
|
|
||||||
|
buf = io.BytesIO()
|
||||||
|
PILImage.new("RGB", (8, 8), (200, 30, 30)).save(buf, format="PNG")
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
class _ResolverSpy:
|
||||||
|
"""Records the fileIds it was asked for and returns fixed PNG bytes."""
|
||||||
|
|
||||||
|
def __init__(self, data: bytes):
|
||||||
|
self._data = data
|
||||||
|
self.calls = []
|
||||||
|
|
||||||
|
def __call__(self, fileId):
|
||||||
|
self.calls.append(fileId)
|
||||||
|
return self._data
|
||||||
|
|
||||||
|
|
||||||
|
def _imageElement(fileId: str):
|
||||||
|
return {"type": "image", "content": {"fileId": fileId, "altText": "Image"}}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Base helper ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_base_helper_resolves_bytes_to_base64():
|
||||||
|
png = _tinyPng()
|
||||||
|
r = RendererPdf()
|
||||||
|
r._imageResolver = _ResolverSpy(png)
|
||||||
|
b64 = r._lazyResolveImageBase64({"fileId": "img-1"})
|
||||||
|
import base64 as _b64
|
||||||
|
assert _b64.b64decode(b64) == png
|
||||||
|
|
||||||
|
|
||||||
|
def test_base_helper_without_resolver_returns_empty():
|
||||||
|
r = RendererPdf()
|
||||||
|
assert r._lazyResolveImageBase64({"fileId": "img-1"}) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_base_helper_without_fileid_returns_empty():
|
||||||
|
r = RendererPdf()
|
||||||
|
r._imageResolver = _ResolverSpy(_tinyPng())
|
||||||
|
assert r._lazyResolveImageBase64({"altText": "no id"}) == ""
|
||||||
|
|
||||||
|
|
||||||
|
# ── DOCX block images ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_docx_lazy_resolves_block_images():
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
spy = _ResolverSpy(_tinyPng())
|
||||||
|
r = RendererDocx()
|
||||||
|
r._imageResolver = spy
|
||||||
|
|
||||||
|
doc = Document()
|
||||||
|
n = 5
|
||||||
|
elements = [_imageElement(f"img-{i}") for i in range(n)]
|
||||||
|
for el in elements:
|
||||||
|
r._renderJsonImage(doc, el, {})
|
||||||
|
|
||||||
|
# All N images embedded, resolver hit once per image, in order.
|
||||||
|
assert len(doc.inline_shapes) == n
|
||||||
|
assert spy.calls == [f"img-{i}" for i in range(n)]
|
||||||
|
# Stays lazy: the source JSON never gained embedded base64.
|
||||||
|
assert all("base64Data" not in el["content"] for el in elements)
|
||||||
|
|
||||||
|
|
||||||
|
def test_docx_without_resolver_embeds_no_image():
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
r = RendererDocx() # no _imageResolver
|
||||||
|
doc = Document()
|
||||||
|
r._renderJsonImage(doc, _imageElement("img-1"), {})
|
||||||
|
assert len(doc.inline_shapes) == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ── PDF block images ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_pdf_lazy_resolves_block_image():
|
||||||
|
from reportlab.platypus import Image as ReportLabImage
|
||||||
|
|
||||||
|
spy = _ResolverSpy(_tinyPng())
|
||||||
|
r = RendererPdf()
|
||||||
|
r._imageResolver = spy
|
||||||
|
styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
|
||||||
|
|
||||||
|
el = _imageElement("img-1")
|
||||||
|
flowables = r._renderJsonImage(el, styles)
|
||||||
|
|
||||||
|
assert spy.calls == ["img-1"]
|
||||||
|
assert any(isinstance(f, ReportLabImage) for f in flowables)
|
||||||
|
assert "base64Data" not in el["content"]
|
||||||
|
r._cleanupTempImageFiles()
|
||||||
|
|
||||||
|
|
||||||
|
def test_pdf_without_resolver_returns_placeholder():
|
||||||
|
from reportlab.platypus import Image as ReportLabImage
|
||||||
|
|
||||||
|
r = RendererPdf() # no _imageResolver
|
||||||
|
styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
|
||||||
|
flowables = r._renderJsonImage(_imageElement("img-1"), styles)
|
||||||
|
assert not any(isinstance(f, ReportLabImage) for f in flowables)
|
||||||
|
|
||||||
|
|
||||||
|
def test_pdf_images_are_file_backed_and_cleaned():
|
||||||
|
"""Step 2: PDF image flowables read from temp files (not in-memory streams),
|
||||||
|
so image bytes are not all resident at build time. Temp files are cleaned up."""
|
||||||
|
import os
|
||||||
|
from reportlab.platypus import Image as ReportLabImage
|
||||||
|
|
||||||
|
spy = _ResolverSpy(_tinyPng())
|
||||||
|
r = RendererPdf()
|
||||||
|
r._imageResolver = spy
|
||||||
|
r._tempImageFiles = []
|
||||||
|
styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
|
||||||
|
|
||||||
|
n = 4
|
||||||
|
images = []
|
||||||
|
for i in range(n):
|
||||||
|
flowables = r._renderJsonImage(_imageElement(f"img-{i}"), styles)
|
||||||
|
images += [f for f in flowables if isinstance(f, ReportLabImage)]
|
||||||
|
|
||||||
|
assert len(images) == n
|
||||||
|
# Each image is file-backed by a tracked temp file that currently exists on disk.
|
||||||
|
assert len(r._tempImageFiles) == n
|
||||||
|
for img in images:
|
||||||
|
assert img.filename in r._tempImageFiles
|
||||||
|
assert os.path.exists(img.filename)
|
||||||
|
|
||||||
|
paths = list(r._tempImageFiles)
|
||||||
|
r._cleanupTempImageFiles()
|
||||||
|
assert all(not os.path.exists(p) for p in paths)
|
||||||
|
assert r._tempImageFiles == []
|
||||||
146
tests/serviceGeneration/test_layout_primitives.py
Normal file
146
tests/serviceGeneration/test_layout_primitives.py
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
# Copyright (c) 2026 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""A3: layout primitives (cover_page, image_grid).
|
||||||
|
|
||||||
|
Covers the markdown authoring conventions (fenced ```cover_page / ```image_grid
|
||||||
|
blocks) and the PDF/DOCX renderer handlers, using real reportlab / python-docx.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.renderers.rendererPdf import RendererPdf
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.renderers.rendererDocx import RendererDocx
|
||||||
|
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
|
||||||
|
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||||
|
|
||||||
|
|
||||||
|
def _tinyPngB64() -> str:
|
||||||
|
from PIL import Image as PILImage
|
||||||
|
|
||||||
|
buf = io.BytesIO()
|
||||||
|
PILImage.new("RGB", (8, 8), (30, 120, 200)).save(buf, format="PNG")
|
||||||
|
return base64.b64encode(buf.getvalue()).decode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def _imgContent():
|
||||||
|
return {"base64Data": _tinyPngB64(), "altText": "Pic"}
|
||||||
|
|
||||||
|
|
||||||
|
# ── datamodel ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_layout_types_are_registered():
|
||||||
|
assert "cover_page" in supportedSectionTypes
|
||||||
|
assert "image_grid" in supportedSectionTypes
|
||||||
|
|
||||||
|
|
||||||
|
# ── markdown authoring ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_markdown_parses_cover_page_block():
|
||||||
|
md = (
|
||||||
|
"```cover_page\n"
|
||||||
|
"title: Klageschrift\n"
|
||||||
|
"subtitle: In Sachen A gegen B\n"
|
||||||
|
"author: Dr. Muster\n"
|
||||||
|
"date: 2026-06-02\n"
|
||||||
|
"logo: file:logo-1\n"
|
||||||
|
"```\n"
|
||||||
|
)
|
||||||
|
doc = markdownToDocumentJson(md, "T")
|
||||||
|
sections = doc["documents"][0]["sections"]
|
||||||
|
assert len(sections) == 1
|
||||||
|
sec = sections[0]
|
||||||
|
assert sec["content_type"] == "cover_page"
|
||||||
|
content = sec["elements"][0]["content"]
|
||||||
|
assert content["title"] == "Klageschrift"
|
||||||
|
assert content["subtitle"] == "In Sachen A gegen B"
|
||||||
|
assert content["author"] == "Dr. Muster"
|
||||||
|
assert content["date"] == "2026-06-02"
|
||||||
|
assert content["_fileRef"] == "logo-1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_markdown_parses_image_grid_block():
|
||||||
|
md = (
|
||||||
|
"```image_grid\n"
|
||||||
|
"columns: 3\n"
|
||||||
|
'\n'
|
||||||
|
"file:img-2\n"
|
||||||
|
"https://example.com/x.png\n"
|
||||||
|
"```\n"
|
||||||
|
)
|
||||||
|
doc = markdownToDocumentJson(md, "T")
|
||||||
|
sec = doc["documents"][0]["sections"][0]
|
||||||
|
assert sec["content_type"] == "image_grid"
|
||||||
|
content = sec["elements"][0]["content"]
|
||||||
|
assert content["columns"] == 3
|
||||||
|
assert len(content["images"]) == 3
|
||||||
|
assert content["images"][0]["_fileRef"] == "img-1"
|
||||||
|
assert content["images"][0]["widthPt"] == 200
|
||||||
|
assert content["images"][1]["_fileRef"] == "img-2"
|
||||||
|
assert content["images"][2]["_srcUrl"] == "https://example.com/x.png"
|
||||||
|
|
||||||
|
|
||||||
|
def test_markdown_plain_code_block_still_works():
|
||||||
|
md = "```python\nprint('x')\n```\n"
|
||||||
|
doc = markdownToDocumentJson(md, "T")
|
||||||
|
sec = doc["documents"][0]["sections"][0]
|
||||||
|
assert sec["content_type"] == "code_block"
|
||||||
|
assert sec["elements"][0]["content"]["language"] == "python"
|
||||||
|
|
||||||
|
|
||||||
|
# ── PDF handlers ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_pdf_cover_page_emits_page_break():
|
||||||
|
from reportlab.platypus import PageBreak
|
||||||
|
|
||||||
|
r = RendererPdf()
|
||||||
|
styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
|
||||||
|
el = {"content": {"title": "Report", "subtitle": "2026", "author": "PM"}}
|
||||||
|
flowables = r._renderCoverPage(el, styles)
|
||||||
|
assert any(isinstance(f, PageBreak) for f in flowables)
|
||||||
|
assert len(flowables) >= 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_pdf_image_grid_builds_table():
|
||||||
|
from reportlab.platypus import Table
|
||||||
|
|
||||||
|
r = RendererPdf()
|
||||||
|
r._tempImageFiles = []
|
||||||
|
styles = r._convertUnifiedStyleToInternal(resolveStyle(None))
|
||||||
|
el = {"content": {"columns": 2, "images": [_imgContent(), _imgContent(), _imgContent()]}}
|
||||||
|
flowables = r._renderImageGrid(el, styles)
|
||||||
|
tables = [f for f in flowables if isinstance(f, Table)]
|
||||||
|
assert len(tables) == 1
|
||||||
|
# 3 images, 2 columns -> 2 rows (last cell padded).
|
||||||
|
assert len(tables[0]._cellvalues) == 2
|
||||||
|
r._cleanupTempImageFiles()
|
||||||
|
|
||||||
|
|
||||||
|
# ── DOCX handlers ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_docx_cover_page_renders_and_breaks():
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
r = RendererDocx()
|
||||||
|
doc = Document()
|
||||||
|
before = len(doc.paragraphs)
|
||||||
|
r._renderCoverPage(doc, {"content": {"title": "Klageschrift", "author": "Dr. M"}}, {})
|
||||||
|
# New paragraphs were added (spacers + title + author) and a page break exists.
|
||||||
|
assert len(doc.paragraphs) > before
|
||||||
|
xml = doc.element.xml
|
||||||
|
assert "w:br" in xml and 'type="page"' in xml
|
||||||
|
|
||||||
|
|
||||||
|
def test_docx_image_grid_builds_table_with_pictures():
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
r = RendererDocx()
|
||||||
|
doc = Document()
|
||||||
|
r._renderImageGrid(doc, {"content": {"columns": 2, "images": [_imgContent(), _imgContent(), _imgContent()]}}, {})
|
||||||
|
assert len(doc.tables) == 1
|
||||||
|
table = doc.tables[0]
|
||||||
|
assert len(table.columns) == 2
|
||||||
|
assert len(table.rows) == 2
|
||||||
|
assert len(doc.inline_shapes) == 3
|
||||||
|
|
@ -1,7 +1,12 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
import pytest
|
import pytest
|
||||||
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE
|
from modules.serviceCenter.services.serviceGeneration.styleDefaults import (
|
||||||
|
resolveStyle,
|
||||||
|
resolveTheme,
|
||||||
|
DEFAULT_STYLE,
|
||||||
|
THEME_PRESETS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_none_returns_defaults():
|
def test_resolve_none_returns_defaults():
|
||||||
|
|
@ -44,3 +49,45 @@ def test_override_document_title_partial_merge():
|
||||||
assert result["documentTitle"]["sizePt"] == 32
|
assert result["documentTitle"]["sizePt"] == 32
|
||||||
assert result["documentTitle"]["align"] == "center"
|
assert result["documentTitle"]["align"] == "center"
|
||||||
assert result["headings"]["h1"]["sizePt"] == DEFAULT_STYLE["headings"]["h1"]["sizePt"]
|
assert result["headings"]["h1"]["sizePt"] == DEFAULT_STYLE["headings"]["h1"]["sizePt"]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Theme presets (A3) ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_resolve_theme_unknown_is_empty():
|
||||||
|
assert resolveTheme(None) == {}
|
||||||
|
assert resolveTheme("does-not-exist") == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_theme_case_insensitive():
|
||||||
|
assert resolveTheme("FINANCE") == THEME_PRESETS["finance"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_general_theme_equals_defaults():
|
||||||
|
assert resolveStyle(None, "general") == DEFAULT_STYLE
|
||||||
|
|
||||||
|
|
||||||
|
def test_theme_applies_preset_over_defaults():
|
||||||
|
result = resolveStyle(None, "legal")
|
||||||
|
# legal preset changes the primary font to a serif and justifies body text
|
||||||
|
assert result["fonts"]["primary"] == "Times New Roman"
|
||||||
|
assert result["paragraph"]["align"] == "justify"
|
||||||
|
# untouched keys still come from DEFAULT_STYLE
|
||||||
|
assert result["page"]["format"] == DEFAULT_STYLE["page"]["format"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_explicit_style_overrides_theme():
|
||||||
|
# theme sets finance green; explicit style must win
|
||||||
|
result = resolveStyle({"colors": {"primary": "#FF0000"}}, "finance")
|
||||||
|
assert result["colors"]["primary"] == "#FF0000"
|
||||||
|
# non-overridden theme key still applies
|
||||||
|
assert result["table"]["headerBg"] == THEME_PRESETS["finance"]["table"]["headerBg"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_marketing_theme_enlarges_title_and_images():
|
||||||
|
result = resolveStyle(None, "marketing")
|
||||||
|
assert result["documentTitle"]["sizePt"] == 34
|
||||||
|
assert result["image"]["defaultWidthPt"] == 540
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_theme_falls_back_to_defaults():
|
||||||
|
assert resolveStyle(None, "rainbow") == DEFAULT_STYLE
|
||||||
|
|
|
||||||
|
|
@ -264,7 +264,6 @@ def _instantiateMethod(methodCls):
|
||||||
("modules.workflows.methods.methodFile.methodFile", "MethodFile"),
|
("modules.workflows.methods.methodFile.methodFile", "MethodFile"),
|
||||||
("modules.workflows.methods.methodContext.methodContext", "MethodContext"),
|
("modules.workflows.methods.methodContext.methodContext", "MethodContext"),
|
||||||
("modules.workflows.methods.methodJira.methodJira", "MethodJira"),
|
("modules.workflows.methods.methodJira.methodJira", "MethodJira"),
|
||||||
("modules.workflows.methods.methodChatbot.methodChatbot", "MethodChatbot"),
|
|
||||||
])
|
])
|
||||||
def test_methodSignaturesAreHealthy(modulePath, className):
|
def test_methodSignaturesAreHealthy(modulePath, className):
|
||||||
"""Each shipping Method's _actions must validate against the catalog."""
|
"""Each shipping Method's _actions must validate against the catalog."""
|
||||||
|
|
|
||||||
149
tests/unit/serviceAgent/test_field_neutralization.py
Normal file
149
tests/unit/serviceAgent/test_field_neutralization.py
Normal file
|
|
@ -0,0 +1,149 @@
|
||||||
|
# Copyright (c) 2026 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""A2: type/inheritance-aware field neutralization for feature source data.
|
||||||
|
|
||||||
|
Tests the pure policy logic in
|
||||||
|
``modules.serviceCenter.services.serviceAgent.featureDataProvider._neutralizeAndSerializeRows``
|
||||||
|
with a fake NeutralizationService (no DB, no real LLM).
|
||||||
|
|
||||||
|
Rules under test:
|
||||||
|
1. strings -> substring-neutralized when effective (explicit OR inherited)
|
||||||
|
2. binary -> dropped when neutralization applies
|
||||||
|
3. other scalars -> neutralized ONLY when the field flag is explicit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from modules.serviceCenter.services.serviceAgent.featureDataProvider import (
|
||||||
|
_neutralizeAndSerializeRows,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeNeutralizer:
|
||||||
|
"""Deterministic stand-in: replaces the token 'SECRET' with a placeholder.
|
||||||
|
|
||||||
|
Records every text it was asked to process so tests can assert that the
|
||||||
|
field name was passed as a type hint (prefix ``"<field>: "``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.seen = []
|
||||||
|
|
||||||
|
async def processTextAsync(self, text, fileId=None):
|
||||||
|
self.seen.append(text)
|
||||||
|
neutralized = text.replace("SECRET", "[name.00000000-0000-0000-0000-000000000000]")
|
||||||
|
return {"neutralized_text": neutralized}
|
||||||
|
|
||||||
|
|
||||||
|
def _run(rows, policy, neutralizer):
|
||||||
|
return asyncio.run(_neutralizeAndSerializeRows(rows, policy, neutralizer))
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_substring_neutralized_when_inherited():
|
||||||
|
"""AC7: tableActive (inherited), empty explicitFields -> strings substring-neutralized."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "note": "Patient SECRET has a fever"}]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert out[0]["note"] == "Patient [name.00000000-0000-0000-0000-000000000000] has a fever"
|
||||||
|
# surrounding text preserved (NOT whole-value replaced)
|
||||||
|
assert out[0]["note"].startswith("Patient ")
|
||||||
|
# id is structural -> never neutralized
|
||||||
|
assert out[0]["id"] == "row-1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_field_name_passed_as_type_hint():
|
||||||
|
"""AC8: the field name is prepended as a type hint and stripped from the result."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "lastName": "SECRET"}]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert out[0]["lastName"] == "[name.00000000-0000-0000-0000-000000000000]"
|
||||||
|
assert any(t.startswith("lastName: ") for t in fake.seen)
|
||||||
|
|
||||||
|
|
||||||
|
def test_binary_dropped_when_active():
|
||||||
|
"""AC9: binary columns are dropped (not masked) when neutralization applies."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "photo": b"\x89PNG\x00\x01", "note": "ok"}]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert "photo" not in out[0]
|
||||||
|
assert out[0]["id"] == "row-1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_numeric_unchanged_when_only_inherited():
|
||||||
|
"""AC10: numeric/date fields are NOT neutralized via inheritance."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "amount": 4200, "bookedAt": datetime.date(2026, 1, 2)}]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert out[0]["amount"] == 4200
|
||||||
|
assert out[0]["bookedAt"] == "2026-01-02"
|
||||||
|
|
||||||
|
|
||||||
|
def test_numeric_neutralized_when_explicit():
|
||||||
|
"""AC11: numeric field neutralized (whole-value placeholder) when explicitly flagged."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "amount": 4200}]
|
||||||
|
policy = {"tableActive": False, "explicitFields": {"amount"}}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert out[0]["amount"].startswith("[NEUT.amount.")
|
||||||
|
assert out[0]["amount"].endswith("]")
|
||||||
|
|
||||||
|
|
||||||
|
def test_failsafe_redacts_when_no_neutralizer():
|
||||||
|
"""No engine available but neutralization required -> fail closed ([REDACTED])."""
|
||||||
|
rows = [{"id": "row-1", "note": "Patient SECRET"}]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, None)
|
||||||
|
assert out[0]["note"] == "[REDACTED]"
|
||||||
|
|
||||||
|
|
||||||
|
def test_dedup_across_rows_single_call_per_unique_value():
|
||||||
|
"""Identical (field,value) pairs are neutralized once and reused."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [
|
||||||
|
{"id": "a", "note": "SECRET"},
|
||||||
|
{"id": "b", "note": "SECRET"},
|
||||||
|
]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert out[0]["note"] == out[1]["note"]
|
||||||
|
# 'note: SECRET' appears exactly once in the engine call log (deduped)
|
||||||
|
assert fake.seen.count("note: SECRET") == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_inactive_table_leaves_strings_untouched():
|
||||||
|
"""No tableActive and no explicit fields -> strings pass through unchanged."""
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "note": "Patient SECRET"}]
|
||||||
|
policy = {"tableActive": False, "explicitFields": set()}
|
||||||
|
out = _run(rows, policy, fake)
|
||||||
|
assert out[0]["note"] == "Patient SECRET"
|
||||||
|
assert fake.seen == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_rag_bootstrap_parity_with_query_path():
|
||||||
|
"""AC12: the RAG bootstrap serializes the SAME neutralized rows the query path returns.
|
||||||
|
|
||||||
|
Both paths run rows through ``_neutralizeAndSerializeRows``; the bootstrap then
|
||||||
|
turns the finalized row into embedding text via ``_serializeRowToText``. This
|
||||||
|
asserts no raw secret leaks into the index text and structural fields are skipped.
|
||||||
|
"""
|
||||||
|
from modules.serviceCenter.services.serviceKnowledge.subFeatureBootstrap import _serializeRowToText
|
||||||
|
|
||||||
|
fake = _FakeNeutralizer()
|
||||||
|
rows = [{"id": "row-1", "sysCreatedAt": "2026-01-01", "note": "Patient SECRET has a fever"}]
|
||||||
|
policy = {"tableActive": True, "explicitFields": set()}
|
||||||
|
|
||||||
|
finalized = _run(rows, policy, fake) # identical to the sub-agent query path
|
||||||
|
indexText = _serializeRowToText(finalized[0])
|
||||||
|
|
||||||
|
# Neutralized placeholder is indexed, raw secret is not.
|
||||||
|
assert "[name.00000000-0000-0000-0000-000000000000]" in indexText
|
||||||
|
assert "SECRET" not in indexText
|
||||||
|
# Structural fields are excluded from the embedding text.
|
||||||
|
assert "id:" not in indexText
|
||||||
|
assert "sysCreatedAt" not in indexText
|
||||||
|
|
@ -27,7 +27,6 @@ def _ds(idVal: str, path: str, **flags) -> dict:
|
||||||
"path": path,
|
"path": path,
|
||||||
"neutralize": None,
|
"neutralize": None,
|
||||||
"ragIndexEnabled": None,
|
"ragIndexEnabled": None,
|
||||||
"scope": None,
|
|
||||||
}
|
}
|
||||||
base.update(flags)
|
base.update(flags)
|
||||||
return base
|
return base
|
||||||
|
|
@ -97,15 +96,6 @@ class TestEffectiveFlagWalk(unittest.TestCase):
|
||||||
leaf = _ds("l", "/anything/anywhere")
|
leaf = _ds("l", "/anything/anywhere")
|
||||||
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
|
self.assertTrue(_inheritFlags.getEffectiveFlag(leaf, "neutralize", [root, leaf]))
|
||||||
|
|
||||||
def test_scope_inheritance_with_string_default(self):
|
|
||||||
root = _ds("r", "/", scope="mandate")
|
|
||||||
leaf = _ds("l", "/folder")
|
|
||||||
self.assertEqual(_inheritFlags.getEffectiveFlag(leaf, "scope", [root, leaf]), "mandate")
|
|
||||||
|
|
||||||
def test_scope_default_personal_when_empty(self):
|
|
||||||
leaf = _ds("l", "/folder")
|
|
||||||
self.assertEqual(_inheritFlags.getEffectiveFlag(leaf, "scope", [leaf]), "personal")
|
|
||||||
|
|
||||||
def test_unknown_flag_raises(self):
|
def test_unknown_flag_raises(self):
|
||||||
leaf = _ds("l", "/")
|
leaf = _ds("l", "/")
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
|
|
@ -158,20 +148,6 @@ class TestEffectiveFlagAggregate(unittest.TestCase):
|
||||||
allDs = [root, child1, child2]
|
allDs = [root, child1, child2]
|
||||||
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"), "mixed")
|
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "neutralize", allDs, mode="aggregate"), "mixed")
|
||||||
|
|
||||||
def test_mixed_scope(self):
|
|
||||||
root = _ds("r", "/", scope="personal")
|
|
||||||
child1 = _ds("c1", "/a", scope="team")
|
|
||||||
child2 = _ds("c2", "/b") # inherits personal from root
|
|
||||||
allDs = [root, child1, child2]
|
|
||||||
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "mixed")
|
|
||||||
|
|
||||||
def test_all_scope_same_explicit_returns_concrete(self):
|
|
||||||
root = _ds("r", "/", scope="team")
|
|
||||||
child1 = _ds("c1", "/a", scope="team")
|
|
||||||
child2 = _ds("c2", "/b") # inherits team
|
|
||||||
allDs = [root, child1, child2]
|
|
||||||
self.assertEqual(_inheritFlags.getEffectiveFlag(root, "scope", allDs, mode="aggregate"), "team")
|
|
||||||
|
|
||||||
def test_connection_root_aggregate_cross_sourcetype(self):
|
def test_connection_root_aggregate_cross_sourcetype(self):
|
||||||
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
|
connRoot = _ds("conn", "/", sourceType="msft", neutralize=True)
|
||||||
spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
|
spExplicit = _ds("sp", "/", sourceType="sharepointFolder", neutralize=False)
|
||||||
|
|
@ -531,20 +507,20 @@ class TestBuildEffectiveByWorkspaceFds(unittest.TestCase):
|
||||||
|
|
||||||
class TestResolveEffectiveForPath(unittest.TestCase):
|
class TestResolveEffectiveForPath(unittest.TestCase):
|
||||||
def test_with_exact_record(self):
|
def test_with_exact_record(self):
|
||||||
root = _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=False)
|
root = _ds("r", "/", neutralize=True, ragIndexEnabled=False)
|
||||||
leaf = _ds("l", "/folder/sub", neutralize=False)
|
leaf = _ds("l", "/folder/sub", neutralize=False)
|
||||||
allDs = [root, leaf]
|
allDs = [root, leaf]
|
||||||
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/folder/sub", allDs)
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/folder/sub", allDs)
|
||||||
self.assertEqual(result["effectiveNeutralize"], False)
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
self.assertEqual(result["effectiveScope"], "mandate")
|
self.assertNotIn("effectiveScope", result)
|
||||||
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
def test_without_record_inherits_from_ancestor(self):
|
def test_without_record_inherits_from_ancestor(self):
|
||||||
root = _ds("r", "/", neutralize=True, scope="mandate", ragIndexEnabled=True)
|
root = _ds("r", "/", neutralize=True, ragIndexEnabled=True)
|
||||||
allDs = [root]
|
allDs = [root]
|
||||||
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/deep/path/file.txt", allDs)
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/deep/path/file.txt", allDs)
|
||||||
self.assertEqual(result["effectiveNeutralize"], True)
|
self.assertEqual(result["effectiveNeutralize"], True)
|
||||||
self.assertEqual(result["effectiveScope"], "mandate")
|
self.assertNotIn("effectiveScope", result)
|
||||||
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
self.assertEqual(result["effectiveRagIndexEnabled"], True)
|
||||||
|
|
||||||
def test_without_record_inherits_from_closest_ancestor(self):
|
def test_without_record_inherits_from_closest_ancestor(self):
|
||||||
|
|
@ -559,7 +535,7 @@ class TestResolveEffectiveForPath(unittest.TestCase):
|
||||||
allDs: list = []
|
allDs: list = []
|
||||||
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/path", allDs)
|
result = _inheritFlags.resolveEffectiveForPath("conn-1", "sharepointFolder", "/path", allDs)
|
||||||
self.assertEqual(result["effectiveNeutralize"], False)
|
self.assertEqual(result["effectiveNeutralize"], False)
|
||||||
self.assertEqual(result["effectiveScope"], "personal")
|
self.assertNotIn("effectiveScope", result)
|
||||||
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
self.assertEqual(result["effectiveRagIndexEnabled"], False)
|
||||||
|
|
||||||
def test_connection_root_covers_service_subtree(self):
|
def test_connection_root_covers_service_subtree(self):
|
||||||
|
|
@ -620,14 +596,15 @@ class TestResolveEffectiveForFds(unittest.TestCase):
|
||||||
result = _inheritFlags.resolveEffectiveForFds("fi-1", "*", None, allFds, mode="aggregate")
|
result = _inheritFlags.resolveEffectiveForFds("fi-1", "*", None, allFds, mode="aggregate")
|
||||||
self.assertEqual(result["effectiveRagIndexEnabled"], "mixed")
|
self.assertEqual(result["effectiveRagIndexEnabled"], "mixed")
|
||||||
|
|
||||||
def test_inheritable_fds_flags_excludes_scope(self):
|
def test_inheritable_flags_and_fds_flags(self):
|
||||||
self.assertIn("ragIndexEnabled", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
self.assertIn("ragIndexEnabled", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||||
self.assertIn("neutralize", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
self.assertIn("neutralize", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||||
self.assertNotIn("scope", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
self.assertNotIn("scope", _inheritFlags._INHERITABLE_FDS_FLAGS)
|
||||||
|
self.assertNotIn("scope", _inheritFlags._INHERITABLE_FLAGS)
|
||||||
|
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
# FDS cascade resets RAG (in addition to neutralize and scope)
|
# FDS cascade resets RAG (in addition to neutralize)
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|
||||||
class TestCascadeResetFdsRag(unittest.TestCase):
|
class TestCascadeResetFdsRag(unittest.TestCase):
|
||||||
|
|
@ -678,15 +655,6 @@ class TestVirtualCoordAggregate(unittest.TestCase):
|
||||||
)
|
)
|
||||||
self.assertEqual(result["effectiveNeutralize"], "mixed")
|
self.assertEqual(result["effectiveNeutralize"], "mixed")
|
||||||
|
|
||||||
def test_virtual_folder_mixed_scope(self):
|
|
||||||
child1 = _ds("c1", "/virtual/a", scope="mandate")
|
|
||||||
child2 = _ds("c2", "/virtual/b", scope="personal")
|
|
||||||
allDs = [child1, child2]
|
|
||||||
result = _inheritFlags.resolveEffectiveForPath(
|
|
||||||
"conn-1", "sharepointFolder", "/virtual", allDs, mode="aggregate",
|
|
||||||
)
|
|
||||||
self.assertEqual(result["effectiveScope"], "mixed")
|
|
||||||
|
|
||||||
def test_virtual_folder_mixed_rag(self):
|
def test_virtual_folder_mixed_rag(self):
|
||||||
child1 = _ds("c1", "/virtual/a", ragIndexEnabled=True)
|
child1 = _ds("c1", "/virtual/a", ragIndexEnabled=True)
|
||||||
child2 = _ds("c2", "/virtual/b", ragIndexEnabled=False)
|
child2 = _ds("c2", "/virtual/b", ragIndexEnabled=False)
|
||||||
|
|
|
||||||
|
|
@ -46,10 +46,10 @@ class TestSupportsFlag(unittest.TestCase):
|
||||||
self.assertFalse(n.supportsFlag("scope"))
|
self.assertFalse(n.supportsFlag("scope"))
|
||||||
self.assertFalse(n.supportsFlag("ragIndexEnabled"))
|
self.assertFalse(n.supportsFlag("ragIndexEnabled"))
|
||||||
|
|
||||||
def test_connection_supports_all_three(self):
|
def test_connection_supports_neutralize_and_rag(self):
|
||||||
n = ConnectionNode("c1", "msft", label="m", parentKey="personalRoot", rec=None)
|
n = ConnectionNode("c1", "msft", label="m", parentKey="personalRoot", rec=None)
|
||||||
self.assertTrue(n.supportsFlag("neutralize"))
|
self.assertTrue(n.supportsFlag("neutralize"))
|
||||||
self.assertTrue(n.supportsFlag("scope"))
|
self.assertFalse(n.supportsFlag("scope"))
|
||||||
self.assertTrue(n.supportsFlag("ragIndexEnabled"))
|
self.assertTrue(n.supportsFlag("ragIndexEnabled"))
|
||||||
|
|
||||||
def test_fds_table_supports_neutralize_and_rag_but_not_scope(self):
|
def test_fds_table_supports_neutralize_and_rag_but_not_scope(self):
|
||||||
|
|
@ -137,7 +137,7 @@ class TestGetEffectiveFlag(unittest.TestCase):
|
||||||
def test_ds_walk_inherits_from_authority_root(self):
|
def test_ds_walk_inherits_from_authority_root(self):
|
||||||
root = {
|
root = {
|
||||||
"id": "r", "connectionId": "c", "sourceType": "msft", "path": "/",
|
"id": "r", "connectionId": "c", "sourceType": "msft", "path": "/",
|
||||||
"userId": "user-1", "neutralize": True, "scope": None, "ragIndexEnabled": None,
|
"userId": "user-1", "neutralize": True, "ragIndexEnabled": None,
|
||||||
}
|
}
|
||||||
node = FolderNode(
|
node = FolderNode(
|
||||||
connectionId="c", service="sharepoint", sourceType="sharepointFolder",
|
connectionId="c", service="sharepoint", sourceType="sharepointFolder",
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue