# gateway/modules/features/chatbot/chatbot.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Chatbot domain logic."""
import contextvars
import re
import logging
import threading
from dataclasses import dataclass, field
from typing import Annotated, AsyncIterator, Any, List, Optional, TYPE_CHECKING
from pydantic import BaseModel
from langchain_core.messages import (
AIMessage,
BaseMessage,
HumanMessage,
SystemMessage,
ToolMessage,
trim_messages,
)
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, START, END
from langgraph.graph.state import CompiledStateGraph
from modules.features.chatbot.bridges.ai import AICenterChatModel
from modules.features.chatbot.bridges.memory import (
CheckpointerResolver,
DatabaseCheckpointer,
set_checkpointer,
reset_checkpointer,
)
from modules.features.chatbot.bridges.tools import (
create_sql_query_tool,
create_tavily_search_tool,
create_send_streaming_message_tool,
)
from modules.services.serviceStreaming import ChatStreamingHelper
from modules.datamodels.datamodelUam import User
if TYPE_CHECKING:
from modules.features.chatbot.config import ChatbotConfig
logger = logging.getLogger(__name__)
def _tool_output_to_markdown_table(raw: str) -> str:
"""
Convert sqlite_query tool output to a markdown table for deterministic display.
Reduces model hallucination by providing a ready-to-copy table.
Format: "Query returned N rows:\\nColumns: A, B, C\\n1. A: x, B: y, C: z\\n..."
"""
if not raw or not raw.strip():
return raw
lines = [ln.strip() for ln in raw.strip().split("\n") if ln.strip()]
if len(lines) < 2:
return raw
# Parse header
row_count_line = lines[0] # "Query returned 20 rows:"
cols_line = next((ln for ln in lines if ln.lower().startswith("columns:")), None)
if not cols_line:
return raw
headers = [h.strip() for h in cols_line.replace("Columns:", "").split(",")]
if not headers:
return raw
# Parse data rows (1. Col: val, Col: val)
rows = []
for ln in lines:
if re.match(r"^\d+\.\s+", ln):
rest = re.sub(r"^\d+\.\s+", "", ln)
row = {}
for part in rest.split(", "):
if ": " in part:
k, v = part.split(": ", 1)
row[k.strip()] = str(v).strip()
if row:
rows.append([row.get(h, "") for h in headers])
if not rows:
return raw
# Build markdown table
sep = " | "
header_row = sep.join(headers)
div_row = sep.join(["---"] * len(headers))
data_rows = [sep.join(str(c) for c in r) for r in rows]
table = "\n".join([header_row, div_row] + data_rows)
suffix = ""
if "(Showing first" in raw or "of " in raw:
m = re.search(r"\(Showing first (\d+) of (\d+) rows\)", raw)
if m:
suffix = f"\n\nZeige {m.group(1)} von {m.group(2)} Artikeln."
return f"{row_count_line}\n\n{table}{suffix}"
def _sanitize_llm_response(text: str) -> str:
"""Strip chat template tokens and trailing junk that some models leak."""
if not text or not isinstance(text, str):
return text or ""
for sentinel in ("<|im_start|>", "<|im_end|>", "<|endoftext|>", "<|user|>", "<|assistant|>"):
if sentinel in text:
text = text.split(sentinel)[0]
return text.strip()
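# Illustrative example: a leaked template token and everything after it is cut:
#   _sanitize_llm_response("Antwort fertig.<|im_end|>rest") -> "Antwort fertig."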
# Natural language markers to split system prompt into context sections
_SPLIT_MARKERS = {
"schema_start": "Die Datenbank enthält",
"schema_start_alt": "Die Datenbank enthält die Tabellen",
"response_structure_start": "Antwortstruktur ist strikt",
"response_structure_alt": "Antwortstruktur:",
"response_structure_fallback": "Antwortstruktur",
}
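# The markers are matched verbatim against the German system prompt:
# "Die Datenbank enthält" ("the database contains") opens the schema section;
# the "Antwortstruktur" ("response structure") variants open the mandatory
# answer-format section.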
def _split_system_prompt(prompt: str) -> dict:
"""
Split system prompt by natural language section markers.
Returns: {intro, schema, response_structure}
- intro: Role, tools, general instructions (before schema)
- schema: Database tables, SQL rules, column definitions (for SQL generation)
- response_structure: Mandatory answer format (Einleitungssatz, Tabelle, etc.)
"""
if not prompt or not isinstance(prompt, str):
return {"intro": "", "schema": "", "response_structure": ""}
text = prompt.strip()
intro_end = len(text)
schema_start_idx = -1
schema_end = len(text)
response_start_idx = -1
# Find schema start
for marker in (_SPLIT_MARKERS["schema_start"], _SPLIT_MARKERS["schema_start_alt"]):
idx = text.find(marker)
if idx >= 0:
schema_start_idx = idx
intro_end = idx
break
# Find response structure start
for marker in (
_SPLIT_MARKERS["response_structure_start"],
_SPLIT_MARKERS["response_structure_alt"],
_SPLIT_MARKERS["response_structure_fallback"],
):
idx = text.find(marker)
if idx >= 0:
response_start_idx = idx
schema_end = idx if schema_start_idx >= 0 else len(text)
break
intro = text[:intro_end].strip() if intro_end > 0 else ""
schema = (
text[schema_start_idx:schema_end].strip()
if schema_start_idx >= 0 and schema_end > schema_start_idx
else ""
)
response_structure = (
text[response_start_idx:].strip()
if response_start_idx >= 0
else ""
)
# Fallback: if no markers found, use full prompt for intro
if not intro and not schema and not response_structure:
intro = text
elif not response_structure and intro:
response_structure = intro # Use intro's format hints as fallback
return {"intro": intro, "schema": schema, "response_structure": response_structure}
class ChatState(BaseModel):
"""Represents the state of a chat session."""
messages: Annotated[List[BaseMessage], add_messages]
plan: Optional[str] = None # Planner routing: "SQL", "TAVILY", "BOTH", "NONE"
@dataclass
class ChatbotGraphContext:
"""Per-request context for cached graph execution. Nodes read model/tools from here."""
model: AICenterChatModel
planner_model: AICenterChatModel
tools: List[Any]
tools_by_name: dict
sql_tool: Any
tavily_tool: Any
streaming_tool: Any
prompt_sections: dict
system_prompt: str
_graph_context: contextvars.ContextVar[Optional[ChatbotGraphContext]] = contextvars.ContextVar(
"chatbot_graph_context", default=None
)
def _get_graph_context() -> ChatbotGraphContext:
ctx = _graph_context.get()
if ctx is None:
raise RuntimeError(
"ChatbotGraphContext not set. Ensure graph context is set before invoking cached graph."
)
return ctx
def _set_graph_context(ctx: ChatbotGraphContext) -> contextvars.Token:
return _graph_context.set(ctx)
def _reset_graph_context(token: contextvars.Token) -> None:
"""Reset graph context. Safe when called from a different async context (e.g. generator cleanup)."""
try:
_graph_context.reset(token)
except ValueError:
# Token was created in a different context (e.g. after yield, generator cleanup)
pass
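# Typical call pattern (sketch; mirrors chat()/stream_events() below):
#   token = _set_graph_context(ctx)
#   try:
#       result = await graph.ainvoke(...)
#   finally:
#       _reset_graph_context(token)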
# Cached compiled graph; lock for thread-safe cache access
_compiled_graph_cache: Optional[CompiledStateGraph] = None
_compiled_graph_lock = threading.Lock()
def _get_or_build_cached_graph() -> CompiledStateGraph:
"""Return cached compiled graph or build and cache it. Thread-safe."""
global _compiled_graph_cache
with _compiled_graph_lock:
if _compiled_graph_cache is not None:
return _compiled_graph_cache
_compiled_graph_cache = _build_cached_graph()
logger.info("Chatbot: compiled graph cached for reuse")
return _compiled_graph_cache
def _build_cached_graph() -> CompiledStateGraph:
"""Build the chatbot graph with context-resolved nodes and CheckpointerResolver."""
checkpointer = CheckpointerResolver()
PLANNER_SYSTEM = (
"Du bist ein Assistent. Antworte NUR mit einem Wort: SQL, TAVILY, BOTH oder NONE.\n"
"SQL = Fragen zu Lager, Bestand, Artikel, Preisen, wie viele, Anzahl (Datenbankabfrage).\n"
"TAVILY = Internetsuche, Produktinfos außerhalb der DB, Markttrends.\n"
"BOTH = beides nötig. NONE = nur Begrüßung oder Danksagung, keine Daten nötig.\n"
"Beispiele: 'wie viele X auf Lager' -> SQL, 'Infos zu Produkt Y' -> TAVILY."
)
SCHEMA_TRUNCATION_SUFFIX = (
"\n\n[... Schema gekürzt. Wichtige Tabellen: Artikel, Lagerplatz_Artikel, Einkaufspreis, Lagerplatz. "
"Artikel-Spalte: a.\"Artikelbezeichnung\". "
"JOIN: Artikel a, Lagerplatz_Artikel l ON a.I_ID = l.R_ARTIKEL, Lagerplatz lp ON l.R_LAGERPLATZ = lp.I_ID.]"
)
SQL_PLAN_SUFFIX = (
"\n\n--- AUSGABEFORMAT (PFLICHT) ---\n"
"Antworte NUR mit einer SQL SELECT-Abfrage in diesem Format:\n"
"```sql\nDEINE_SQL_QUERY\n```\n"
"KRITISCH bei 'wie viele X auf Lager': Liefere ARTIKELZEILEN (Artikelnummer, Artikelbezeichnung, Bestand) "
"mit LIMIT 20, NICHT nur SELECT COUNT(*). Ohne Detailzeilen kann keine Tabelle angezeigt werden. "
"Gesamtanzahl optional via Unterabfrage im SELECT."
)
FORMULATE_TASK = (
"\n\n--- AKTUELLE AUFGABE ---\n"
"Du erhältst eine Benutzerfrage und die exakten Datenbankergebnisse. "
"KRITISCH: Nutze NUR die gelieferten Daten. Erfinde NIEMALS Daten (keine LED-A01, LED Rot, etc.). "
"Wenn die Ergebnisse NUR eine Zahl enthalten (z.B. '1. COUNT(*): 806'): Reportiere NUR diese Zahl, KEINE erfundene Tabelle. "
"Eine Tabelle darf NUR erstellt werden, wenn echte Zeilen '1. Spalte: Wert, ...' in den Daten stehen. "
"Beachte die obige ANTWORTSTRUKTUR."
)
bytes_per_token = 3
reserved_tokens = 3000
_SQL_KEYWORDS = (
"lager", "bestand", "artikel", "wie viele", "anzahl", "preis",
"lieferant", "lieferanten", "bestellen", "verfügbar", "inventar"
)
def _get_context_length(ctx: ChatbotGraphContext) -> int:
if hasattr(ctx.model, "_selected_model") and ctx.model._selected_model:
return getattr(ctx.model._selected_model, "contextLength", 128000)
return 128000
def _truncate_system_prompt(full_prompt: str, max_chars: int, suffix: str = "") -> str:
if len(full_prompt) <= max_chars:
return full_prompt
return full_prompt[: max_chars - len(suffix)] + suffix
async def planner_node(state: ChatState) -> dict:
ctx = _get_graph_context()
human_msgs = [m for m in state.messages if isinstance(m, HumanMessage)]
last_human = human_msgs[-1].content if human_msgs else ""
window = [SystemMessage(content=PLANNER_SYSTEM), HumanMessage(content=last_human)]
plan = "SQL"
try:
response = await ctx.planner_model.ainvoke(window)
except ValueError as exc:
if "No suitable model found" in str(exc):
logger.warning(f"Planner model selection failed: {exc}")
return {"plan": plan}
raise
content = (response.content or "").strip().upper()
for keyword in ("SQL", "TAVILY", "BOTH", "NONE"):
if keyword in content:
plan = keyword
break
return {"plan": plan}
def route_by_plan(state: ChatState) -> str:
ctx = _get_graph_context()
plan = (state.plan or "SQL").upper()
if plan == "NONE" and ctx.sql_tool:
last_user = ""
for m in reversed(state.messages):
if isinstance(m, HumanMessage):
last_user = (m.content or "").lower()
break
if any(kw in last_user for kw in _SQL_KEYWORDS):
logger.info("Planner returned NONE but user asked inventory question - routing to SQL")
plan = "SQL"
if plan in ("SQL", "BOTH") and ctx.sql_tool:
return "agent_sql_plan"
if plan == "TAVILY" and ctx.tavily_tool:
return "agent_tavily"
return "agent_answer"
def select_window(ctx: ChatbotGraphContext, msgs: List[BaseMessage], max_tokens_override: Optional[int] = None) -> List[BaseMessage]:
        def approx_counter(items: List[BaseMessage]) -> int:
            # Counts characters rather than true tokens; characters outnumber
            # tokens, so trimming errs on the conservative side.
            return sum(len(getattr(m, "content", "") or "") for m in items)
max_tokens = max_tokens_override or _get_context_length(ctx)
return trim_messages(
msgs,
strategy="last",
token_counter=approx_counter,
max_tokens=int(max_tokens * 0.8),
start_on="human",
end_on=("human", "tool"),
include_system=True,
)
async def _agent_common(state: ChatState, system_content: str, llm: Any, node_name: str) -> dict:
ctx = _get_graph_context()
msgs = select_window(ctx, state.messages)
if not msgs or not isinstance(msgs[0], SystemMessage):
window = [SystemMessage(content=system_content)] + msgs
else:
window = [SystemMessage(content=system_content)] + [m for m in msgs if not isinstance(m, SystemMessage)]
try:
response = await llm.ainvoke(window)
except ValueError as exc:
if "No suitable model found" in str(exc):
logger.warning(f"{node_name} model selection failed: {exc}")
response = AIMessage(
content="Es tut mir leid, derzeit steht kein passendes KI-Modell für diese Anfrage zur Verfügung. "
"Bitte versuchen Sie es später erneut oder wenden Sie sich an den Administrator."
)
else:
raise
return {"messages": [response]}
def _parse_sql_from_content(content: str) -> Optional[str]:
if not content:
return None
match = re.search(r"```(?:sql)?\s*([\s\S]*?)```", content)
if match:
sql = match.group(1).strip()
if sql and sql.upper().strip().startswith("SELECT"):
return sql
for line in content.split("\n"):
line = line.strip()
if line.upper().startswith("SELECT"):
return line
return None
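    # Illustrative examples (not executed): a fenced block yields its SELECT,
    # and a bare "SELECT ..." line is picked up as a fallback:
    #   _parse_sql_from_content("```sql\nSELECT 1\n```")  -> "SELECT 1"
    #   _parse_sql_from_content("SELECT * FROM Artikel")  -> "SELECT * FROM Artikel"
    #   _parse_sql_from_content("DROP TABLE Artikel")     -> None (non-SELECT rejected)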
def _sanitize_sql_typos(sql: str) -> str:
if not sql:
return sql
sql = re.sub(r"WHEN([A-Za-z_][A-Za-z0-9_.\"]*)", r"WHEN \1", sql, flags=re.IGNORECASE)
sql = re.sub(r"\bLAGerplatz_Artikel\b", "Lagerplatz_Artikel", sql)
sql = re.sub(r"\bLAGerplatz\b", "Lagerplatz", sql)
sql = sql.replace('"Einkaufspreis_neu"', '"Einkaufspreis"')
sql = sql.replace("Einkaufspreis_neu.", "Einkaufspreis.")
sql = re.sub(r'"Einkaufspreis"\."ARTIKEL"', '"Einkaufspreis"."m_Artikel"', sql)
return sql
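    # Illustrative example (not executed):
    #   _sanitize_sql_typos('CASE WHENLAGerplatz."Menge" > 0')
    #   -> 'CASE WHEN Lagerplatz."Menge" > 0' (space restored, casing fixed)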
async def agent_sql_plan_node(state: ChatState) -> dict:
ctx = _get_graph_context()
ctx_len = _get_context_length(ctx)
max_system_chars = max(1000, int(ctx_len * 0.8 - reserved_tokens) * bytes_per_token) - len(SQL_PLAN_SUFFIX)
schema_part = ctx.prompt_sections.get("schema") or ctx.prompt_sections.get("intro", "")
intro_part = (ctx.prompt_sections.get("intro", "") or "")[:400]
combined = f"{intro_part}\n\n{schema_part}" if intro_part else schema_part
system_content = _truncate_system_prompt(combined, max_system_chars, SCHEMA_TRUNCATION_SUFFIX) + SQL_PLAN_SUFFIX
llm = ctx.model
return await _agent_common(state, system_content, llm, "agent_sql_plan")
async def parse_execute_sql_node(state: ChatState) -> dict:
ctx = _get_graph_context()
sql_t = ctx.sql_tool
last_msg = state.messages[-1] if state.messages else None
if not isinstance(last_msg, AIMessage):
return {"messages": [ToolMessage(content="Fehler: Keine AI-Antwort zum Parsen.", tool_call_id="parse_0", name="sqlite_query")]}
sql = _parse_sql_from_content(last_msg.content or "")
if not sql or not sql_t:
return {"messages": [ToolMessage(content="Konnte keine SQL-Abfrage aus der Antwort extrahieren.", tool_call_id="parse_0", name="sqlite_query")]}
sql = _sanitize_sql_typos(sql)
try:
result = await sql_t.ainvoke({"query": sql})
except Exception as e:
logger.error(f"SQL execution failed: {e}")
result = f"Fehler bei der Ausführung: {e}"
return {"messages": [ToolMessage(content=str(result), tool_call_id="parse_0", name="sqlite_query")]}
async def agent_formulate_node(state: ChatState) -> dict:
ctx = _get_graph_context()
human_content = ""
tool_content = ""
for m in state.messages:
if isinstance(m, HumanMessage):
human_content = m.content or ""
if isinstance(m, ToolMessage) and getattr(m, "name", "") == "sqlite_query":
tool_content = m.content or ""
if not tool_content or not tool_content.strip():
logger.warning("agent_formulate: no tool_content (sqlite_query) in state.messages")
return {"messages": [AIMessage(content="Die Datenbankabfrage konnte keine Ergebnisse liefern. Bitte versuchen Sie es mit einer anderen Formulierung.")]}
if "Query failed" in tool_content or tool_content.strip().startswith("Error"):
err_summary = "Die Datenbankabfrage ist fehlgeschlagen."
if "no such column" in tool_content:
err_summary += " Ein Spaltenname scheint nicht zu passen. Bitte die Anfrage anders formulieren."
return {"messages": [AIMessage(content=err_summary)]}
formatted_data = _tool_output_to_markdown_table(tool_content)
logger.debug(f"agent_formulate: tool_content length={len(tool_content)}, formatted={len(formatted_data)}")
ctx_len = _get_context_length(ctx)
max_system_chars = max(3000, int(ctx_len * 0.5) * bytes_per_token) - len(FORMULATE_TASK)
resp_struct = ctx.prompt_sections.get("response_structure") or ctx.prompt_sections.get("intro", "")
intro_formulate = ctx.prompt_sections.get("intro", "")
combined = f"{intro_formulate}\n\n{resp_struct}" if intro_formulate != resp_struct else resp_struct
if len(combined) + len(FORMULATE_TASK) > max_system_chars:
combined = _truncate_system_prompt(combined, max_system_chars - len(FORMULATE_TASK), "")
system_content = combined + FORMULATE_TASK
prompt = (
f"Benutzerfrage: {human_content}\n\n"
"--- VORGEGEBENE DATEN (diese Tabelle/Zahlen UNVERÄNDERT in die Antwort übernehmen): ---\n"
f"{formatted_data}\n\n"
"Die obige Tabelle bzw. Zahlen sind die EINZIGEN erlaubten Daten. Kopiere sie 1:1. "
"Berechne keine eigenen Summen/Anzahlen - nutze die gelieferten Werte. Formuliere die Antwort:"
)
window = [SystemMessage(content=system_content), HumanMessage(content=prompt)]
try:
response = await ctx.model.ainvoke(window)
except ValueError as exc:
if "No suitable model found" in str(exc):
response = AIMessage(content="Es gab einen Fehler bei der Formulierung. Bitte versuchen Sie es erneut.")
else:
raise
if response.content:
response = AIMessage(content=_sanitize_llm_response(response.content))
return {"messages": [response]}
async def agent_tavily_node(state: ChatState) -> dict:
ctx = _get_graph_context()
resp_struct = ctx.prompt_sections.get("response_structure") or ""
intro_tavily = ctx.prompt_sections.get("intro", "")
combined = f"{intro_tavily}\n\n{resp_struct}" if resp_struct else intro_tavily
system_content = _truncate_system_prompt(combined, 6000, "")
tools_tavily = [t for t in [ctx.tavily_tool, ctx.streaming_tool] if t is not None]
llm_tavily = ctx.model.bind_tools(tools=tools_tavily) if tools_tavily else ctx.model
return await _agent_common(state, system_content, llm_tavily, "agent_tavily")
async def agent_answer_node(state: ChatState) -> dict:
ctx = _get_graph_context()
resp_struct = ctx.prompt_sections.get("response_structure") or ""
intro_answer = ctx.prompt_sections.get("intro", "")
combined = f"{intro_answer}\n\n{resp_struct}" if resp_struct else intro_answer
system_content = _truncate_system_prompt(combined, 6000, "")
llm = ctx.planner_model if ctx.planner_model else ctx.model
return await _agent_common(state, system_content, llm, "agent_answer")
def should_continue_tavily(state: ChatState) -> str:
last = state.messages[-1]
return "tools" if getattr(last, "tool_calls", None) else END
def route_back(state: ChatState) -> str:
ctx = _get_graph_context()
return "agent_tavily" if ctx.tavily_tool else "agent_answer"
async def tools_with_retry(state: ChatState) -> dict:
import asyncio
ctx = _get_graph_context()
last_message = state.messages[-1]
tool_calls = getattr(last_message, "tool_calls", [])
if not tool_calls:
return {"messages": []}
tools_by_name = ctx.tools_by_name
async def execute_single_tool(tool_call):
tool_name = tool_call.get("name") or tool_call.get("function", {}).get("name")
tool_id = tool_call.get("id", f"call_{tool_name}")
args = tool_call.get("args") or tool_call.get("function", {}).get("arguments", {})
if isinstance(args, str):
import json
try:
args = json.loads(args)
except Exception:
args = {"input": args}
tool = tools_by_name.get(tool_name)
if not tool:
return ToolMessage(content=f"Error: Tool '{tool_name}' not found", tool_call_id=tool_id, name=tool_name)
try:
if hasattr(tool, "coroutine") and asyncio.iscoroutinefunction(tool.coroutine):
result = await tool.coroutine(**args)
elif hasattr(tool, "ainvoke"):
result = await tool.ainvoke(args)
else:
result = tool.invoke(args)
return ToolMessage(content=str(result), tool_call_id=tool_id, name=tool_name)
except Exception as e:
logger.error(f"Tool {tool_name} failed: {e}")
return ToolMessage(content=f"Error executing {tool_name}: {str(e)}", tool_call_id=tool_id, name=tool_name)
tool_messages = await asyncio.gather(
*[execute_single_tool(tc) for tc in tool_calls],
return_exceptions=True
)
result_messages = []
for i, msg in enumerate(tool_messages):
if isinstance(msg, Exception):
tool_call = tool_calls[i]
tool_name = tool_call.get("name", "unknown")
tool_id = tool_call.get("id", f"call_{i}")
result_messages.append(ToolMessage(content=f"Error: {str(msg)}", tool_call_id=tool_id, name=tool_name))
else:
result_messages.append(msg)
result = {"messages": result_messages}
no_results_keywords = [
"returned 0 rows", "no data", "keine artikel gefunden", "keine ergebnisse"
]
for msg in result.get("messages", []):
content = getattr(msg, "content", "")
if isinstance(content, str):
content_lower = content.lower()
if any(keyword in content_lower for keyword in no_results_keywords):
                    # Count prior retry instructions ("alternative Suchstrategie"
                    # appears only in the injected message below) to avoid endless loops.
                    retry_count = sum(
                        1 for m in state.messages
                        if "alternative Suchstrategie" in str(getattr(m, "content", ""))
                    )
if retry_count < 2:
logger.info("No results found in tool output, adding retry instruction")
retry_message = HumanMessage(
content="WICHTIG: Die vorherige Suche hat keine Ergebnisse gefunden. "
"Bitte versuche eine alternative Suchstrategie:\n"
"1. Wenn die Frage im Format 'X von Y' war (z.B. 'Lampen von Eaton'), "
"verwende IMMER eine Kombination aus Lieferanten-Filter (WHERE a.\"Lieferant\" LIKE '%Y%') "
"UND Produkttyp-Filter (WHERE a.\"Artikelbezeichnung\" LIKE '%X%' OR ...)\n"
"2. Verwende mehrere Synonyme für den Produkttyp (z.B. bei 'Lampen': Lampe, LED, Beleuchtung, Licht, Leuchte, Strahler)\n"
"3. Führe zuerst eine COUNT-Abfrage durch, dann die Detail-Abfrage mit Lagerbeständen\n"
"4. Verwende LIKE '%Lieferant%' für den Lieferanten-Filter, um auch Varianten zu finden"
)
result["messages"].append(retry_message)
break
return result
workflow = StateGraph(ChatState)
workflow.add_node("planner", planner_node)
workflow.add_node("agent_sql_plan", agent_sql_plan_node)
workflow.add_node("parse_execute_sql", parse_execute_sql_node)
workflow.add_node("agent_formulate", agent_formulate_node)
workflow.add_node("tools", tools_with_retry)
workflow.add_node("agent_tavily", agent_tavily_node)
workflow.add_node("agent_answer", agent_answer_node)
workflow.add_edge(START, "planner")
workflow.add_conditional_edges("planner", route_by_plan)
workflow.add_edge("agent_sql_plan", "parse_execute_sql")
workflow.add_edge("parse_execute_sql", "agent_formulate")
workflow.add_edge("agent_formulate", END)
workflow.add_conditional_edges("agent_tavily", should_continue_tavily)
workflow.add_edge("agent_answer", END)
workflow.add_conditional_edges("tools", route_back)
return workflow.compile(checkpointer=checkpointer)
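# Resulting topology, for orientation:
#   START -> planner --route_by_plan--> agent_sql_plan | agent_tavily | agent_answer
#   agent_sql_plan -> parse_execute_sql -> agent_formulate -> END
#   agent_tavily --should_continue_tavily--> tools | END
#   tools --route_back--> agent_tavily | agent_answer
#   agent_answer -> END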
@dataclass
class Chatbot:
"""Represents a chatbot."""
model: AICenterChatModel
memory: DatabaseCheckpointer
    planner_model: Optional[AICenterChatModel] = None  # Fast model for routing (SQL/TAVILY/BOTH/NONE)
    app: Optional[CompiledStateGraph] = None
_tools: List[Any] = field(default_factory=list) # Configured tools (for cached graph context)
system_prompt: str = "You are a helpful assistant."
workflow_id: str = "default"
config: Optional["ChatbotConfig"] = None
_event_manager: Any = None
@classmethod
async def create(
cls,
model: AICenterChatModel,
memory: DatabaseCheckpointer,
system_prompt: str,
workflow_id: str = "default",
config: Optional["ChatbotConfig"] = None,
event_manager=None,
planner_model: Optional[AICenterChatModel] = None,
) -> "Chatbot":
"""Factory method to create and configure a Chatbot instance.
Args:
model: The chat model to use (AICenterChatModel).
memory: The chat memory to use (DatabaseCheckpointer).
system_prompt: The system prompt to initialize the chatbot.
workflow_id: The workflow ID (maps to thread_id).
config: Optional chatbot configuration for dynamic tool enablement.
event_manager: Optional event manager for streaming (passed from route).
planner_model: Optional fast model for planner/routing (default: same as model).
Returns:
A configured Chatbot instance.
"""
instance = Chatbot(
model=model,
memory=memory,
system_prompt=system_prompt,
workflow_id=workflow_id,
config=config,
_event_manager=event_manager,
planner_model=planner_model,
)
configured_tools = await instance._configure_tools()
instance._tools = configured_tools
instance.app = _get_or_build_cached_graph()
return instance
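    # Usage sketch (hypothetical wiring; constructing model, memory and the
    # system prompt depends on the AI-center and checkpointer bridges):
    #   bot = await Chatbot.create(model=model, memory=memory,
    #                              system_prompt=prompt, workflow_id="wf-1")
    #   history = await bot.chat("Wie viele Artikel sind auf Lager?", chat_id="t1")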
async def _configure_tools(self) -> List[Any]:
"""Configure tools for the chatbot based on config.
Returns:
List of configured tools based on config settings.
"""
tools = []
# Get tool enablement from config (use defaults if no config)
sql_enabled = True
tavily_enabled = False
streaming_enabled = True
connector_type = "preprocessor"
if self.config:
sql_enabled = self.config.tools.is_sql_enabled()
tavily_enabled = self.config.tools.is_tavily_enabled()
streaming_enabled = self.config.tools.is_streaming_enabled()
connector_type = self.config.database.connector
logger.info(f"Chatbot tools config - SQL: {sql_enabled}, Tavily: {tavily_enabled}, "
f"Streaming: {streaming_enabled}, Connector: {connector_type}")
# SQL query tool (if enabled)
if sql_enabled:
sql_tool = create_sql_query_tool(connector_type=connector_type)
tools.append(sql_tool)
logger.debug(f"Added SQL query tool with connector: {connector_type}")
# Tavily search tool (if enabled)
if tavily_enabled:
tavily_tool = create_tavily_search_tool()
tools.append(tavily_tool)
logger.debug("Added Tavily search tool")
# Streaming status tool (if enabled and event_manager available)
if streaming_enabled and self._event_manager:
send_streaming_message = create_send_streaming_message_tool(self._event_manager)
tools.append(send_streaming_message)
logger.debug("Added streaming status tool")
logger.info(f"Configured {len(tools)} tools for chatbot workflow {self.workflow_id}")
return tools
def _build_app(
self, memory: DatabaseCheckpointer, tools: List[Any]
) -> CompiledStateGraph[ChatState, None, ChatState, ChatState]:
"""Builds the chatbot application workflow using LangGraph.
Supports small context windows via planning phase and tiered prompts.
Args:
memory: The chat memory to use.
tools: The list of tools the chatbot can use.
Returns:
A compiled state graph representing the chatbot application.
"""
# Build tool subsets per agent type
tools_by_name = {t.name: t for t in tools}
sql_tool = tools_by_name.get("sqlite_query")
tavily_tool = tools_by_name.get("tavily_search")
streaming_tool = tools_by_name.get("send_streaming_message")
tools_sql = [t for t in [sql_tool, tavily_tool, streaming_tool] if t is not None]
tools_tavily = [t for t in [tavily_tool, streaming_tool] if t is not None]
llm_plain = self.model
llm_planner = self.planner_model if self.planner_model else self.model
# SQL path uses structured prompts + parse/execute (no native tool calling) - fits /api/analyze
llm_tavily = self.model.bind_tools(tools=tools_tavily) if tools_tavily else self.model
# Minimal planner prompt (~250 tokens) - fits any 8K+ model
# Explicit: Lager, Bestand, Artikel, wie viele = SQL (Datenbank)
PLANNER_SYSTEM = (
"Du bist ein Assistent. Antworte NUR mit einem Wort: SQL, TAVILY, BOTH oder NONE.\n"
"SQL = Fragen zu Lager, Bestand, Artikel, Preisen, wie viele, Anzahl (Datenbankabfrage).\n"
"TAVILY = Internetsuche, Produktinfos außerhalb der DB, Markttrends.\n"
"BOTH = beides nötig. NONE = nur Begrüßung oder Danksagung, keine Daten nötig.\n"
"Beispiele: 'wie viele X auf Lager' -> SQL, 'Infos zu Produkt Y' -> TAVILY."
)
# Truncation suffix for schema when prompt is cut
SCHEMA_TRUNCATION_SUFFIX = (
"\n\n[... Schema gekürzt. Wichtige Tabellen: Artikel, Lagerplatz_Artikel, Einkaufspreis, Lagerplatz. "
"Artikel-Spalte: a.\"Artikelbezeichnung\". "
"JOIN: Artikel a, Lagerplatz_Artikel l ON a.I_ID = l.R_ARTIKEL, Lagerplatz lp ON l.R_LAGERPLATZ = lp.I_ID.]"
)
# Structured output for /api/analyze (no tool calls): model outputs SQL in code block, we parse and execute
SQL_PLAN_SUFFIX = (
"\n\n--- AUSGABEFORMAT (PFLICHT) ---\n"
"Antworte NUR mit einer SQL SELECT-Abfrage in diesem Format:\n"
"```sql\nDEINE_SQL_QUERY\n```\n"
"KRITISCH bei 'wie viele X auf Lager': Liefere ARTIKELZEILEN (Artikelnummer, Artikelbezeichnung, Bestand) "
"mit LIMIT 20, NICHT nur SELECT COUNT(*). Ohne Detailzeilen kann keine Tabelle angezeigt werden. "
"Gesamtanzahl optional via Unterabfrage im SELECT."
)
bytes_per_token = 3 # Balanced estimate for mixed content
reserved_tokens = 3000 # Tools block + conversation overhead
def _get_context_length() -> int:
"""Get selected model's context length; pre-select if needed."""
if hasattr(self.model, "_selected_model") and self.model._selected_model:
return getattr(self.model._selected_model, "contextLength", 128000)
return 128000
def _truncate_system_prompt(full_prompt: str, max_chars: int, suffix: str = "") -> str:
"""Truncate system prompt to fit context budget."""
if len(full_prompt) <= max_chars:
return full_prompt
return full_prompt[: max_chars - len(suffix)] + suffix
# Split system prompt by natural language sections for targeted context
_prompt_sections = _split_system_prompt(self.system_prompt)
def select_window(msgs: List[BaseMessage], max_tokens_override: Optional[int] = None) -> List[BaseMessage]:
"""Selects a window of messages that fit within the context window size."""
            def approx_counter(items: List[BaseMessage]) -> int:
                # Character count as a conservative stand-in for a token count.
                return sum(len(getattr(m, "content", "") or "") for m in items)
max_tokens = max_tokens_override or _get_context_length()
return trim_messages(
msgs,
strategy="last",
token_counter=approx_counter,
max_tokens=int(max_tokens * 0.8),
start_on="human",
end_on=("human", "tool"),
include_system=True,
)
async def planner_node(state: ChatState) -> dict:
"""Planner: minimal prompt, no tools. Outputs SQL/TAVILY/BOTH/NONE.
Does NOT add planner message to chat - only sets state.plan for routing.
Uses llm_planner (fast model) when available for lower latency."""
human_msgs = [m for m in state.messages if isinstance(m, HumanMessage)]
last_human = human_msgs[-1].content if human_msgs else ""
window = [
SystemMessage(content=PLANNER_SYSTEM),
HumanMessage(content=last_human),
]
plan = "SQL"
try:
response = await llm_planner.ainvoke(window)
except ValueError as exc:
if "No suitable model found" in str(exc):
logger.warning(f"Planner model selection failed: {exc}")
return {"plan": plan}
raise
content = (response.content or "").strip().upper()
for keyword in ("SQL", "TAVILY", "BOTH", "NONE"):
if keyword in content:
plan = keyword
break
return {"plan": plan}
# Keywords that indicate database/inventory query - override NONE to SQL
_SQL_KEYWORDS = (
"lager", "bestand", "artikel", "wie viele", "anzahl", "preis",
"lieferant", "lieferanten", "bestellen", "verfügbar", "inventar"
)
def route_by_plan(state: ChatState) -> str:
"""Route from planner to agent_sql_plan, agent_tavily, or agent_answer."""
plan = (state.plan or "SQL").upper()
# Override NONE when user clearly asks for inventory/data (e.g. "wie viele LEDs auf Lager")
if plan == "NONE" and sql_tool:
last_user = ""
for m in reversed(state.messages):
if isinstance(m, HumanMessage):
last_user = (m.content or "").lower()
break
if any(kw in last_user for kw in _SQL_KEYWORDS):
logger.info("Planner returned NONE but user asked inventory question - routing to SQL")
plan = "SQL"
if plan in ("SQL", "BOTH") and sql_tool:
return "agent_sql_plan"
if plan == "TAVILY" and tavily_tool:
return "agent_tavily"
return "agent_answer"
async def _agent_common(
state: ChatState,
system_content: str,
llm,
node_name: str,
) -> dict:
"""Shared logic for agent nodes."""
msgs = select_window(state.messages)
if not msgs or not isinstance(msgs[0], SystemMessage):
window = [SystemMessage(content=system_content)] + msgs
else:
window = [SystemMessage(content=system_content)] + [m for m in msgs if not isinstance(m, SystemMessage)]
try:
response = await llm.ainvoke(window)
except ValueError as exc:
if "No suitable model found" in str(exc):
logger.warning(f"{node_name} model selection failed: {exc}")
response = AIMessage(
content=(
"Es tut mir leid, derzeit steht kein passendes KI-Modell für diese Anfrage zur Verfügung. "
"Bitte versuchen Sie es später erneut oder wenden Sie sich an den Administrator."
)
)
else:
raise
return {"messages": [response]}
async def agent_sql_plan_node(state: ChatState) -> dict:
"""Generate SQL. Uses schema section + minimal intro. Output: ```sql...``` for parse/execute."""
ctx_len = _get_context_length()
max_system_chars = max(1000, int(ctx_len * 0.8 - reserved_tokens) * bytes_per_token) - len(SQL_PLAN_SUFFIX)
# Prefer schema section; add short intro if space allows
schema_part = _prompt_sections["schema"] or _prompt_sections["intro"]
intro_part = _prompt_sections["intro"][:400] if _prompt_sections["intro"] else ""
combined = f"{intro_part}\n\n{schema_part}" if intro_part else schema_part
system_content = _truncate_system_prompt(
combined, max_system_chars, SCHEMA_TRUNCATION_SUFFIX
) + SQL_PLAN_SUFFIX
return await _agent_common(state, system_content, llm_plain, "agent_sql_plan")
def _parse_sql_from_content(content: str) -> Optional[str]:
"""Extract SQL from ```sql...``` or ```...``` code block. Only allows SELECT."""
if not content:
return None
match = re.search(r"```(?:sql)?\s*([\s\S]*?)```", content)
if match:
sql = match.group(1).strip()
if sql and sql.upper().strip().startswith("SELECT"):
return sql
# Fallback: find line starting with SELECT
for line in content.split("\n"):
line = line.strip()
if line.upper().startswith("SELECT"):
return line
return None
def _sanitize_sql_typos(sql: str) -> str:
"""Fix common LLM SQL typos that cause syntax errors."""
if not sql:
return sql
# Fix "CASE WHENLAGerplatz" - missing space after WHEN when followed directly by identifier
sql = re.sub(r"WHEN([A-Za-z_][A-Za-z0-9_.\"]*)", r"WHEN \1", sql, flags=re.IGNORECASE)
# Fix "LAGerplatz_Artikel" / "LAGerplatz" -> correct casing
sql = re.sub(r"\bLAGerplatz_Artikel\b", "Lagerplatz_Artikel", sql)
sql = re.sub(r"\bLAGerplatz\b", "Lagerplatz", sql)
# Preprocessor uses Einkaufspreis (not Einkaufspreis_neu) and m_Artikel (not ARTIKEL)
sql = sql.replace('"Einkaufspreis_neu"', '"Einkaufspreis"')
sql = sql.replace("Einkaufspreis_neu.", "Einkaufspreis.")
sql = re.sub(
r'"Einkaufspreis"\."ARTIKEL"',
'"Einkaufspreis"."m_Artikel"',
sql,
)
return sql
async def parse_execute_sql_node(state: ChatState) -> dict:
"""Parse SQL from last AIMessage, execute via preprocessor, add ToolMessage."""
last_msg = state.messages[-1] if state.messages else None
if not isinstance(last_msg, AIMessage):
return {"messages": [ToolMessage(content="Fehler: Keine AI-Antwort zum Parsen.", tool_call_id="parse_0", name="sqlite_query")]}
sql = _parse_sql_from_content(last_msg.content or "")
if not sql or not sql_tool:
return {"messages": [ToolMessage(content="Konnte keine SQL-Abfrage aus der Antwort extrahieren.", tool_call_id="parse_0", name="sqlite_query")]}
sql = _sanitize_sql_typos(sql)
try:
result = await sql_tool.ainvoke({"query": sql})
except Exception as e:
logger.error(f"SQL execution failed: {e}")
result = f"Fehler bei der Ausführung: {e}"
return {"messages": [ToolMessage(content=str(result), tool_call_id="parse_0", name="sqlite_query")]}
FORMULATE_TASK = (
"\n\n--- AKTUELLE AUFGABE ---\n"
"Du erhältst eine Benutzerfrage und die exakten Datenbankergebnisse. "
"KRITISCH: Nutze NUR die gelieferten Daten. Erfinde NIEMALS Daten (keine LED-A01, LED Rot, etc.). "
"Wenn die Ergebnisse NUR eine Zahl enthalten (z.B. '1. COUNT(*): 806'): Reportiere NUR diese Zahl, KEINE erfundene Tabelle. "
"Eine Tabelle darf NUR erstellt werden, wenn echte Zeilen '1. Spalte: Wert, ...' in den Daten stehen. "
"Beachte die obige ANTWORTSTRUKTUR."
)
async def agent_formulate_node(state: ChatState) -> dict:
"""Formulate final answer. Uses intro + response_structure sections (not schema)."""
human_content = ""
tool_content = ""
for m in state.messages:
if isinstance(m, HumanMessage):
human_content = m.content or ""
if isinstance(m, ToolMessage) and getattr(m, "name", "") == "sqlite_query":
tool_content = m.content or ""
if not tool_content or not tool_content.strip():
logger.warning("agent_formulate: no tool_content (sqlite_query) in state.messages")
return {"messages": [AIMessage(content="Die Datenbankabfrage konnte keine Ergebnisse liefern. Bitte versuchen Sie es mit einer anderen Formulierung.")]}
# When SQL failed, return error directly - don't let model hallucinate success
if "Query failed" in tool_content or tool_content.strip().startswith("Error"):
err_summary = "Die Datenbankabfrage ist fehlgeschlagen."
if "no such column" in tool_content:
err_summary += " Ein Spaltenname scheint nicht zu passen. Bitte die Anfrage anders formulieren."
return {"messages": [AIMessage(content=err_summary)]}
# Convert to markdown table so model copies exact values instead of reformatting/hallucinating
formatted_data = _tool_output_to_markdown_table(tool_content)
logger.debug(f"agent_formulate: tool_content length={len(tool_content)}, formatted={len(formatted_data)}")
ctx_len = _get_context_length()
max_system_chars = max(3000, int(ctx_len * 0.5) * bytes_per_token) - len(FORMULATE_TASK)
# Use intro + response_structure (mandatory format)
resp_struct = _prompt_sections["response_structure"] or _prompt_sections["intro"]
intro_formulate = _prompt_sections["intro"]
combined = f"{intro_formulate}\n\n{resp_struct}" if intro_formulate != resp_struct else resp_struct
# Fit within context; prefer keeping response_structure intact
if len(combined) + len(FORMULATE_TASK) > max_system_chars:
combined = _truncate_system_prompt(combined, max_system_chars - len(FORMULATE_TASK), "")
system_content = combined + FORMULATE_TASK
prompt = (
f"Benutzerfrage: {human_content}\n\n"
"--- VORGEGEBENE DATEN (diese Tabelle/Zahlen UNVERÄNDERT in die Antwort übernehmen): ---\n"
f"{formatted_data}\n\n"
"Die obige Tabelle bzw. Zahlen sind die EINZIGEN erlaubten Daten. Kopiere sie 1:1. "
"Berechne keine eigenen Summen/Anzahlen - nutze die gelieferten Werte. Formuliere die Antwort:"
)
window = [SystemMessage(content=system_content), HumanMessage(content=prompt)]
try:
response = await llm_plain.ainvoke(window)
except ValueError as exc:
if "No suitable model found" in str(exc):
response = AIMessage(content="Es gab einen Fehler bei der Formulierung. Bitte versuchen Sie es erneut.")
else:
raise
# Sanitize: strip leaked chat template tokens (<|im_start|> etc.) and trailing junk
if response.content:
response = AIMessage(content=_sanitize_llm_response(response.content))
return {"messages": [response]}
async def agent_tavily_node(state: ChatState) -> dict:
"""Agent with Tavily only. Uses intro + response_structure (no schema)."""
resp_struct = _prompt_sections["response_structure"] or ""
intro_tavily = _prompt_sections["intro"]
combined = f"{intro_tavily}\n\n{resp_struct}" if resp_struct else intro_tavily
system_content = _truncate_system_prompt(combined, 6000, "")
return await _agent_common(state, system_content, llm_tavily, "agent_tavily")
async def agent_answer_node(state: ChatState) -> dict:
"""Agent with no tools (plan NONE). Uses fast model for lower latency."""
resp_struct = _prompt_sections["response_structure"] or ""
intro_answer = _prompt_sections["intro"]
combined = f"{intro_answer}\n\n{resp_struct}" if resp_struct else intro_answer
system_content = _truncate_system_prompt(combined, 6000, "")
return await _agent_common(state, system_content, llm_planner, "agent_answer")
def should_continue_tavily(state: ChatState) -> str:
last = state.messages[-1]
return "tools" if getattr(last, "tool_calls", None) else END
def route_back(state: ChatState) -> str:
"""Route from tools back to agent_tavily (SQL path uses parse_execute_sql, no tools loop)."""
# Tools node is only reached from agent_tavily when it returns tool_calls
return "agent_tavily" if tavily_tool else "agent_answer"
async def tools_with_retry(state: ChatState) -> dict:
"""Tools node with parallel execution and retry logic.
Args:
state: The current chat state.
Returns:
The updated chat state after tool execution.
"""
import asyncio
# Get tool calls from the last message
last_message = state.messages[-1]
tool_calls = getattr(last_message, "tool_calls", [])
if not tool_calls:
return {"messages": []}
# Create a lookup for tools by name
tools_by_name = {t.name: t for t in tools}
async def execute_single_tool(tool_call):
"""Execute a single tool call."""
tool_name = tool_call.get("name") or tool_call.get("function", {}).get("name")
tool_id = tool_call.get("id", f"call_{tool_name}")
args = tool_call.get("args") or tool_call.get("function", {}).get("arguments", {})
if isinstance(args, str):
import json
try:
args = json.loads(args)
                    except Exception:
args = {"input": args}
tool = tools_by_name.get(tool_name)
if not tool:
return ToolMessage(
content=f"Error: Tool '{tool_name}' not found",
tool_call_id=tool_id,
name=tool_name
)
try:
                    # Execute tool asynchronously; guard the .coroutine access so
                    # sync-only tools fall through to invoke()
                    if hasattr(tool, "coroutine") and asyncio.iscoroutinefunction(tool.coroutine):
result = await tool.coroutine(**args)
elif hasattr(tool, 'ainvoke'):
result = await tool.ainvoke(args)
else:
result = tool.invoke(args)
return ToolMessage(
content=str(result),
tool_call_id=tool_id,
name=tool_name
)
except Exception as e:
logger.error(f"Tool {tool_name} failed: {e}")
return ToolMessage(
content=f"Error executing {tool_name}: {str(e)}",
tool_call_id=tool_id,
name=tool_name
)
# Execute ALL tool calls in parallel
logger.info(f"Executing {len(tool_calls)} tool calls in parallel")
tool_messages = await asyncio.gather(
*[execute_single_tool(tc) for tc in tool_calls],
return_exceptions=True
)
# Convert exceptions to error messages
result_messages = []
for i, msg in enumerate(tool_messages):
if isinstance(msg, Exception):
tool_call = tool_calls[i]
tool_name = tool_call.get("name", "unknown")
tool_id = tool_call.get("id", f"call_{i}")
result_messages.append(ToolMessage(
content=f"Error: {str(msg)}",
tool_call_id=tool_id,
name=tool_name
))
else:
result_messages.append(msg)
result = {"messages": result_messages}
# Check if we got no results and should retry
no_results_keywords = [
"returned 0 rows",
"no data",
"keine artikel gefunden",
"keine ergebnisse"
]
# Check tool results for no data
for msg in result.get("messages", []):
content = getattr(msg, "content", "")
if isinstance(content, str):
content_lower = content.lower()
if any(keyword in content_lower for keyword in no_results_keywords):
                        # Count prior retry instructions ("alternative Suchstrategie"
                        # appears only in the injected message) to avoid infinite loops
                        retry_count = sum(
                            1 for m in state.messages
                            if "alternative Suchstrategie" in str(getattr(m, "content", ""))
                        )
if retry_count < 2: # Allow max 2 retries
logger.info("No results found in tool output, adding retry instruction")
retry_message = HumanMessage(
content="WICHTIG: Die vorherige Suche hat keine Ergebnisse gefunden. "
"Bitte versuche eine alternative Suchstrategie:\n"
"1. Wenn die Frage im Format 'X von Y' war (z.B. 'Lampen von Eaton'), "
"verwende IMMER eine Kombination aus Lieferanten-Filter (WHERE a.\"Lieferant\" LIKE '%Y%') "
"UND Produkttyp-Filter (WHERE a.\"Artikelbezeichnung\" LIKE '%X%' OR ...)\n"
"2. Verwende mehrere Synonyme für den Produkttyp (z.B. bei 'Lampen': Lampe, LED, Beleuchtung, Licht, Leuchte, Strahler)\n"
"3. Führe zuerst eine COUNT-Abfrage durch, dann die Detail-Abfrage mit Lagerbeständen\n"
"4. Verwende LIKE '%Lieferant%' für den Lieferanten-Filter, um auch Varianten zu finden"
)
result["messages"].append(retry_message)
break
return result
# Compose the workflow: planner -> route -> agent_* -> tools (Tavily only) or END
workflow = StateGraph(ChatState)
workflow.add_node("planner", planner_node)
workflow.add_node("agent_sql_plan", agent_sql_plan_node)
workflow.add_node("parse_execute_sql", parse_execute_sql_node)
workflow.add_node("agent_formulate", agent_formulate_node)
workflow.add_node("agent_tavily", agent_tavily_node)
workflow.add_node("agent_answer", agent_answer_node)
workflow.add_node("tools", tools_with_retry)
workflow.add_edge(START, "planner")
workflow.add_conditional_edges("planner", route_by_plan)
# SQL path: agent_sql_plan -> parse_execute_sql -> agent_formulate -> END (no tools, /api/analyze compatible)
workflow.add_edge("agent_sql_plan", "parse_execute_sql")
workflow.add_edge("parse_execute_sql", "agent_formulate")
workflow.add_edge("agent_formulate", END)
workflow.add_conditional_edges("agent_tavily", should_continue_tavily)
workflow.add_edge("agent_answer", END)
workflow.add_conditional_edges("tools", route_back)
return workflow.compile(checkpointer=memory)
async def chat(self, message: str, chat_id: str = "default") -> List[BaseMessage]:
"""Processes a chat message by calling the LLM and tools and returns the chat history.
Args:
message: The user message to process.
chat_id: The chat thread ID.
Returns:
The list of messages in the chat history.
"""
config = {"configurable": {"thread_id": chat_id}}
tools_by_name = {t.name: t for t in self._tools}
graph_ctx = ChatbotGraphContext(
model=self.model,
planner_model=self.planner_model or self.model,
tools=self._tools,
tools_by_name=tools_by_name,
sql_tool=tools_by_name.get("sqlite_query"),
tavily_tool=tools_by_name.get("tavily_search"),
streaming_tool=tools_by_name.get("send_streaming_message"),
prompt_sections=_split_system_prompt(self.system_prompt),
system_prompt=self.system_prompt,
)
ctx_token = _set_graph_context(graph_ctx)
cp_token = set_checkpointer(self.memory)
try:
result = await self.app.ainvoke(
{"messages": [HumanMessage(content=message)]}, config=config
)
return result["messages"]
finally:
_reset_graph_context(ctx_token)
reset_checkpointer(cp_token)
async def stream_events(
self, *, message: str, chat_id: str = "default"
) -> AsyncIterator[dict]:
"""Stream UI-focused events using astream_events v2.
Args:
message: The user message to process.
chat_id: Logical thread identifier; forwarded in the runnable config so
memory and tools are scoped per thread.
Yields:
dict: One of:
- ``{"type": "status", "label": str}`` for short progress updates.
- ``{"type": "final", "response": {"thread": str, "chat_history": list[dict]}}``
where ``chat_history`` only includes ``user``/``assistant`` roles.
- ``{"type": "error", "message": str}`` if an exception occurs.
"""
# Thread-aware config for LangGraph/LangChain
config = {"configurable": {"thread_id": chat_id}}
def _is_root(ev: dict) -> bool:
"""Return True if the event is from the root run (v2: empty parent_ids)."""
return not ev.get("parent_ids")
# Build tool lookup for cached graph context
tools_by_name = {t.name: t for t in self._tools}
graph_ctx = ChatbotGraphContext(
model=self.model,
planner_model=self.planner_model or self.model,
tools=self._tools,
tools_by_name=tools_by_name,
sql_tool=tools_by_name.get("sqlite_query"),
tavily_tool=tools_by_name.get("tavily_search"),
streaming_tool=tools_by_name.get("send_streaming_message"),
prompt_sections=_split_system_prompt(self.system_prompt),
system_prompt=self.system_prompt,
)
ctx_token = _set_graph_context(graph_ctx)
cp_token = set_checkpointer(self.memory)
try:
async for event in self.app.astream_events(
{"messages": [HumanMessage(content=message)]},
config=config,
version="v2",
):
etype = event.get("event")
ename = event.get("name") or ""
edata = event.get("data") or {}
# Stream LLM tokens for ChatGPT-like incremental display
if etype in ("on_llm_stream", "on_chat_model_stream"):
ch = edata.get("chunk")
if ch is None:
continue
# Chunk can be string, AIMessageChunk (has .content), or dict
content = ""
if isinstance(ch, str):
content = ch
elif hasattr(ch, "content"):
content = ch.content or ""
if isinstance(content, list):
content = "".join(str(x) for x in content)
elif isinstance(ch, dict):
content = ch.get("content", "") or ""
if isinstance(content, str) and content:
yield {"type": "chunk", "content": content}
continue
# Stream human-readable progress via the special send_streaming_message tool
# Match the legacy implementation exactly (line 267-272 in legacy/chatbot.py)
if etype == "on_tool_start":
# Log all tool starts to debug
logger.debug(f"Tool start event: name='{ename}', event='{etype}'")
if ename == "send_streaming_message":
tool_in = edata.get("input") or {}
msg = tool_in.get("message")
logger.info(f"send_streaming_message tool called with input: {tool_in}")
if isinstance(msg, str) and msg.strip():
logger.info(f"Status-Update gesendet: {msg.strip()}")
yield {"type": "status", "label": msg.strip()}
continue
# Emit the final payload when the root run finishes
if etype == "on_chain_end" and _is_root(event):
output_obj = edata.get("output")
# Extract message list from the graph's final output
final_msgs = ChatStreamingHelper.extract_messages_from_output(
output_obj=output_obj
)
# Normalize for the frontend (only user/assistant with text content)
# Exclude planner-only and SQL-path intermediate messages from chat display
_planner_only = frozenset(("sql", "tavily", "both", "none"))
chat_history_payload: List[dict] = []
for m in final_msgs:
if isinstance(m, BaseMessage):
d = ChatStreamingHelper.message_to_dict(msg=m)
elif isinstance(m, dict):
d = ChatStreamingHelper.dict_message_to_dict(obj=m)
else:
continue
if d.get("role") not in ("user", "assistant") or not d.get("content"):
continue
content = (d.get("content") or "").strip()
if d.get("role") == "assistant" and content.lower() in _planner_only:
continue # Skip planner routing message
# Skip agent_sql_plan output: ```sql block OR raw SQL (SELECT...FROM/JOIN)
if d.get("role") == "assistant":
cu = content.upper()
if content.startswith("```") or (
cu.startswith("SELECT") and ("FROM" in cu or "JOIN" in cu)
):
continue
# Strip leaked chat template tokens (<|im_start|> etc.) from assistant messages
content = _sanitize_llm_response(content)
if not content:
continue
d = {**d, "content": content}
chat_history_payload.append(d)
yield {
"type": "final",
"response": {
"thread": chat_id,
"chat_history": chat_history_payload,
},
}
return
except Exception as exc:
# Emit a single error envelope and end the stream
logger.error(f"Exception in stream_events: {exc}", exc_info=True)
yield {"type": "error", "message": f"Fehler beim Verarbeiten: {exc}"}
finally:
_reset_graph_context(ctx_token)
reset_checkpointer(cp_token)
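# Illustrative consumer sketch (hypothetical; SSE/FastAPI plumbing omitted):
# drains stream_events and prints chunks, status updates and the final payload.
async def _demo_stream(bot: "Chatbot") -> None:
    async for ev in bot.stream_events(message="Hallo", chat_id="demo"):
        if ev["type"] == "chunk":
            print(ev["content"], end="", flush=True)
        elif ev["type"] == "status":
            print(f"[status] {ev['label']}")
        elif ev["type"] == "final":
            print("\n[final]", ev["response"]["chat_history"])
        elif ev["type"] == "error":
            print(f"\n[error] {ev['message']}")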