gateway/modules/features/chatbot/config.py

294 lines
10 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Configuration system for chatbot instances.
Loads configuration from the database (FeatureInstance.config JSONB field).
"""
import logging
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, TYPE_CHECKING
if TYPE_CHECKING:
from modules.datamodels.datamodelFeatures import FeatureInstance
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Parsed configs memoized per feature instance so repeated loads skip re-parsing.
_config_cache: Dict[str, 'ChatbotConfig'] = {}

# Fallback system prompt used whenever a config does not supply its own.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful assistant. "
    "You have access to SQL query tools and web search tools. "
    "Use them to help answer user questions."
)
@dataclass
class DatabaseConfig:
    """Database settings attached to a single chatbot instance."""

    # Free-form schema mapping; every instance gets its own empty dict by default.
    schema: Dict[str, Any] = field(default_factory=dict)
    # Name of the connector; an empty value means no connector is configured.
    connector: str = "preprocessor"

    def is_sql_enabled(self) -> bool:
        """Return True when a connector is set, i.e. SQL queries are possible."""
        return True if self.connector else False
@dataclass
class ToolConfig:
    """Tool switches for a chatbot instance.

    Each tool has its own settings dict whose ``"enabled"`` key toggles the
    tool. A section may be ``None`` (or lack the key), in which case the
    tool's default applies: SQL and streaming default to enabled, Tavily
    defaults to disabled.
    """

    sql: Optional[Dict[str, Any]] = field(default_factory=lambda: {"enabled": True})
    tavily: Optional[Dict[str, Any]] = None
    streaming: Optional[Dict[str, Any]] = field(default_factory=lambda: {"enabled": True})

    @staticmethod
    def _is_enabled(section: Optional[Dict[str, Any]], default: bool) -> bool:
        """Read the ``"enabled"`` flag of a tool section as a strict bool.

        Args:
            section: Tool settings dict, or None when the tool is unconfigured.
            default: Value used when the section or its flag is missing.

        Returns:
            The flag coerced to ``bool`` so callers never receive raw config
            values such as ``1`` or ``"yes"``.
        """
        if section is None:
            return default
        return bool(section.get("enabled", default))

    def is_sql_enabled(self) -> bool:
        """Check if SQL tool is enabled (enabled by default)."""
        return self._is_enabled(self.sql, True)

    def is_tavily_enabled(self) -> bool:
        """Check if Tavily web search tool is enabled (disabled by default)."""
        return self._is_enabled(self.tavily, False)

    def is_streaming_enabled(self) -> bool:
        """Check if streaming status tool is enabled (enabled by default)."""
        return self._is_enabled(self.streaming, True)
@dataclass
class ModelConfig:
    """Model settings (operation type and processing mode) for a chatbot instance."""

    operationType: str = "DATA_ANALYSE"
    # BASIC replaced the earlier DETAILED default to get faster responses.
    processingMode: str = "BASIC"
@dataclass
class ChatbotConfig:
    """Configuration for a chatbot instance.

    Bundles the identity, the system prompt and the database, tool and model
    settings parsed from a configuration dictionary.
    """

    id: str
    name: str
    systemPrompt: str
    database: DatabaseConfig
    tools: ToolConfig
    model: ModelConfig

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotConfig':
        """
        Create ChatbotConfig from dictionary.

        Supports two config formats:
        1. New format (file-based): systemPrompt, database, tools, model
        2. Legacy frontend format: connector, prompts, behavior

        Sections that are missing or explicitly null fall back to defaults.

        Args:
            data: Configuration dictionary (from JSON file or FeatureInstance.config)
            config_id: Identifier for this config (instance ID or file name)

        Returns:
            ChatbotConfig instance with validated values
        """
        # Tolerate an absent payload so callers need not special-case it.
        data = data or {}

        # Any legacy top-level key switches the whole payload to legacy parsing.
        if any(key in data for key in ("prompts", "connector", "behavior")):
            data = cls._convert_legacy_config(data)

        # The system prompt is required; substitute the default when empty.
        system_prompt = data.get("systemPrompt")
        if not system_prompt:
            logger.warning(f"Config {config_id}: No systemPrompt provided, using default")
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # "or {}" also covers sections stored as an explicit JSON null.
        db_data = data.get("database") or {}
        database_config = DatabaseConfig(
            schema=db_data.get("schema", {}),
            connector=db_data.get("connector", "preprocessor"),
        )

        tools_data = data.get("tools") or {}
        tools_config = ToolConfig(
            sql=tools_data.get("sql", {"enabled": True}),
            tavily=tools_data.get("tavily", {"enabled": False}),
            streaming=tools_data.get("streaming", {"enabled": True}),
        )

        # Take fallbacks from ModelConfig itself so they cannot drift from the
        # dataclass defaults (processingMode previously fell back to
        # "DETAILED" here while everything else used "BASIC").
        model_data = data.get("model") or {}
        model_defaults = ModelConfig()
        model_config = ModelConfig(
            operationType=model_data.get("operationType", model_defaults.operationType),
            processingMode=model_data.get("processingMode", model_defaults.processingMode),
        )

        return cls(
            id=data.get("id", config_id),
            name=data.get("name", "Chatbot"),
            systemPrompt=system_prompt,
            database=database_config,
            tools=tools_config,
            model=model_config,
        )

    @staticmethod
    def _convert_legacy_config(data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert legacy frontend config format to new format.

        Legacy format (from AdminFeatureAccessPage.tsx):
        {
            "connector": {"types": [...], "type": "preprocessor"},
            "prompts": {"customAnalysisPrompt": "...", "customFinalAnswerPrompt": "..."},
            "behavior": {"enableWebResearch": true, ...}
        }

        New format:
        {
            "systemPrompt": "...",
            "database": {"connector": "preprocessor"},
            "tools": {"sql": {"enabled": true}, "tavily": {"enabled": true}}
        }

        Only the keys listed above plus "id" and "name" are carried over.
        Legacy sections that are missing or null are treated as empty.

        Args:
            data: Configuration dictionary in the legacy format

        Returns:
            A new dictionary in the new format; ``data`` is not modified
        """
        converted: Dict[str, Any] = {}

        # The analysis prompt wins; the final-answer prompt is the fallback.
        prompts = data.get("prompts") or {}
        system_prompt = prompts.get("customAnalysisPrompt") or prompts.get("customFinalAnswerPrompt")
        if system_prompt:
            converted["systemPrompt"] = system_prompt

        # A non-empty "types" list takes precedence over the single "type".
        connector_data = data.get("connector") or {}
        connector_type = connector_data.get("type") or "preprocessor"
        connector_types = connector_data.get("types")
        if isinstance(connector_types, list) and connector_types:
            connector_type = connector_types[0]  # Use first connector as primary
        converted["database"] = {
            "connector": connector_type,
            "schema": {},
        }

        # Only web research is configurable in the legacy behavior section.
        behavior = data.get("behavior") or {}
        enable_web_research = behavior.get("enableWebResearch", False)
        converted["tools"] = {
            "sql": {"enabled": True},  # SQL always enabled if connector is set
            "tavily": {"enabled": enable_web_research},
            "streaming": {"enabled": True},  # Streaming always enabled
        }

        # Model config defaults - use BASIC for faster responses
        converted["model"] = {
            "operationType": "DATA_ANALYSE",
            "processingMode": "BASIC",
        }

        # Carry over identity fields when present.
        for key in ("id", "name"):
            if key in data:
                converted[key] = data[key]

        logger.debug("Converted legacy config format to new format")
        return converted

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary for serialization.

        The nested schema and tool dicts are returned by reference, not copied.
        """
        return {
            "id": self.id,
            "name": self.name,
            "systemPrompt": self.systemPrompt,
            "database": {
                "schema": self.database.schema,
                "connector": self.database.connector,
            },
            "tools": {
                "sql": self.tools.sql,
                "tavily": self.tools.tavily,
                "streaming": self.tools.streaming,
            },
            "model": {
                "operationType": self.model.operationType,
                "processingMode": self.model.processingMode,
            },
        }
def load_chatbot_config_from_instance(instance: 'FeatureInstance') -> ChatbotConfig:
    """
    Build (or fetch from cache) the ChatbotConfig for a FeatureInstance.

    The raw settings are read from ``instance.config``. A parsed config is
    stored in the module cache under the instance ID and returned as-is on
    later calls, without re-reading ``instance.config``.

    Args:
        instance: FeatureInstance object providing ``id`` and ``config``

    Returns:
        ChatbotConfig instance; built from an empty dict (defaults only) when
        the instance carries no config
    """
    instance_id = instance.id
    cache_key = f"instance_{instance_id}"

    # Fast path: this instance was already parsed.
    cached = _config_cache.get(cache_key)
    if cached is not None:
        logger.debug(f"Returning cached config for instance {instance_id}")
        return cached

    # An empty or missing config still yields a usable default configuration.
    raw_config = instance.config
    if not raw_config:
        logger.warning(f"Instance {instance_id} has no config, using minimal defaults")
        raw_config = {}

    config = ChatbotConfig.from_dict(raw_config, config_id=instance_id)
    _config_cache[cache_key] = config
    logger.info(f"Loaded chatbot config from instance {instance_id}: {config.name}")
    return config
def load_chatbot_config_from_dict(config_data: Dict[str, Any], config_id: str = "custom") -> ChatbotConfig:
    """
    Build a ChatbotConfig straight from an in-memory dictionary.

    Handy for tests or whenever the config data is already at hand.
    Delegates to ChatbotConfig.from_dict.

    Args:
        config_data: Configuration dictionary
        config_id: Identifier for this config

    Returns:
        ChatbotConfig instance
    """
    parsed = ChatbotConfig.from_dict(config_data, config_id=config_id)
    return parsed
def clear_config_cache(instance_id: Optional[str] = None) -> None:
    """
    Clear the configuration cache.

    Args:
        instance_id: Optional instance ID to clear specific cache entry.
            If None, clears entire cache. Any other value, including
            falsy ones such as "" or 0, targets only that instance's entry.
    """
    # Compare against None explicitly: a falsy ID must not wipe every entry.
    if instance_id is None:
        _config_cache.clear()
        logger.debug("Cleared all chatbot config cache")
        return

    cache_key = f"instance_{instance_id}"
    # The dict is mutated in place, so no "global" declaration is needed.
    if _config_cache.pop(cache_key, None) is not None:
        logger.debug(f"Cleared chatbot config cache for instance {instance_id}")