# gateway/modules/features/chatbot/config.py
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Configuration system for chatbot instances.
Supports loading from:
1. Database (FeatureInstance.config JSONB field) - primary method
2. JSON files from configs/ directory - fallback/legacy method
"""
import logging
import json
import warnings
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, TYPE_CHECKING
if TYPE_CHECKING:
from modules.datamodels.datamodelFeatures import FeatureInstance
logger = logging.getLogger(__name__)
# Cache for loaded configs. Keys are namespaced: "instance_<id>" for
# database-backed configs and "file_<id>" for file-based configs.
_config_cache: Dict[str, 'ChatbotConfig'] = {}
# Default system prompt used when a config provides no systemPrompt.
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. You have access to SQL query tools and web search tools. Use them to help answer user questions."
@dataclass
class DatabaseConfig:
    """Database configuration for a chatbot instance.

    Attributes:
        schema: Free-form schema description dictionary (may be empty).
        connector: Name of the connector used to reach the database.
    """
    schema: Dict[str, Any] = field(default_factory=dict)
    connector: str = "preprocessor"

    def is_sql_enabled(self) -> bool:
        """Check if SQL queries are possible (has connector)."""
        # A non-empty connector string means SQL access is available.
        return True if self.connector else False
@dataclass
class ToolConfig:
    """Tool configuration for a chatbot instance.

    Each tool section is a dict with at least an "enabled" key; a section
    that is None (or missing the key) falls back to that tool's default.
    """
    sql: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})
    tavily: Optional[Dict[str, Any]] = None
    streaming: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})

    @staticmethod
    def _flag(section: Optional[Dict[str, Any]], default: bool) -> bool:
        """Read the "enabled" flag from a tool section, with a fallback."""
        if section is None:
            return default
        return section.get("enabled", default)

    def is_sql_enabled(self) -> bool:
        """Check if SQL tool is enabled (default: enabled)."""
        return self._flag(self.sql, True)

    def is_tavily_enabled(self) -> bool:
        """Check if Tavily web search tool is enabled (default: disabled)."""
        return self._flag(self.tavily, False)

    def is_streaming_enabled(self) -> bool:
        """Check if streaming status tool is enabled (default: enabled)."""
        return self._flag(self.streaming, True)
@dataclass
class ModelConfig:
    """Model configuration for a chatbot instance."""
    # Operation type passed through to the model layer.
    operationType: str = "DATA_ANALYSE"
    processingMode: str = "BASIC" # Changed from DETAILED for faster responses
@dataclass
class ChatbotConfig:
    """Configuration for a chatbot instance.

    Attributes:
        id: Unique identifier (instance ID or config file name).
        name: Human-readable display name.
        systemPrompt: System prompt supplied to the model.
        database: Database/connector settings.
        tools: Per-tool enable flags.
        model: Model operation settings.
    """
    id: str
    name: str
    systemPrompt: str
    database: DatabaseConfig
    tools: ToolConfig
    model: ModelConfig

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotConfig':
        """
        Create ChatbotConfig from dictionary.
        Supports two config formats:
        1. New format (file-based): systemPrompt, database, tools, model
        2. Legacy frontend format: connector, prompts, behavior
        Args:
            data: Configuration dictionary (from JSON file or FeatureInstance.config)
            config_id: Identifier for this config (instance ID or file name)
        Returns:
            ChatbotConfig instance with validated values
        """
        # Detect config format and normalize legacy payloads first.
        if "prompts" in data or "connector" in data or "behavior" in data:
            # Legacy frontend format - convert to new format
            data = cls._convert_legacy_config(data)

        # System prompt is required; fall back to the module default.
        system_prompt = data.get("systemPrompt")
        if not system_prompt:
            logger.warning(f"Config {config_id}: No systemPrompt provided, using default")
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # Parse database config
        db_data = data.get("database", {})
        database_config = DatabaseConfig(
            schema=db_data.get("schema", {}),
            connector=db_data.get("connector", "preprocessor")
        )

        # Parse tools config with defaults
        tools_data = data.get("tools", {})
        tools_config = ToolConfig(
            sql=tools_data.get("sql", {"enabled": True}),
            tavily=tools_data.get("tavily", {"enabled": False}),
            streaming=tools_data.get("streaming", {"enabled": True})
        )

        # Parse model config with defaults.
        # FIX: default processingMode is "BASIC" for consistency with the
        # ModelConfig dataclass default and _convert_legacy_config — the
        # previous "DETAILED" here was a leftover from before the change
        # to BASIC-for-faster-responses.
        model_data = data.get("model", {})
        model_config = ModelConfig(
            operationType=model_data.get("operationType", "DATA_ANALYSE"),
            processingMode=model_data.get("processingMode", "BASIC")
        )

        return cls(
            id=data.get("id", config_id),
            name=data.get("name", "Chatbot"),
            systemPrompt=system_prompt,
            database=database_config,
            tools=tools_config,
            model=model_config
        )

    @staticmethod
    def _convert_legacy_config(data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert legacy frontend config format to new format.
        Legacy format (from AdminFeatureAccessPage.tsx):
        {
            "connector": {"types": [...], "type": "preprocessor"},
            "prompts": {"customAnalysisPrompt": "...", "customFinalAnswerPrompt": "..."},
            "behavior": {"enableWebResearch": true, ...}
        }
        New format:
        {
            "systemPrompt": "...",
            "database": {"connector": "preprocessor"},
            "tools": {"sql": {"enabled": true}, "tavily": {"enabled": true}}
        }
        """
        converted = {}

        # Extract system prompt from prompts section (analysis prompt wins).
        prompts = data.get("prompts", {})
        system_prompt = prompts.get("customAnalysisPrompt") or prompts.get("customFinalAnswerPrompt")
        if system_prompt:
            converted["systemPrompt"] = system_prompt

        # Extract connector from connector section
        connector_data = data.get("connector", {})
        connector_type = connector_data.get("type") or "preprocessor"
        if isinstance(connector_data.get("types"), list) and connector_data["types"]:
            connector_type = connector_data["types"][0]  # Use first connector as primary
        converted["database"] = {
            "connector": connector_type,
            "schema": {}
        }

        # Extract tool settings from behavior section
        behavior = data.get("behavior", {})
        enable_web_research = behavior.get("enableWebResearch", False)
        converted["tools"] = {
            "sql": {"enabled": True},  # SQL always enabled if connector is set
            "tavily": {"enabled": enable_web_research},
            "streaming": {"enabled": True}  # Streaming always enabled
        }

        # Model config defaults - use BASIC for faster responses
        converted["model"] = {
            "operationType": "DATA_ANALYSE",
            "processingMode": "BASIC"
        }

        # Copy passthrough identity fields when present.
        if "id" in data:
            converted["id"] = data["id"]
        if "name" in data:
            converted["name"] = data["name"]

        logger.debug("Converted legacy config format to new format")
        return converted

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "systemPrompt": self.systemPrompt,
            "database": {
                "schema": self.database.schema,
                "connector": self.database.connector
            },
            "tools": {
                "sql": self.tools.sql,
                "tavily": self.tools.tavily,
                "streaming": self.tools.streaming
            },
            "model": {
                "operationType": self.model.operationType,
                "processingMode": self.model.processingMode
            }
        }
def load_chatbot_config_from_instance(instance: 'FeatureInstance') -> ChatbotConfig:
    """
    Load chatbot configuration from a FeatureInstance's config field.

    This is the primary method for loading chatbot configuration; the config
    is stored in the FeatureInstance.config JSONB field.

    Args:
        instance: FeatureInstance object with config field
    Returns:
        ChatbotConfig instance
    Raises:
        ValueError: If instance has no config and no fallback available
    """
    instance_id = instance.id
    cache_key = f"instance_{instance_id}"

    # Serve from cache when this instance was already loaded.
    cached = _config_cache.get(cache_key)
    if cached is not None:
        logger.debug(f"Returning cached config for instance {instance_id}")
        return cached

    config_data = instance.config
    if not config_data:
        # No config on the instance — fall back to the default file config,
        # and to minimal built-in defaults if no such file exists.
        logger.warning(f"Instance {instance_id} has no config, loading default from file")
        try:
            return load_chatbot_config_from_file("default")
        except FileNotFoundError:
            logger.warning(f"No default config file found, using minimal defaults")
            config_data = {}

    config = ChatbotConfig.from_dict(config_data, config_id=instance_id)
    _config_cache[cache_key] = config
    logger.info(f"Loaded chatbot config from instance {instance_id}: {config.name}")
    return config
def load_chatbot_config_from_dict(config_data: Dict[str, Any], config_id: str = "custom") -> ChatbotConfig:
    """
    Load chatbot configuration from a dictionary.

    Useful for testing or when config data is already available. This is a
    thin convenience wrapper around ChatbotConfig.from_dict and does NOT
    touch the config cache.

    Args:
        config_data: Configuration dictionary
        config_id: Identifier for this config
    Returns:
        ChatbotConfig instance
    """
    config = ChatbotConfig.from_dict(config_data, config_id=config_id)
    return config
def load_chatbot_config_from_file(config_id: str) -> ChatbotConfig:
    """
    Load chatbot configuration from JSON file.

    This is the legacy/fallback method for loading configuration.
    Prefer load_chatbot_config_from_instance() for production use.

    Args:
        config_id: Configuration ID (e.g., "althaus", "default")
    Returns:
        ChatbotConfig instance
    Raises:
        FileNotFoundError: If config file not found
        ValueError: If config file is invalid
    """
    # Check cache first (by file ID)
    cache_key = f"file_{config_id}"
    if cache_key in _config_cache:
        logger.debug(f"Returning cached config for file {config_id}")
        return _config_cache[cache_key]

    # Resolve <module dir>/configs/<config_id>.json
    current_dir = Path(__file__).parent
    configs_dir = current_dir / "configs"
    config_file = configs_dir / f"{config_id}.json"

    if not config_file.exists():
        # Fall back to the default config once before giving up.
        if config_id != "default":
            logger.warning(f"Config file {config_id} not found, trying default")
            return load_chatbot_config_from_file("default")
        raise FileNotFoundError(f"Chatbot config file not found: {config_file}")

    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        config = ChatbotConfig.from_dict(data, config_id=config_id)
        # Cache the config
        _config_cache[cache_key] = config
        logger.info(f"Loaded chatbot config from file: {config_id} ({config.name})")
        return config
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing chatbot config JSON {config_file}: {e}")
        # FIX: chain the original decode error ("from e") so the traceback
        # keeps the JSON position details instead of the generic
        # "during handling of the above exception" context.
        raise ValueError(f"Invalid JSON in config file {config_file}: {e}") from e
    except Exception as e:
        logger.error(f"Error loading chatbot config {config_file}: {e}")
        raise
def load_chatbot_config(config_id: str) -> ChatbotConfig:
    """
    Load chatbot configuration from JSON file.

    DEPRECATED: Use load_chatbot_config_from_instance() for database configs
    or load_chatbot_config_from_file() for file-based configs.

    Args:
        config_id: Configuration ID (e.g., "althaus", "default")
    Returns:
        ChatbotConfig instance
    """
    deprecation_message = (
        "load_chatbot_config() is deprecated. Use load_chatbot_config_from_instance() "
        "for database configs or load_chatbot_config_from_file() for file-based configs."
    )
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return load_chatbot_config_from_file(config_id)
def clear_config_cache(instance_id: Optional[str] = None):
    """
    Clear the configuration cache.

    Args:
        instance_id: Optional instance ID to clear specific cache entry.
            Only the instance-scoped entry ("instance_<id>") is removed;
            file-based entries stay cached unless the whole cache is cleared.
            If None, clears entire cache (instance and file entries).
    """
    # FIX: dropped the unnecessary `global` statement — the dict is only
    # mutated in place (pop/clear), never rebound, so `global` had no effect.
    if instance_id:
        cache_key = f"instance_{instance_id}"
        # pop() with a default makes "not cached" a silent no-op,
        # matching the original membership-check-then-delete behavior.
        if _config_cache.pop(cache_key, None) is not None:
            logger.debug(f"Cleared chatbot config cache for instance {instance_id}")
    else:
        _config_cache.clear()
        logger.debug("Cleared all chatbot config cache")