# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Configuration system for chatbot instances.

Supports loading from:
1. Database (FeatureInstance.config JSONB field) - primary method
2. JSON files from configs/ directory - fallback/legacy method
"""

import logging
import json
import warnings
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, TYPE_CHECKING

if TYPE_CHECKING:
    from modules.datamodels.datamodelFeatures import FeatureInstance

logger = logging.getLogger(__name__)

# Cache for loaded configs, keyed "instance_<id>" for database configs and
# "file_<id>" for file-based configs so the two namespaces never collide.
_config_cache: Dict[str, 'ChatbotConfig'] = {}

# Default system prompt when none is configured
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful assistant. You have access to SQL query tools and "
    "web search tools. Use them to help answer user questions."
)


@dataclass
class DatabaseConfig:
    """Database configuration for a chatbot instance."""
    schema: Dict[str, Any] = field(default_factory=dict)
    connector: str = "preprocessor"

    def is_sql_enabled(self) -> bool:
        """Check if SQL queries are possible (has connector)."""
        return bool(self.connector)


@dataclass
class ToolConfig:
    """Tool configuration for a chatbot instance.

    Each tool is a dict with at least an "enabled" key; ``None`` means the
    tool was never configured and the per-tool default applies.
    """
    sql: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})
    tavily: Optional[Dict[str, Any]] = None
    streaming: Dict[str, Any] = field(default_factory=lambda: {"enabled": True})

    def is_sql_enabled(self) -> bool:
        """Check if SQL tool is enabled (default: enabled)."""
        if self.sql is None:
            return True  # Default enabled
        return self.sql.get("enabled", True)

    def is_tavily_enabled(self) -> bool:
        """Check if Tavily web search tool is enabled (default: disabled)."""
        if self.tavily is None:
            return False  # Default disabled
        return self.tavily.get("enabled", False)

    def is_streaming_enabled(self) -> bool:
        """Check if streaming status tool is enabled (default: enabled)."""
        if self.streaming is None:
            return True  # Default enabled
        return self.streaming.get("enabled", True)


@dataclass
class ModelConfig:
    """Model configuration for a chatbot instance."""
    operationType: str = "DATA_ANALYSE"
    processingMode: str = "BASIC"  # Changed from DETAILED for faster responses


@dataclass
class ChatbotConfig:
    """Configuration for a chatbot instance."""
    id: str
    name: str
    systemPrompt: str
    database: DatabaseConfig
    tools: ToolConfig
    model: ModelConfig

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotConfig':
        """
        Create ChatbotConfig from dictionary.

        Supports two config formats:
        1. New format (file-based): systemPrompt, database, tools, model
        2. Legacy frontend format: connector, prompts, behavior

        Args:
            data: Configuration dictionary (from JSON file or FeatureInstance.config)
            config_id: Identifier for this config (instance ID or file name)

        Returns:
            ChatbotConfig instance with validated values
        """
        # Detect config format and normalize
        if "prompts" in data or "connector" in data or "behavior" in data:
            # Legacy frontend format - convert to new format
            data = cls._convert_legacy_config(data)

        # Get system prompt - required field, use default if not provided
        system_prompt = data.get("systemPrompt")
        if not system_prompt:
            logger.warning("Config %s: No systemPrompt provided, using default", config_id)
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # Parse database config
        db_data = data.get("database", {})
        database_config = DatabaseConfig(
            schema=db_data.get("schema", {}),
            connector=db_data.get("connector", "preprocessor")
        )

        # Parse tools config with defaults
        tools_data = data.get("tools", {})
        tools_config = ToolConfig(
            sql=tools_data.get("sql", {"enabled": True}),
            tavily=tools_data.get("tavily", {"enabled": False}),
            streaming=tools_data.get("streaming", {"enabled": True})
        )

        # Parse model config with defaults.
        # NOTE: default processingMode is BASIC for faster responses,
        # matching the ModelConfig dataclass default and the legacy converter
        # (was inconsistently DETAILED here before).
        model_data = data.get("model", {})
        model_config = ModelConfig(
            operationType=model_data.get("operationType", "DATA_ANALYSE"),
            processingMode=model_data.get("processingMode", "BASIC")
        )

        return cls(
            id=data.get("id", config_id),
            name=data.get("name", "Chatbot"),
            systemPrompt=system_prompt,
            database=database_config,
            tools=tools_config,
            model=model_config
        )

    @staticmethod
    def _convert_legacy_config(data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert legacy frontend config format to new format.

        Legacy format (from AdminFeatureAccessPage.tsx):
        {
            "connector": {"types": [...], "type": "preprocessor"},
            "prompts": {"customAnalysisPrompt": "...", "customFinalAnswerPrompt": "..."},
            "behavior": {"enableWebResearch": true, ...}
        }

        New format:
        {
            "systemPrompt": "...",
            "database": {"connector": "preprocessor"},
            "tools": {"sql": {"enabled": true}, "tavily": {"enabled": true}}
        }
        """
        converted = {}

        # Extract system prompt from prompts section
        prompts = data.get("prompts", {})
        system_prompt = prompts.get("customAnalysisPrompt") or prompts.get("customFinalAnswerPrompt")
        if system_prompt:
            converted["systemPrompt"] = system_prompt

        # Extract connector from connector section
        connector_data = data.get("connector", {})
        connector_type = connector_data.get("type") or "preprocessor"
        if isinstance(connector_data.get("types"), list) and connector_data["types"]:
            connector_type = connector_data["types"][0]  # Use first connector as primary

        converted["database"] = {
            "connector": connector_type,
            "schema": {}
        }

        # Extract tool settings from behavior section
        behavior = data.get("behavior", {})
        enable_web_research = behavior.get("enableWebResearch", False)

        converted["tools"] = {
            "sql": {"enabled": True},  # SQL always enabled if connector is set
            "tavily": {"enabled": enable_web_research},
            "streaming": {"enabled": True}  # Streaming always enabled
        }

        # Model config defaults - use BASIC for faster responses
        converted["model"] = {
            "operationType": "DATA_ANALYSE",
            "processingMode": "BASIC"
        }

        # Copy other fields
        if "id" in data:
            converted["id"] = data["id"]
        if "name" in data:
            converted["name"] = data["name"]

        logger.debug("Converted legacy config format to new format")
        return converted

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "systemPrompt": self.systemPrompt,
            "database": {
                "schema": self.database.schema,
                "connector": self.database.connector
            },
            "tools": {
                "sql": self.tools.sql,
                "tavily": self.tools.tavily,
                "streaming": self.tools.streaming
            },
            "model": {
                "operationType": self.model.operationType,
                "processingMode": self.model.processingMode
            }
        }


def load_chatbot_config_from_instance(instance: 'FeatureInstance') -> ChatbotConfig:
    """
    Load chatbot configuration from a FeatureInstance's config field.

    This is the primary method for loading chatbot configuration.
    The config is stored in the FeatureInstance.config JSONB field.

    Args:
        instance: FeatureInstance object with config field

    Returns:
        ChatbotConfig instance

    Raises:
        ValueError: If instance has no config and no fallback available
    """
    instance_id = instance.id

    # Check cache first (by instance ID)
    cache_key = f"instance_{instance_id}"
    if cache_key in _config_cache:
        logger.debug("Returning cached config for instance %s", instance_id)
        return _config_cache[cache_key]

    # Get config from instance
    config_data = instance.config

    if not config_data:
        # No config in instance - try to load default from file as fallback
        logger.warning("Instance %s has no config, loading default from file", instance_id)
        try:
            return load_chatbot_config_from_file("default")
        except FileNotFoundError:
            # Create minimal default config
            logger.warning("No default config file found, using minimal defaults")
            config_data = {}

    # Create config from dictionary
    config = ChatbotConfig.from_dict(config_data, config_id=instance_id)

    # Cache the config
    _config_cache[cache_key] = config

    logger.info("Loaded chatbot config from instance %s: %s", instance_id, config.name)
    return config


def load_chatbot_config_from_dict(config_data: Dict[str, Any], config_id: str = "custom") -> ChatbotConfig:
    """
    Load chatbot configuration from a dictionary.

    Useful for testing or when config data is already available.

    Args:
        config_data: Configuration dictionary
        config_id: Identifier for this config

    Returns:
        ChatbotConfig instance
    """
    return ChatbotConfig.from_dict(config_data, config_id=config_id)


def load_chatbot_config_from_file(config_id: str) -> ChatbotConfig:
    """
    Load chatbot configuration from JSON file.

    This is the legacy/fallback method for loading configuration.
    Prefer load_chatbot_config_from_instance() for production use.

    Args:
        config_id: Configuration ID (e.g., "althaus", "default")

    Returns:
        ChatbotConfig instance

    Raises:
        FileNotFoundError: If config file not found
        ValueError: If config file is invalid
    """
    # Check cache first (by file ID)
    cache_key = f"file_{config_id}"
    if cache_key in _config_cache:
        logger.debug("Returning cached config for file %s", config_id)
        return _config_cache[cache_key]

    # Get path to configs directory
    current_dir = Path(__file__).parent
    configs_dir = current_dir / "configs"
    config_file = configs_dir / f"{config_id}.json"

    if not config_file.exists():
        # Try default config if requested config not found
        if config_id != "default":
            logger.warning("Config file %s not found, trying default", config_id)
            return load_chatbot_config_from_file("default")
        raise FileNotFoundError(f"Chatbot config file not found: {config_file}")

    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        config = ChatbotConfig.from_dict(data, config_id=config_id)

        # Cache the config
        _config_cache[cache_key] = config

        logger.info("Loaded chatbot config from file: %s (%s)", config_id, config.name)
        return config

    except json.JSONDecodeError as e:
        logger.error("Error parsing chatbot config JSON %s: %s", config_file, e)
        raise ValueError(f"Invalid JSON in config file {config_file}: {e}")
    except Exception as e:
        logger.error("Error loading chatbot config %s: %s", config_file, e)
        raise


def load_chatbot_config(config_id: str) -> ChatbotConfig:
    """
    Load chatbot configuration from JSON file.

    DEPRECATED: Use load_chatbot_config_from_instance() for database configs
    or load_chatbot_config_from_file() for file-based configs.

    Args:
        config_id: Configuration ID (e.g., "althaus", "default")

    Returns:
        ChatbotConfig instance
    """
    warnings.warn(
        "load_chatbot_config() is deprecated. Use load_chatbot_config_from_instance() "
        "for database configs or load_chatbot_config_from_file() for file-based configs.",
        DeprecationWarning,
        stacklevel=2
    )
    return load_chatbot_config_from_file(config_id)


def clear_config_cache(instance_id: Optional[str] = None):
    """
    Clear the configuration cache.

    Args:
        instance_id: Optional instance ID to clear specific cache entry.
                     If None, clears entire cache (both instance and file entries).
    """
    # No `global` needed: the module-level dict is only mutated, never rebound.
    if instance_id:
        cache_key = f"instance_{instance_id}"
        if cache_key in _config_cache:
            del _config_cache[cache_key]
            logger.debug("Cleared chatbot config cache for instance %s", instance_id)
    else:
        _config_cache.clear()
        logger.debug("Cleared all chatbot config cache")