gateway/modules/features/chatbotV2/config.py

98 lines
3.4 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Configuration system for Chatbot V2 instances.
Loads configuration from FeatureInstance.config JSONB field.
"""
import logging
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, TYPE_CHECKING
if TYPE_CHECKING:
from modules.datamodels.datamodelFeatures import FeatureInstance
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Process-wide cache of parsed configurations. Entries are written by
# load_chatbotv2_config_from_instance under the key "instance_<id>".
# NOTE(review): nothing in the code visible here evicts or refreshes entries.
_config_cache: Dict[str, 'ChatbotV2Config'] = {}

# Fallback system prompt, used by ChatbotV2Config.from_dict when the config
# has no "systemPrompt" entry or the entry is empty/falsy.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful assistant. Answer questions based on the provided context documents. "
    "When the user asks about the documents, use the extracted content to provide accurate answers. "
    "If the context does not contain relevant information, say so."
)
@dataclass
class ModelConfig:
    """Model configuration for Chatbot V2.

    Attributes are plain strings/lists taken from the instance configuration;
    this class does not validate their values.
    """

    operationType: str = "DATA_ANALYSE"
    processingMode: str = "BASIC"
    # Declared Optional because None is the "not provided" sentinel; after
    # __post_init__ the attribute is never None.
    allowedProviders: Optional[List[str]] = None

    def __post_init__(self):
        """Replace a missing (None) provider list with a fresh empty list."""
        # A new list per instance avoids sharing one mutable default between
        # instances; an explicitly passed None is normalised the same way.
        if self.allowedProviders is None:
            self.allowedProviders = []
def _parse_int(val: Optional[Any], default: Optional[int] = None) -> Optional[int]:
    """Coerce a raw config value to ``int``, falling back to *default*.

    Args:
        val: Value read from the configuration (may be None, an int, a
            numeric string, a float, or anything else).
        default: Value returned when *val* is missing or cannot be converted.

    Returns:
        *val* itself when it is already an int, ``int(val)`` when that
        conversion succeeds, otherwise *default*. Booleans are treated as
        invalid and yield *default*.
    """
    # bool is a subclass of int, so it must be rejected before the int check;
    # a true/false flag is not a meaningful character count.
    if val is None or isinstance(val, bool):
        return default
    if isinstance(val, int):
        return val
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. list, dict).
        # ValueError: non-numeric text or float NaN.
        # OverflowError: float infinity.
        return default
@dataclass
class ChatbotV2Config:
    """Configuration for a Chatbot V2 instance."""

    id: str
    name: str
    systemPrompt: str
    model: ModelConfig
    # Max document chars in system prompt (~60k ≈ 20k tokens). None = default 60k.
    maxContextChars: Optional[int] = None
    # Chunk size in chars (~15k). None = default.
    chunkSize: Optional[int] = None
    # Overlap between chunks in chars (~500). None = default.
    chunkOverlap: Optional[int] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any], config_id: str = "default") -> 'ChatbotV2Config':
        """Create ChatbotV2Config from dictionary.

        Missing keys and keys whose value is null/empty fall back to defaults,
        so a partially filled or sparsely populated JSON config still loads.

        Args:
            data: Raw configuration mapping. ``None`` is treated as empty.
            config_id: Identifier used when *data* carries no usable "id".

        Returns:
            A populated ChatbotV2Config. Numeric limits that are absent or
            not convertible to int are left as None.
        """
        data = data or {}

        system_prompt = data.get("systemPrompt") or DEFAULT_SYSTEM_PROMPT

        # "model" may be absent, explicitly null, or of the wrong type; in all
        # of these cases use an empty mapping instead of failing on .get().
        model_data = data.get("model")
        if not isinstance(model_data, dict):
            model_data = {}

        # Providers may be given inside "model" or at the top level.
        allowed_providers = model_data.get("allowedProviders") or data.get("allowedProviders", [])
        if not isinstance(allowed_providers, list):
            allowed_providers = []

        # `or` (rather than a .get default) also covers keys stored as null,
        # matching how systemPrompt is resolved above.
        model_config = ModelConfig(
            operationType=model_data.get("operationType") or "DATA_ANALYSE",
            processingMode=model_data.get("processingMode") or "BASIC",
            allowedProviders=allowed_providers,
        )

        return cls(
            id=data.get("id") or config_id,
            name=data.get("name") or "Chatbot V2",
            systemPrompt=system_prompt,
            model=model_config,
            maxContextChars=_parse_int(data.get("maxContextChars")),
            chunkSize=_parse_int(data.get("chunkSize")),
            chunkOverlap=_parse_int(data.get("chunkOverlap")),
        )
def load_chatbotv2_config_from_instance(instance: 'FeatureInstance') -> ChatbotV2Config:
    """Return the Chatbot V2 configuration for a FeatureInstance.

    The parsed configuration is stored in the module-level cache under
    "instance_<id>". A cached entry is returned as-is; this function does not
    refresh it, so later changes to ``instance.config`` are not picked up here.

    Args:
        instance: Object exposing ``id`` and ``config`` attributes.

    Returns:
        The cached or newly parsed ChatbotV2Config.
    """
    inst_id = instance.id
    key = f"instance_{inst_id}"

    # Fast path: already parsed for this instance.
    try:
        return _config_cache[key]
    except KeyError:
        pass

    # An empty or missing config is parsed as an empty mapping.
    raw = instance.config
    if not raw:
        raw = {}

    loaded = ChatbotV2Config.from_dict(raw, config_id=inst_id)
    _config_cache[key] = loaded
    logger.info(f"Loaded chatbotv2 config from instance {inst_id}")
    return loaded