From 7a8691404087f39d75c2f1b3845623bb0a049954 Mon Sep 17 00:00:00 2001 From: Ida Dittrich Date: Fri, 30 Jan 2026 11:59:21 +0100 Subject: [PATCH] changes to chatbot --- modules/features/chatbot/aiCenterAdapter.py | 170 - modules/features/chatbot/chatbotConfig.py | 231 -- modules/features/chatbot/chatbotUtils.py | 160 - .../chatbot/datamodelFeatureChatbot.py | 1024 ------- modules/features/chatbot/eventManager.py | 243 -- modules/features/chatbot/langgraphChatbot.py | 345 --- modules/features/chatbot/langgraphTools.py | 166 - modules/features/chatbot/mainChatbot.py | 2722 ----------------- .../features/chatbot/routeFeatureChatbot.py | 160 +- requirements.txt | 3 +- 10 files changed, 82 insertions(+), 5142 deletions(-) delete mode 100644 modules/features/chatbot/aiCenterAdapter.py delete mode 100644 modules/features/chatbot/chatbotConfig.py delete mode 100644 modules/features/chatbot/chatbotUtils.py delete mode 100644 modules/features/chatbot/datamodelFeatureChatbot.py delete mode 100644 modules/features/chatbot/eventManager.py delete mode 100644 modules/features/chatbot/langgraphChatbot.py delete mode 100644 modules/features/chatbot/langgraphTools.py delete mode 100644 modules/features/chatbot/mainChatbot.py diff --git a/modules/features/chatbot/aiCenterAdapter.py b/modules/features/chatbot/aiCenterAdapter.py deleted file mode 100644 index 2e638edd..00000000 --- a/modules/features/chatbot/aiCenterAdapter.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Adapter to use AI Center as a LangChain-compatible chat model. -Maps LangChain message format to AI Center requests and responses. -""" - -import logging -from typing import Any, AsyncIterator, Iterator, List, Optional - -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import ( - AIMessage, - BaseMessage, - HumanMessage, - SystemMessage, -) -from langchain_core.outputs import ChatGeneration, ChatResult -from langchain_core.callbacks import AsyncCallbackHandlerForLLMRun, CallbackManagerForLLMRun - -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum - -logger = logging.getLogger(__name__) - - -class AICenterChatModel(BaseChatModel): - """ - Adapter to use AI center as LangChain chat model. - Converts LangChain messages to AI center format and back. - """ - - def __init__( - self, - services, - system_prompt: str = "", - temperature: float = 0.2, - **kwargs - ): - """ - Initialize AI Center chat model adapter. - - Args: - services: Services instance with AI access - system_prompt: System prompt to use - temperature: Temperature for AI calls - """ - super().__init__(**kwargs) - self.services = services - self.system_prompt = system_prompt - self.temperature = temperature - - @property - def _llm_type(self) -> str: - """Return identifier of LLM type.""" - return "ai_center" - - def _generate( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> ChatResult: - """ - Synchronous generation - not supported, use async version. - """ - raise NotImplementedError("Use async version: _agenerate") - - async def _agenerate( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[AsyncCallbackHandlerForLLMRun] = None, - **kwargs: Any, - ) -> ChatResult: - """ - Generate chat response using AI center. - - Args: - messages: List of LangChain messages - stop: Optional list of stop sequences - run_manager: Optional callback manager - **kwargs: Additional arguments - - Returns: - ChatResult with generated message - """ - # Convert LangChain messages to AI center prompt format - prompt_parts = [] - - # Add system prompt if present - if self.system_prompt: - prompt_parts.append(self.system_prompt) - - # Convert messages to text format - for msg in messages: - if isinstance(msg, SystemMessage): - # System messages are already in system_prompt or can be added here - if not self.system_prompt: - prompt_parts.append(f"System: {msg.content}") - elif isinstance(msg, HumanMessage): - prompt_parts.append(f"User: {msg.content}") - elif isinstance(msg, AIMessage): - prompt_parts.append(f"Assistant: {msg.content}") - else: - # Generic message - prompt_parts.append(str(msg.content)) - - # Combine into single prompt - full_prompt = "\n\n".join(prompt_parts) - - # Create AI center request - ai_request = AiCallRequest( - prompt=full_prompt, - options=AiCallOptions( - resultFormat="txt", - operationType=OperationTypeEnum.DATA_ANALYSE, - processingMode=ProcessingModeEnum.DETAILED, - temperature=self.temperature - ) - ) - - # Call AI center - try: - await self.services.ai.ensureAiObjectsInitialized() - ai_response = await self.services.ai.callAi(ai_request) - - # Extract content - content = ai_response.content if hasattr(ai_response, 'content') else str(ai_response) - - # Create AIMessage from response - ai_message = AIMessage(content=content) - - # Create ChatGeneration - generation = ChatGeneration(message=ai_message) - - # Return ChatResult - return ChatResult(generations=[generation]) - - except Exception as e: - logger.error(f"Error calling AI center: {e}", exc_info=True) - # Return error message - error_message = AIMessage(content=f"Error: {str(e)}") - generation = ChatGeneration(message=error_message) - return ChatResult(generations=[generation]) - - async def astream( - self, - messages: List[BaseMessage], - stop: Optional[List[str]] = None, - run_manager: Optional[AsyncCallbackHandlerForLLMRun] = None, - **kwargs: Any, - ) -> AsyncIterator[BaseMessage]: - """ - Stream chat response (not fully supported by AI center, returns single chunk). - - Args: - messages: List of LangChain messages - stop: Optional list of stop sequences - run_manager: Optional callback manager - **kwargs: Additional arguments - - Yields: - BaseMessage chunks - """ - # For now, just return the full response as a single chunk - # TODO: Implement proper streaming if AI center supports it - result = await self._agenerate(messages, stop, run_manager, **kwargs) - if result.generations: - yield result.generations[0].message diff --git a/modules/features/chatbot/chatbotConfig.py b/modules/features/chatbot/chatbotConfig.py deleted file mode 100644 index 89345712..00000000 --- a/modules/features/chatbot/chatbotConfig.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Chatbot instance configuration management. -Handles loading and applying instance-specific configurations. -""" - -import logging -from typing import Optional, Dict, Any, List -from modules.interfaces.interfaceFeatures import getFeatureInterface -from modules.interfaces.interfaceDbApp import getRootInterface - -logger = logging.getLogger(__name__) - - -class ChatbotConfig: - """ - Chatbot instance configuration structure. - Provides defaults and validation for chatbot instance configs. - """ - - # Default configuration - DEFAULT_CONFIG = { - "connector": { - "types": ["preprocessor"], # Array of database connector types: "preprocessor", "custom" - "type": "preprocessor", # Legacy: single connector type (for backward compatibility) - "customConnectorClass": None # For custom connectors - }, - "prompts": { - "useCustomPrompts": False, - "customAnalysisPrompt": None, - "customFinalAnswerPrompt": None, - "customSystemPrompt": None # For LangGraph workflow (single system prompt) - }, - "behavior": { - "maxQueries": 5, - "enableWebResearch": True, - "enableRetryOnEmpty": True, - "maxRetryAttempts": 2 - }, - "database": { - "schema": None, # Custom schema info if needed - "tablePrefix": None # Custom table prefix if needed - } - } - - def __init__(self, config: Optional[Dict[str, Any]] = None): - """ - Initialize chatbot config with defaults and overrides. - - Args: - config: Instance-specific config dict (from FeatureInstance.config) - """ - self.config = self._merge_config(config or {}) - - def _merge_config(self, instance_config: Dict[str, Any]) -> Dict[str, Any]: - """ - Merge instance config with defaults, handling nested dicts. - - Args: - instance_config: Instance-specific config - - Returns: - Merged configuration dict - """ - merged = self.DEFAULT_CONFIG.copy() - - # Deep merge nested dicts - for key, value in instance_config.items(): - if key in merged and isinstance(merged[key], dict) and isinstance(value, dict): - merged[key] = {**merged[key], **value} - else: - merged[key] = value - - return merged - - @property - def connector_types(self) -> List[str]: - """Get connector types as list (supports multiple connectors).""" - connector_config = self.config.get("connector", {}) - # Support new array format - types = [] - if "types" in connector_config and isinstance(connector_config["types"], list): - types = connector_config["types"] - # Fallback to legacy single type format - elif "type" in connector_config: - types = [connector_config["type"]] - else: - types = ["preprocessor"] - - # Filter out 'websearch' (not a database connector, handled separately via enableWebResearch) - types = [t for t in types if t != "websearch"] - - # Ensure at least one connector - if not types: - types = ["preprocessor"] - - return types - - @property - def connector_type(self) -> str: - """Get primary connector type (preprocessor, custom).""" - # For backward compatibility, return first connector type - types = self.connector_types - return types[0] if types else "preprocessor" - - @property - def custom_connector_class(self) -> Optional[str]: - """Get custom connector class name if using custom connector.""" - return self.config.get("connector", {}).get("customConnectorClass") - - @property - def use_custom_prompts(self) -> bool: - """Check if custom prompts should be used. Always true since prompts are required.""" - # Prompts are now required, so this is always true if prompts are configured - return bool(self.config.get("prompts", {}).get("customAnalysisPrompt") or - self.config.get("prompts", {}).get("customFinalAnswerPrompt")) - - @property - def custom_analysis_prompt(self) -> Optional[str]: - """Get custom analysis prompt (required for chatbot instances).""" - prompt = self.config.get("prompts", {}).get("customAnalysisPrompt") - if not prompt: - logger.warning("custom_analysis_prompt is not configured - this is required for chatbot instances") - return prompt - - @property - def custom_final_answer_prompt(self) -> Optional[str]: - """Get custom final answer prompt (required for chatbot instances).""" - prompt = self.config.get("prompts", {}).get("customFinalAnswerPrompt") - if not prompt: - logger.warning("custom_final_answer_prompt is not configured - this is required for chatbot instances") - return prompt - - @property - def custom_system_prompt(self) -> Optional[str]: - """Get custom system prompt for LangGraph workflow.""" - # Prefer customSystemPrompt, fallback to customAnalysisPrompt - prompt = self.config.get("prompts", {}).get("customSystemPrompt") - if not prompt: - prompt = self.config.get("prompts", {}).get("customAnalysisPrompt") - return prompt - - @property - def max_queries(self) -> int: - """Get maximum number of queries allowed.""" - return self.config.get("behavior", {}).get("maxQueries", 5) - - @property - def enable_web_research(self) -> bool: - """Check if web research is enabled.""" - return self.config.get("behavior", {}).get("enableWebResearch", True) - - @property - def enable_retry_on_empty(self) -> bool: - """Check if retry on empty results is enabled.""" - return self.config.get("behavior", {}).get("enableRetryOnEmpty", True) - - @property - def max_retry_attempts(self) -> int: - """Get maximum retry attempts.""" - return self.config.get("behavior", {}).get("maxRetryAttempts", 2) - - def get_connector_instance(self): - """ - Get connector instance based on configuration. - Uses the primary (first) connector type from the configured connectors. - - Returns: - Connector instance (PreprocessorConnector, or custom connector if configured) - """ - # Use primary connector type (first in the list) - connector_type = self.connector_type.lower() - - if connector_type == "preprocessor": - from modules.connectors.connectorPreprocessor import PreprocessorConnector - return PreprocessorConnector() - elif connector_type == "custom" and self.custom_connector_class: - # Dynamic import for custom connectors - try: - module_path, class_name = self.custom_connector_class.rsplit(".", 1) - module = __import__(module_path, fromlist=[class_name]) - connector_class = getattr(module, class_name) - return connector_class() - except Exception as e: - logger.error(f"Failed to load custom connector {self.custom_connector_class}: {e}") - raise ValueError(f"Invalid custom connector: {self.custom_connector_class}") - else: - # Default to PreprocessorConnector - logger.warning(f"Unknown connector type '{connector_type}', using PreprocessorConnector") - from modules.connectors.connectorPreprocessor import PreprocessorConnector - return PreprocessorConnector() - - -def get_chatbot_config(instance_id: Optional[str]) -> ChatbotConfig: - """ - Load chatbot configuration for a feature instance. - - Args: - instance_id: FeatureInstance ID (None for default config) - - Returns: - ChatbotConfig instance with merged defaults and instance config - """ - if not instance_id: - # Return default config if no instance ID provided - return ChatbotConfig() - - try: - rootInterface = getRootInterface() - featureInterface = getFeatureInterface(rootInterface.db) - - instance = featureInterface.getFeatureInstance(instance_id) - if not instance: - logger.warning(f"Feature instance {instance_id} not found, using default config") - return ChatbotConfig() - - # Verify it's a chatbot instance - if instance.featureCode != "chatbot": - logger.warning(f"Instance {instance_id} is not a chatbot instance, using default config") - return ChatbotConfig() - - # Load config from instance - instance_config = instance.config if hasattr(instance, 'config') and instance.config else {} - - return ChatbotConfig(instance_config) - - except Exception as e: - logger.error(f"Error loading chatbot config for instance {instance_id}: {e}") - # Return default config on error - return ChatbotConfig() diff --git a/modules/features/chatbot/chatbotUtils.py b/modules/features/chatbot/chatbotUtils.py deleted file mode 100644 index dba0498c..00000000 --- a/modules/features/chatbot/chatbotUtils.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Utility functions for the chatbot module. -Contains conversation name generation and other utilities. -""" - -import logging -import re -from typing import Optional - -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum - -logger = logging.getLogger(__name__) - - -async def generate_conversation_name( - services, - userPrompt: str, - userLanguage: str = "en" -) -> str: - """ - Generate a short, descriptive conversation name based on user's prompt. - - Args: - services: Services instance with AI access - userPrompt: The user's input prompt - userLanguage: User's preferred language (for prompt localization) - - Returns: - Short conversation name (max 60 characters) - """ - try: - truncated_prompt = userPrompt[:200] if len(userPrompt) > 200 else userPrompt - - name_prompt = f"""Create a professional conversation title in THE SAME LANGUAGE as the user's question. - -Question: "{truncated_prompt}" - -Rules: -- Title MUST be in the same language as the question (German→German, French→French, English→English) -- Max 60 characters, no punctuation (?, !, .) -- Professional and concise -- Respond ONLY with the title, nothing else""" - - await services.ai.ensureAiObjectsInitialized() - - nameRequest = AiCallRequest( - prompt=name_prompt, - options=AiCallOptions( - resultFormat="txt", - operationType=OperationTypeEnum.DATA_GENERATE, - processingMode=ProcessingModeEnum.DETAILED, - temperature=0.7 - ) - ) - - nameResponse = await services.ai.callAi(nameRequest) - generated_name = nameResponse.content.strip() - - # Extract first line and clean up - generated_name = generated_name.split('\n')[0].strip() - generated_name = re.sub(r'^(Title|Titel|Titre|Name|Name:):\s*', '', generated_name, flags=re.IGNORECASE) - generated_name = re.sub(r'^["\']|["\']$', '', generated_name) - generated_name = re.sub(r'[?!.]+$', '', generated_name) # Remove trailing punctuation - - # Apply title case - if generated_name: - words = generated_name.split() - capitalized_words = [] - for word in words: - if word.isupper() and len(word) > 1: - capitalized_words.append(word) # Keep acronyms - else: - capitalized_words.append(word.capitalize()) - generated_name = " ".join(capitalized_words).strip() - - # Validate and truncate if needed - if not generated_name or len(generated_name) < 3: - if userLanguage == "de": - generated_name = "Chatbot Konversation" - elif userLanguage == "fr": - generated_name = "Conversation Chatbot" - else: - generated_name = "Chatbot Conversation" - - if len(generated_name) > 60: - truncated = generated_name[:57] - last_space = truncated.rfind(' ') - generated_name = truncated[:last_space] + "..." if last_space > 30 else truncated + "..." - - logger.info(f"Generated conversation name: '{generated_name}'") - return generated_name - - except Exception as e: - logger.error(f"Error generating conversation name: {e}", exc_info=True) - if userLanguage == "de": - return "Chatbot Konversation" - elif userLanguage == "fr": - return "Conversation Chatbot" - else: - return "Chatbot Conversation" - - -def get_empty_results_retry_instructions(empty_count: int) -> str: - """ - Get retry instructions when empty results are detected. - - Args: - empty_count: Number of queries that returned empty results - - Returns: - Formatted instructions string - """ - if empty_count == 0: - return "" - - return f""" -⚠️ LEERE ERGEBNISSE ERKANNT ⚠️ - -Es wurden {empty_count} Query(s) ausgeführt, die 0 Zeilen zurückgegeben haben. Versuche alternative Strategien. - -⚠️ WICHTIG - MAXIMAL 5 QUERIES FÜR PERFORMANCE ⚠️ - -Erstelle MAXIMAL 5 alternative SQL-Queries mit komplett anderen Strategien: - -1. **Breitere Suche ohne Zertifizierung**: Entferne Zertifizierungsfilter komplett - - Beispiel: Suche nur nach Netzgerät + einphasig + 10A (ohne UL) - - Suche in Artikelbezeichnung, Artikelbeschrieb, Keywords - -2. **Erweiterte Suche nach Netzgeräten mit Ampere-Angaben**: Breitere Ampere-Patterns - - Beispiel: (Netzteil OR Netzgerät) AND (10A OR 15A OR 20A OR Ampere) - - Suche auch nach "Ampere" als Begriff, nicht nur Zahlen - -3. **Breitere UL-Suche bei Netzgeräten**: Suche UL in allen Feldern - - Beispiel: (UL OR UL-zertifiziert) AND (Netzgerät OR Netzteil OR Power Supply) - - Suche auch in Keywords-Feld - -4. **Netzgeräte mit ≥10A ohne weitere Filter**: Minimaler Filter - - Beispiel: (Netzgerät OR Netzteil) AND (10A OR 15A OR 20A) - - Keine Filter auf einphasig oder Zertifizierung - -5. **Zertifizierte Netzgeräte allgemein**: Breite Zertifizierungs-Suche - - Beispiel: (UL OR CE OR TÜV OR certified OR zertifiziert) AND (Netzgerät OR Netzteil) - -6. **COUNT-Abfrage für Statistik**: Prüfe ob überhaupt Artikel existieren - - SELECT COUNT(*) WHERE (Netzgerät OR Netzteil) AND (10A OR 15A OR 20A) - -7. **Spezifische Suche nach einphasigen Netzgeräten**: Ohne Zertifizierung - - Beispiel: (einphasig OR 1-phasig OR single phase) AND (Netzgerät OR Netzteil) - -8. **Fallback mit minimalen Filtern**: Nur Hauptkriterien - - Beispiel: Netzgerät AND (10A OR 15A OR 20A) - keine weiteren Filter - -WICHTIG: -- Erstelle MAXIMAL 5 Queries mit unterschiedlichen Strategien (für Performance) -- Verwende breitere OR-Bedingungen für alternative Begriffe -- Entferne zu spezifische Filter, die möglicherweise keine Treffer finden -- Suche in Artikelbezeichnung, Artikelbeschrieb UND Keywords-Feld -""" diff --git a/modules/features/chatbot/datamodelFeatureChatbot.py b/modules/features/chatbot/datamodelFeatureChatbot.py deleted file mode 100644 index c2838ad3..00000000 --- a/modules/features/chatbot/datamodelFeatureChatbot.py +++ /dev/null @@ -1,1024 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -"""Chat models: ChatWorkflow, ChatMessage, ChatLog, ChatStat, ChatDocument.""" - -from typing import List, Dict, Any, Optional -from enum import Enum -from pydantic import BaseModel, Field -from modules.shared.attributeUtils import registerModelLabels -from modules.shared.timeUtils import getUtcTimestamp -import uuid - - -class ChatStat(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), description="Primary key" - ) - mandateId: str = Field( - description="ID of the mandate this stat belongs to" - ) - featureInstanceId: str = Field( - description="ID of the feature instance this stat belongs to" - ) - workflowId: Optional[str] = Field( - None, description="Foreign key to workflow (for workflow stats)" - ) - processingTime: Optional[float] = Field( - None, description="Processing time in seconds" - ) - bytesSent: Optional[int] = Field(None, description="Number of bytes sent") - bytesReceived: Optional[int] = Field(None, description="Number of bytes received") - errorCount: Optional[int] = Field(None, description="Number of errors encountered") - process: Optional[str] = Field(None, description="The process that delivers the stats data (e.g. 'action.outlook.readMails', 'ai.process.document.name')") - engine: Optional[str] = Field(None, description="The engine used (e.g. 'ai.anthropic.35', 'ai.tavily.basic', 'renderer.docx')") - priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation") - - -registerModelLabels( - "ChatStat", - {"en": "Chat Statistics", "fr": "Statistiques de chat"}, - { - "id": {"en": "ID", "fr": "ID"}, - "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"}, - "featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance de fonctionnalité"}, - "workflowId": {"en": "Workflow ID", "fr": "ID du workflow"}, - "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"}, - "bytesSent": {"en": "Bytes Sent", "fr": "Octets envoyés"}, - "bytesReceived": {"en": "Bytes Received", "fr": "Octets reçus"}, - "errorCount": {"en": "Error Count", "fr": "Nombre d'erreurs"}, - "process": {"en": "Process", "fr": "Processus"}, - "engine": {"en": "Engine", "fr": "Moteur"}, - "priceUsd": {"en": "Price USD", "fr": "Prix USD"}, - }, -) - - -class ChatLog(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), description="Primary key" - ) - mandateId: str = Field( - description="ID of the mandate this log belongs to" - ) - featureInstanceId: str = Field( - description="ID of the feature instance this log belongs to" - ) - workflowId: str = Field(description="Foreign key to workflow") - message: str = Field(description="Log message") - type: str = Field(description="Log type (info, warning, error, etc.)") - timestamp: float = Field( - default_factory=getUtcTimestamp, - description="When the log entry was created (UTC timestamp in seconds)", - ) - status: Optional[str] = Field(None, description="Status of the log entry") - progress: Optional[float] = Field( - None, description="Progress indicator (0.0 to 1.0)" - ) - performance: Optional[Dict[str, Any]] = Field( - None, description="Performance metrics" - ) - parentId: Optional[str] = Field( - None, description="Parent operation ID (operationId of parent operation) for hierarchical display" - ) - operationId: Optional[str] = Field( - None, description="Operation ID to group related log entries" - ) - roundNumber: Optional[int] = Field(None, description="Round number in workflow") - taskNumber: Optional[int] = Field(None, description="Task number within round") - actionNumber: Optional[int] = Field(None, description="Action number within task") - - -registerModelLabels( - "ChatLog", - {"en": "Chat Log", "fr": "Journal de chat"}, - { - "id": {"en": "ID", "fr": "ID"}, - "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"}, - "featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance de fonctionnalité"}, - "workflowId": {"en": "Workflow ID", "fr": "ID du flux de travail"}, - "message": {"en": "Message", "fr": "Message"}, - "type": {"en": "Type", "fr": "Type"}, - "timestamp": {"en": "Timestamp", "fr": "Horodatage"}, - "status": {"en": "Status", "fr": "Statut"}, - "progress": {"en": "Progress", "fr": "Progression"}, - "performance": {"en": "Performance", "fr": "Performance"}, - }, -) - - -class ChatDocument(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), description="Primary key" - ) - mandateId: str = Field( - description="ID of the mandate this document belongs to" - ) - featureInstanceId: str = Field( - description="ID of the feature instance this document belongs to" - ) - messageId: str = Field(description="Foreign key to message") - fileId: str = Field(description="Foreign key to file") - fileName: str = Field(description="Name of the file") - fileSize: int = Field(description="Size of the file") - mimeType: str = Field(description="MIME type of the file") - roundNumber: Optional[int] = Field(None, description="Round number in workflow") - taskNumber: Optional[int] = Field(None, description="Task number within round") - actionNumber: Optional[int] = Field(None, description="Action number within task") - actionId: Optional[str] = Field( - None, description="ID of the action that created this document" - ) - - -registerModelLabels( - "ChatDocument", - {"en": "Chat Document", "fr": "Document de chat"}, - { - "id": {"en": "ID", "fr": "ID"}, - "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"}, - "featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance de fonctionnalité"}, - "messageId": {"en": "Message ID", "fr": "ID du message"}, - "fileId": {"en": "File ID", "fr": "ID du fichier"}, - "fileName": {"en": "File Name", "fr": "Nom du fichier"}, - "fileSize": {"en": "File Size", "fr": "Taille du fichier"}, - "mimeType": {"en": "MIME Type", "fr": "Type MIME"}, - "roundNumber": {"en": "Round Number", "fr": "Numéro de tour"}, - "taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"}, - "actionNumber": {"en": "Action Number", "fr": "Numéro d'action"}, - "actionId": {"en": "Action ID", "fr": "ID de l'action"}, - }, -) - - -class ContentMetadata(BaseModel): - size: int = Field(description="Content size in bytes") - pages: Optional[int] = Field( - None, description="Number of pages for multi-page content" - ) - error: Optional[str] = Field(None, description="Processing error if any") - width: Optional[int] = Field(None, description="Width in pixels for images/videos") - height: Optional[int] = Field( - None, description="Height in pixels for images/videos" - ) - colorMode: Optional[str] = Field(None, description="Color mode") - fps: Optional[float] = Field(None, description="Frames per second for videos") - durationSec: Optional[float] = Field( - None, description="Duration in seconds for media" - ) - mimeType: str = Field(description="MIME type of the content") - base64Encoded: bool = Field(description="Whether the data is base64 encoded") - - -registerModelLabels( - "ContentMetadata", - {"en": "Content Metadata", "fr": "Métadonnées du contenu"}, - { - "size": {"en": "Size", "fr": "Taille"}, - "pages": {"en": "Pages", "fr": "Pages"}, - "error": {"en": "Error", "fr": "Erreur"}, - "width": {"en": "Width", "fr": "Largeur"}, - "height": {"en": "Height", "fr": "Hauteur"}, - "colorMode": {"en": "Color Mode", "fr": "Mode de couleur"}, - "fps": {"en": "FPS", "fr": "IPS"}, - "durationSec": {"en": "Duration", "fr": "Durée"}, - "mimeType": {"en": "MIME Type", "fr": "Type MIME"}, - "base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"}, - }, -) - - -class ContentItem(BaseModel): - label: str = Field(description="Content label") - data: str = Field(description="Extracted text content") - metadata: ContentMetadata = Field(description="Content metadata") - - -registerModelLabels( - "ContentItem", - {"en": "Content Item", "fr": "Élément de contenu"}, - { - "label": {"en": "Label", "fr": "Étiquette"}, - "data": {"en": "Data", "fr": "Données"}, - "metadata": {"en": "Metadata", "fr": "Métadonnées"}, - }, -) - - -class ChatContentExtracted(BaseModel): - id: str = Field(description="Reference to source ChatDocument") - contents: List[ContentItem] = Field( - default_factory=list, description="List of content items" - ) - - -registerModelLabels( - "ChatContentExtracted", - {"en": "Extracted Content", "fr": "Contenu extrait"}, - { - "id": {"en": "Object ID", "fr": "ID de l'objet"}, - "contents": {"en": "Contents", "fr": "Contenus"}, - }, -) - - -class ChatMessage(BaseModel): - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), description="Primary key" - ) - mandateId: str = Field( - description="ID of the mandate this message belongs to" - ) - featureInstanceId: str = Field( - description="ID of the feature instance this message belongs to" - ) - workflowId: str = Field(description="Foreign key to workflow") - parentMessageId: Optional[str] = Field( - None, description="Parent message ID for threading" - ) - documents: List[ChatDocument] = Field( - default_factory=list, description="Associated documents" - ) - documentsLabel: Optional[str] = Field( - None, description="Label for the set of documents" - ) - message: Optional[str] = Field(None, description="Message content") - summary: Optional[str] = Field( - None, description="Short summary of this message for planning/history" - ) - role: str = Field(description="Role of the message sender") - status: str = Field(description="Status of the message (first, step, last)") - sequenceNr: int = Field( - description="Sequence number of the message (set automatically)" - ) - publishedAt: float = Field( - default_factory=getUtcTimestamp, - description="When the message was published (UTC timestamp in seconds)", - ) - success: Optional[bool] = Field( - None, description="Whether the message processing was successful" - ) - actionId: Optional[str] = Field( - None, description="ID of the action that produced this message" - ) - actionMethod: Optional[str] = Field( - None, description="Method of the action that produced this message" - ) - actionName: Optional[str] = Field( - None, description="Name of the action that produced this message" - ) - roundNumber: Optional[int] = Field(None, description="Round number in workflow") - taskNumber: Optional[int] = Field(None, description="Task number within round") - actionNumber: Optional[int] = Field(None, description="Action number within task") - taskProgress: Optional[str] = Field( - None, description="Task progress status: pending, running, success, fail, retry" - ) - actionProgress: Optional[str] = Field( - None, description="Action progress status: pending, running, success, fail" - ) - - -registerModelLabels( - "ChatMessage", - {"en": "Chat Message", "fr": "Message de chat"}, - { - "id": {"en": "ID", "fr": "ID"}, - "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"}, - "featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance de fonctionnalité"}, - "workflowId": {"en": "Workflow ID", "fr": "ID du flux de travail"}, - "parentMessageId": {"en": "Parent Message ID", "fr": "ID du message parent"}, - "documents": {"en": "Documents", "fr": "Documents"}, - "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"}, - "message": {"en": "Message", "fr": "Message"}, - "summary": {"en": "Summary", "fr": "Résumé"}, - "role": {"en": "Role", "fr": "Rôle"}, - "status": {"en": "Status", "fr": "Statut"}, - "sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"}, - "publishedAt": {"en": "Published At", "fr": "Publié le"}, - "success": {"en": "Success", "fr": "Succès"}, - "actionId": {"en": "Action ID", "fr": "ID de l'action"}, - "actionMethod": {"en": "Action Method", "fr": "Méthode de l'action"}, - "actionName": {"en": "Action Name", "fr": "Nom de l'action"}, - "roundNumber": {"en": "Round Number", "fr": "Numéro de tour"}, - "taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"}, - "actionNumber": {"en": "Action Number", "fr": "Numéro d'action"}, - "taskProgress": {"en": "Task Progress", "fr": "Progression de la tâche"}, - "actionProgress": {"en": "Action Progress", "fr": "Progression de l'action"}, - }, -) - - -class WorkflowModeEnum(str, Enum): - WORKFLOW_DYNAMIC = "Dynamic" - WORKFLOW_AUTOMATION = "Automation" - WORKFLOW_CHATBOT = "Chatbot" - WORKFLOW_REACT = "React" # Legacy mode - kept for backward compatibility - - -registerModelLabels( - "WorkflowModeEnum", - {"en": "Workflow Mode", "fr": "Mode de workflow"}, - { - "WORKFLOW_DYNAMIC": {"en": "Dynamic", "fr": "Dynamique"}, - "WORKFLOW_AUTOMATION": {"en": "Automation", "fr": "Automatisation"}, - "WORKFLOW_CHATBOT": {"en": "Chatbot", "fr": "Chatbot"}, - "WORKFLOW_REACT": {"en": "React (Legacy)", "fr": "React (Hérité)"}, - }, -) - - -class ChatWorkflow(BaseModel): - id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - mandateId: str = Field(description="ID of the mandate this workflow belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - featureInstanceId: str = Field(description="ID of the feature instance this workflow belongs to", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - status: str = Field(default="running", description="Current status of the workflow", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [ - {"value": "running", "label": {"en": "Running", "fr": "En cours"}}, - {"value": "completed", "label": {"en": "Completed", "fr": "Terminé"}}, - {"value": "stopped", "label": {"en": "Stopped", "fr": "Arrêté"}}, - {"value": "error", "label": {"en": "Error", "fr": "Erreur"}}, - ]}) - name: Optional[str] = Field(None, description="Name of the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": False, "frontend_required": True}) - currentRound: int = Field(default=0, description="Current round number", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) - currentTask: int = Field(default=0, description="Current task number", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) - currentAction: int = Field(default=0, description="Current action number", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) - totalTasks: int = Field(default=0, description="Total number of tasks in the workflow", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) - totalActions: int = Field(default=0, description="Total number of actions in the workflow", json_schema_extra={"frontend_type": "integer", "frontend_readonly": True, "frontend_required": False}) - lastActivity: float = Field(default_factory=getUtcTimestamp, description="Timestamp of last activity (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) - startedAt: float = Field(default_factory=getUtcTimestamp, description="When the workflow started (UTC timestamp in seconds)", json_schema_extra={"frontend_type": "timestamp", "frontend_readonly": True, "frontend_required": False}) - logs: List[ChatLog] = Field(default_factory=list, description="Workflow logs", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - messages: List[ChatMessage] = Field(default_factory=list, description="Messages in the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - stats: List[ChatStat] = Field(default_factory=list, description="Workflow statistics list", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - tasks: list = Field(default_factory=list, description="List of tasks in the workflow", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - workflowMode: WorkflowModeEnum = Field(default=WorkflowModeEnum.WORKFLOW_DYNAMIC, description="Workflow mode selector", json_schema_extra={"frontend_type": "select", "frontend_readonly": False, "frontend_required": False, "frontend_options": [ - { - "value": WorkflowModeEnum.WORKFLOW_DYNAMIC.value, - "label": {"en": "Dynamic", "fr": "Dynamique"}, - }, - { - "value": WorkflowModeEnum.WORKFLOW_AUTOMATION.value, - "label": {"en": "Automation", "fr": "Automatisation"}, - }, - { - "value": WorkflowModeEnum.WORKFLOW_CHATBOT.value, - "label": {"en": "Chatbot", "fr": "Chatbot"}, - }, - { - "value": WorkflowModeEnum.WORKFLOW_REACT.value, - "label": {"en": "React (Legacy)", "fr": "React (Hérité)"}, - }, - ]}) - maxSteps: int = Field(default=10, description="Maximum number of iterations in dynamic mode", json_schema_extra={"frontend_type": "integer", "frontend_readonly": False, "frontend_required": False}) - expectedFormats: Optional[List[str]] = Field(None, description="List of expected file format extensions from user request (e.g., ['xlsx', 'pdf']). Extracted during intent analysis.", json_schema_extra={"frontend_type": "text", "frontend_readonly": True, "frontend_required": False}) - - # Helper methods for execution state management - def getRoundIndex(self) -> int: - """Get current round index""" - return self.currentRound - - def getTaskIndex(self) -> int: - """Get current task index""" - return self.currentTask - - def getActionIndex(self) -> int: - """Get current action index""" - return self.currentAction - - def incrementRound(self): - """Increment round when new user input received""" - self.currentRound += 1 - self.currentTask = 0 - self.currentAction = 0 - - def incrementTask(self): - """Increment task when starting new task in current round""" - self.currentTask += 1 - self.currentAction = 0 - - def incrementAction(self): - """Increment action when executing new action in current task""" - self.currentAction += 1 - - -registerModelLabels( - "ChatWorkflow", - {"en": "Chat Workflow", "fr": "Flux de travail de chat"}, - { - "id": {"en": "ID", "fr": "ID"}, - "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"}, - "featureInstanceId": {"en": "Feature Instance ID", "fr": "ID de l'instance de fonctionnalité"}, - "status": {"en": "Status", "fr": "Statut"}, - "name": {"en": "Name", "fr": "Nom"}, - "currentRound": {"en": "Current Round", "fr": "Tour actuel"}, - "currentTask": {"en": "Current Task", "fr": "Tâche actuelle"}, - "currentAction": {"en": "Current Action", "fr": "Action actuelle"}, - "totalTasks": {"en": "Total Tasks", "fr": "Total des tâches"}, - "totalActions": {"en": "Total Actions", "fr": "Total des actions"}, - "lastActivity": {"en": "Last Activity", "fr": "Dernière activité"}, - "startedAt": {"en": "Started At", "fr": "Démarré le"}, - "logs": {"en": "Logs", "fr": "Journaux"}, - "messages": {"en": "Messages", "fr": "Messages"}, - "stats": {"en": "Statistics", "fr": "Statistiques"}, - "tasks": {"en": "Tasks", "fr": "Tâches"}, - "workflowMode": {"en": "Workflow Mode", "fr": "Mode de workflow"}, - "maxSteps": {"en": "Max Steps", "fr": "Étapes max"}, - "expectedFormats": {"en": "Expected Formats", "fr": "Formats attendus"}, - }, -) - - -class UserInputRequest(BaseModel): - prompt: str = Field(description="Prompt for the user") - listFileId: List[str] = Field(default_factory=list, description="List of file IDs") - userLanguage: str = Field(default="en", description="User's preferred language") - workflowId: Optional[str] = Field(None, description="Optional ID of the workflow to continue") - - -registerModelLabels( - "UserInputRequest", - {"en": "User Input Request", "fr": "Demande de saisie utilisateur"}, - { - "prompt": {"en": "Prompt", "fr": "Invite"}, - "listFileId": {"en": "File IDs", "fr": "IDs des fichiers"}, - "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"}, - }, -) - - -class ActionDocument(BaseModel): - """Clear document structure for action results""" - - documentName: str = Field(description="Name of the document") - documentData: Any = Field(description="Content/data of the document") - mimeType: str = Field(description="MIME type of the document") - sourceJson: Optional[Dict[str, Any]] = Field( - None, - description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)" - ) - validationMetadata: Optional[Dict[str, Any]] = Field( - None, - description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)" - ) - - -registerModelLabels( - "ActionDocument", - {"en": "Action Document", "fr": "Document d'action"}, - { - "documentName": {"en": "Document Name", "fr": "Nom du document"}, - "documentData": {"en": "Document Data", "fr": "Données du document"}, - "mimeType": {"en": "MIME Type", "fr": "Type MIME"}, - }, -) - - -class ActionResult(BaseModel): - """Clean action result with documents as primary output - - IMPORTANT: Action methods should NOT set resultLabel in their return value. - The resultLabel is managed by the action handler using the action's execResultLabel - from the action plan. This ensures consistent document routing throughout the workflow. - """ - - success: bool = Field(description="Whether execution succeeded") - error: Optional[str] = Field(None, description="Error message if failed") - documents: List[ActionDocument] = Field( - default_factory=list, description="Document outputs" - ) - resultLabel: Optional[str] = Field( - None, - description="Label for document routing (set by action handler, not by action methods)", - ) - - @classmethod - def isSuccess(cls, documents: List[ActionDocument] = None) -> "ActionResult": - return cls(success=True, documents=documents or []) - - @classmethod - def isFailure( - cls, error: str, documents: List[ActionDocument] = None - ) -> "ActionResult": - return cls(success=False, documents=documents or [], error=error) - - -registerModelLabels( - "ActionResult", - {"en": "Action Result", "fr": "Résultat de l'action"}, - { - "success": {"en": "Success", "fr": "Succès"}, - "error": {"en": "Error", "fr": "Erreur"}, - "documents": {"en": "Documents", "fr": "Documents"}, - "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"}, - }, -) - - -class ActionSelection(BaseModel): - method: str = Field(description="Method to execute (e.g., web, document, ai)") - name: str = Field( - description="Action name within the method (e.g., search, extract)" - ) - - -registerModelLabels( - "ActionSelection", - {"en": "Action Selection", "fr": "Sélection d'action"}, - { - "method": {"en": "Method", "fr": "Méthode"}, - "name": {"en": "Action Name", "fr": "Nom de l'action"}, - }, -) - - -class ActionParameters(BaseModel): - parameters: Dict[str, Any] = Field( - default_factory=dict, description="Parameters to execute the selected action" - ) - - -registerModelLabels( - "ActionParameters", - {"en": "Action Parameters", "fr": "Paramètres d'action"}, - { - "parameters": {"en": "Parameters", "fr": "Paramètres"}, - }, -) - - -class ObservationPreview(BaseModel): - name: str = Field(description="Document name or URL label") - mime: Optional[str] = Field(default=None, description="MIME type or kind (legacy field)") - snippet: Optional[str] = Field(default=None, description="Short snippet or summary") - # Extended metadata fields - mimeType: Optional[str] = Field(default=None, description="MIME type") - size: Optional[str] = Field(default=None, description="File size") - created: Optional[str] = Field(default=None, description="Creation timestamp") - modified: Optional[str] = Field(default=None, description="Modification timestamp") - typeGroup: Optional[str] = Field(default=None, description="Document type group") - documentId: Optional[str] = Field(default=None, description="Document ID") - reference: Optional[str] = Field(default=None, description="Document reference") - contentSize: Optional[str] = Field(default=None, description="Content size indicator") - - -registerModelLabels( - "ObservationPreview", - {"en": "Observation Preview", "fr": "Aperçu d'observation"}, - { - "name": {"en": "Name", "fr": "Nom"}, - "mime": {"en": "MIME", "fr": "MIME"}, - "snippet": {"en": "Snippet", "fr": "Extrait"}, - }, -) - - -class Observation(BaseModel): - success: bool = Field(description="Action execution success flag") - resultLabel: str = Field(description="Deterministic label for produced documents") - documentsCount: int = Field(description="Number of produced documents") - previews: List[ObservationPreview] = Field( - default_factory=list, description="Compact previews of outputs" - ) - notes: List[str] = Field( - default_factory=list, description="Short notes or key facts" - ) - # Extended fields for enhanced validation - contentValidation: Optional[Dict[str, Any]] = Field( - default=None, description="Content validation results" - ) - contentAnalysis: Optional[Dict[str, Any]] = Field( - default=None, description="Content analysis results" - ) - - -registerModelLabels( - "Observation", - {"en": "Observation", "fr": "Observation"}, - { - "success": {"en": "Success", "fr": "Succès"}, - "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"}, - "documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"}, - "previews": {"en": "Previews", "fr": "Aperçus"}, - "notes": {"en": "Notes", "fr": "Notes"}, - }, -) - - -class TaskStatus(str, Enum): - PENDING = "pending" - RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" - CANCELLED = "cancelled" - - -registerModelLabels( - "TaskStatus", - {"en": "Task Status", "fr": "Statut de la tâche"}, - { - "PENDING": {"en": "Pending", "fr": "En attente"}, - "RUNNING": {"en": "Running", "fr": "En cours"}, - "COMPLETED": {"en": "Completed", "fr": "Terminé"}, - "FAILED": {"en": "Failed", "fr": "Échec"}, - "CANCELLED": {"en": "Cancelled", "fr": "Annulé"}, - }, -) - - -class DocumentExchange(BaseModel): - documentsLabel: str = Field(description="Label for the set of documents") - documents: List[str] = Field( - default_factory=list, description="List of document references" - ) - - -registerModelLabels( - "DocumentExchange", - {"en": "Document Exchange", "fr": "Échange de documents"}, - { - "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"}, - "documents": {"en": "Documents", "fr": "Documents"}, - }, -) - - -class ActionItem(BaseModel): - id: str = Field(..., description="Action ID") - execMethod: str = Field(..., description="Method to execute") - execAction: str = Field(..., description="Action to perform") - execParameters: Dict[str, Any] = Field( - default_factory=dict, description="Action parameters" - ) - execResultLabel: Optional[str] = Field( - None, description="Label for the set of result documents" - ) - expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field( - None, description="Expected document formats (optional)" - ) - userMessage: Optional[str] = Field( - None, description="User-friendly message in user's language" - ) - status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status") - error: Optional[str] = Field(None, description="Error message if action failed") - retryCount: int = Field(default=0, description="Number of retries attempted") - retryMax: int = Field(default=3, description="Maximum number of retries") - processingTime: Optional[float] = Field( - None, description="Processing time in seconds" - ) - timestamp: float = Field( - ..., description="When the action was executed (UTC timestamp in seconds)" - ) - result: Optional[str] = Field(None, description="Result of the action") - - def setSuccess(self, result: str = None) -> None: - """Set the action as successful with optional result""" - self.status = TaskStatus.COMPLETED - self.error = None - if result is not None: - self.result = result - - def setError(self, error_message: str) -> None: - """Set the action as failed with error message""" - self.status = TaskStatus.FAILED - self.error = error_message - - -registerModelLabels( - "ActionItem", - {"en": "Task Action", "fr": "Action de tâche"}, - { - "id": {"en": "Action ID", "fr": "ID de l'action"}, - "execMethod": {"en": "Method", "fr": "Méthode"}, - "execAction": {"en": "Action", "fr": "Action"}, - "execParameters": {"en": "Parameters", "fr": "Paramètres"}, - "execResultLabel": {"en": "Result Label", "fr": "Label du résultat"}, - "expectedDocumentFormats": { - "en": "Expected Document Formats", - "fr": "Formats de documents attendus", - }, - "userMessage": {"en": "User Message", "fr": "Message utilisateur"}, - "status": {"en": "Status", "fr": "Statut"}, - "error": {"en": "Error", "fr": "Erreur"}, - "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"}, - "retryMax": {"en": "Max Retries", "fr": "Tentatives max"}, - "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"}, - "timestamp": {"en": "Timestamp", "fr": "Horodatage"}, - "result": {"en": "Result", "fr": "Résultat"}, - }, -) - - -class TaskResult(BaseModel): - taskId: str = Field(..., description="Task ID") - status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status") - success: bool = Field(..., description="Whether the task was successful") - feedback: Optional[str] = Field(None, description="Task feedback message") - error: Optional[str] = Field(None, description="Error message if task failed") - - -registerModelLabels( - "TaskResult", - {"en": "Task Result", "fr": "Résultat de tâche"}, - { - "taskId": {"en": "Task ID", "fr": "ID de la tâche"}, - "status": {"en": "Status", "fr": "Statut"}, - "success": {"en": "Success", "fr": "Succès"}, - "feedback": {"en": "Feedback", "fr": "Retour"}, - "error": {"en": "Error", "fr": "Erreur"}, - }, -) - - -class TaskItem(BaseModel): - id: str = Field(..., description="Task ID") - workflowId: str = Field(..., description="Workflow ID") - userInput: str = Field(..., description="User input that triggered the task") - status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status") - error: Optional[str] = Field(None, description="Error message if task failed") - startedAt: Optional[float] = Field( - None, description="When the task started (UTC timestamp in seconds)" - ) - finishedAt: Optional[float] = Field( - None, description="When the task finished (UTC timestamp in seconds)" - ) - actionList: List[ActionItem] = Field( - default_factory=list, description="List of actions to execute" - ) - retryCount: int = Field(default=0, description="Number of retries attempted") - retryMax: int = Field(default=3, description="Maximum number of retries") - rollbackOnFailure: bool = Field( - default=True, description="Whether to rollback on failure" - ) - dependencies: List[str] = Field( - default_factory=list, description="List of task IDs this task depends on" - ) - feedback: Optional[str] = Field(None, description="Task feedback message") - processingTime: Optional[float] = Field( - None, description="Total processing time in seconds" - ) - resultLabels: Optional[Dict[str, Any]] = Field( - default_factory=dict, description="Map of result labels to their values" - ) - - -registerModelLabels( - "TaskItem", - {"en": "Task", "fr": "Tâche"}, - { - "id": {"en": "Task ID", "fr": "ID de la tâche"}, - "workflowId": {"en": "Workflow ID", "fr": "ID du workflow"}, - "userInput": {"en": "User Input", "fr": "Entrée utilisateur"}, - "status": {"en": "Status", "fr": "Statut"}, - "error": {"en": "Error", "fr": "Erreur"}, - "startedAt": {"en": "Started At", "fr": "Démarré à"}, - "finishedAt": {"en": "Finished At", "fr": "Terminé à"}, - "actionList": {"en": "Actions", "fr": "Actions"}, - "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"}, - "retryMax": {"en": "Max Retries", "fr": "Tentatives max"}, - "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"}, - }, -) - - -class TaskStep(BaseModel): - id: str - objective: str - dependencies: Optional[list[str]] = Field(default_factory=list) - successCriteria: Optional[list[str]] = Field(default_factory=list) - estimatedComplexity: Optional[str] = None - userMessage: Optional[str] = Field( - None, description="User-friendly message in user's language" - ) - # Format details extracted from intent analysis - dataType: Optional[str] = Field( - None, description="Expected data type (text, numbers, documents, etc.)" - ) - expectedFormats: Optional[List[str]] = Field( - None, description="Expected output file format extensions (e.g., ['docx', 'pdf', 'xlsx']). Use actual file extensions, not conceptual terms." - ) - qualityRequirements: Optional[Dict[str, Any]] = Field( - None, description="Quality requirements and constraints" - ) - - -registerModelLabels( - "TaskStep", - {"en": "Task Step", "fr": "Étape de tâche"}, - { - "id": {"en": "ID", "fr": "ID"}, - "objective": {"en": "Objective", "fr": "Objectif"}, - "dependencies": {"en": "Dependencies", "fr": "Dépendances"}, - "successCriteria": {"en": "Success Criteria", "fr": "Critères de succès"}, - "estimatedComplexity": { - "en": "Estimated Complexity", - "fr": "Complexité estimée", - }, - "userMessage": {"en": "User Message", "fr": "Message utilisateur"}, - "expectedFormats": {"en": "Expected Formats", "fr": "Formats attendus"}, - }, -) - - -class TaskHandover(BaseModel): - taskId: str = Field(description="Target task ID") - sourceTask: Optional[str] = Field(None, description="Source task ID") - inputDocuments: List[DocumentExchange] = Field( - default_factory=list, description="Available input documents" - ) - outputDocuments: List[DocumentExchange] = Field( - default_factory=list, description="Produced output documents" - ) - context: Dict[str, Any] = Field(default_factory=dict, description="Task context") - previousResults: List[str] = Field( - default_factory=list, description="Previous result summaries" - ) - improvements: List[str] = Field( - default_factory=list, description="Improvement suggestions" - ) - workflowSummary: Optional[str] = Field( - None, description="Summarized workflow context" - ) - messageHistory: List[str] = Field( - default_factory=list, description="Key message summaries" - ) - timestamp: float = Field( - ..., description="When the handover was created (UTC timestamp in seconds)" - ) - handoverType: str = Field( - default="task", description="Type of handover: task, phase, or workflow" - ) - - -registerModelLabels( - "TaskHandover", - {"en": "Task Handover", "fr": "Transfert de tâche"}, - { - "taskId": {"en": "Task ID", "fr": "ID de la tâche"}, - "sourceTask": {"en": "Source Task", "fr": "Tâche source"}, - "inputDocuments": {"en": "Input Documents", "fr": "Documents d'entrée"}, - "outputDocuments": {"en": "Output Documents", "fr": "Documents de sortie"}, - "context": {"en": "Context", "fr": "Contexte"}, - "previousResults": {"en": "Previous Results", "fr": "Résultats précédents"}, - "improvements": {"en": "Improvements", "fr": "Améliorations"}, - "workflowSummary": {"en": "Workflow Summary", "fr": "Résumé du workflow"}, - "messageHistory": {"en": "Message History", "fr": "Historique des messages"}, - "timestamp": {"en": "Timestamp", "fr": "Horodatage"}, - "handoverType": {"en": "Handover Type", "fr": "Type de transfert"}, - }, -) - - -class TaskContext(BaseModel): - taskStep: TaskStep - workflow: Optional[ChatWorkflow] = None - workflowId: Optional[str] = None - availableDocuments: Optional[str] = "No documents available" - availableConnections: Optional[list[str]] = Field(default_factory=list) - previousResults: Optional[list[str]] = Field(default_factory=list) - previousHandover: Optional[TaskHandover] = None - improvements: Optional[list[str]] = Field(default_factory=list) - retryCount: Optional[int] = 0 - previousActionResults: Optional[list] = Field(default_factory=list) - previousReviewResult: Optional[dict] = None - isRegeneration: Optional[bool] = False - failurePatterns: Optional[list[str]] = Field(default_factory=list) - failedActions: Optional[list] = Field(default_factory=list) - successfulActions: Optional[list] = Field(default_factory=list) - executedActions: Optional[list] = Field(default_factory=list, description="List of executed actions with action name, parameters, and step number") - criteriaProgress: Optional[dict] = None - - # Stage 2 context fields (NEW) - actionObjective: Optional[str] = Field(None, description="Objective for current action") - parametersContext: Optional[str] = Field(None, description="Context for parameter generation") - learnings: Optional[list[str]] = Field(default_factory=list, description="Learnings from previous actions") - stage1Selection: Optional[dict] = Field(None, description="Stage 1 selection data") - nextActionGuidance: Optional[Dict[str, Any]] = Field(None, description="Guidance for the next action from previous refinement") - - def updateFromSelection(self, selection: Any): - """Update context from Stage 1 selection - - Args: - selection: ActionDefinition instance from Stage 1 - """ - from modules.datamodels.datamodelWorkflow import ActionDefinition - - if isinstance(selection, ActionDefinition): - self.actionObjective = selection.actionObjective - self.parametersContext = selection.parametersContext - self.learnings = selection.learnings if selection.learnings else [] - self.stage1Selection = selection.model_dump() - - def getDocumentReferences(self) -> List[str]: - docs = [] - if self.previousHandover: - for doc_exchange in self.previousHandover.inputDocuments: - docs.extend(doc_exchange.documents) - return list(set(docs)) - - def addImprovement(self, improvement: str) -> None: - if improvement not in (self.improvements or []): - if self.improvements is None: - self.improvements = [] - self.improvements.append(improvement) - - -class ReviewContext(BaseModel): - taskStep: TaskStep - taskActions: Optional[list] = Field(default_factory=list) - actionResults: Optional[list] = Field(default_factory=list) - stepResult: Optional[dict] = Field(default_factory=dict) - workflowId: Optional[str] = None - previousResults: Optional[list[str]] = Field(default_factory=list) - - -class ReviewResult(BaseModel): - status: str - reason: Optional[str] = None - improvements: Optional[list[str]] = Field(default_factory=list) - qualityScore: Optional[float] = Field(default=5.0, description="Quality score (0-10)") - missingOutputs: Optional[list[str]] = Field(default_factory=list) - metCriteria: Optional[list[str]] = Field(default_factory=list) - unmetCriteria: Optional[list[str]] = Field(default_factory=list) - confidence: Optional[float] = 0.5 - userMessage: Optional[str] = Field( - None, description="User-friendly message in user's language" - ) - # NEW: Concrete next action guidance (when status is "continue") - nextAction: Optional[str] = Field( - None, description="Specific action to execute next (e.g., 'ai.convert', 'ai.process', 'ai.reformat')" - ) - nextActionParameters: Optional[Dict[str, Any]] = Field( - None, description="Parameters for the next action (e.g., {'fromFormat': 'json', 'toFormat': 'csv'})" - ) - nextActionObjective: Optional[str] = Field( - None, description="What this specific action will achieve" - ) - - -registerModelLabels( - "ReviewResult", - {"en": "Review Result", "fr": "Résultat de l'évaluation"}, - { - "status": {"en": "Status", "fr": "Statut"}, - "reason": {"en": "Reason", "fr": "Raison"}, - "improvements": {"en": "Improvements", "fr": "Améliorations"}, - "qualityScore": {"en": "Quality Score", "fr": "Score de qualité"}, - "missingOutputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"}, - "metCriteria": {"en": "Met Criteria", "fr": "Critères respectés"}, - "unmetCriteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"}, - "confidence": {"en": "Confidence", "fr": "Confiance"}, - "userMessage": {"en": "User Message", "fr": "Message utilisateur"}, - }, -) - - -class TaskPlan(BaseModel): - overview: str - tasks: list[TaskStep] - userMessage: Optional[str] = Field( - None, description="Overall user-friendly message for the task plan" - ) - - -registerModelLabels( - "TaskPlan", - {"en": "Task Plan", "fr": "Plan de tâches"}, - { - "overview": {"en": "Overview", "fr": "Aperçu"}, - "tasks": {"en": "Tasks", "fr": "Tâches"}, - "userMessage": {"en": "User Message", "fr": "Message utilisateur"}, - }, -) - -# Forward references resolved automatically since ChatWorkflow is defined above - - -class PromptPlaceholder(BaseModel): - label: str - content: str - summaryAllowed: bool = Field( - default=False, - description="Whether host may summarize content before sending to AI", - ) - - -registerModelLabels( - "PromptPlaceholder", - {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"}, - { - "label": {"en": "Label", "fr": "Libellé"}, - "content": {"en": "Content", "fr": "Contenu"}, - "summaryAllowed": {"en": "Summary Allowed", "fr": "Résumé autorisé"}, - }, -) - - -class PromptBundle(BaseModel): - prompt: str - placeholders: List[PromptPlaceholder] = Field(default_factory=list) - - -registerModelLabels( - "PromptBundle", - {"en": "Prompt Bundle", "fr": "Lot d'invite"}, - { - "prompt": {"en": "Prompt", "fr": "Invite"}, - "placeholders": {"en": "Placeholders", "fr": "Espaces réservés"}, - }, -) diff --git a/modules/features/chatbot/eventManager.py b/modules/features/chatbot/eventManager.py deleted file mode 100644 index 8780ef4f..00000000 --- a/modules/features/chatbot/eventManager.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Generic streaming event manager for real-time updates. -Manages event queues for SSE streaming across all features (chatbot, workflows, documents, etc.). -Supports event-driven streaming instead of polling. -""" - -import logging -import asyncio -from typing import Dict, Optional, Any, List, AsyncIterator, Set -from datetime import datetime - -logger = logging.getLogger(__name__) - - -class StreamingEventManager: - """ - Generic event manager for real-time streaming across all features. - Supports multiple event types and contexts (workflows, documents, tasks, etc.). - Thread-safe event emission and queue management. - """ - - def __init__(self): - """Initialize the event manager.""" - self._queues: Dict[str, asyncio.Queue] = {} - self._locks: Dict[str, asyncio.Lock] = {} - self._cleanup_tasks: Dict[str, asyncio.Task] = {} - self._subscribers: Dict[str, Set[str]] = {} # context_id -> set of queue_ids (for future multi-subscriber support) - - def create_queue(self, context_id: str) -> asyncio.Queue: - """ - Create a new event queue for a context. - - Args: - context_id: Context ID (workflow_id, document_id, task_id, etc.) - - Returns: - Event queue for the context - """ - if context_id not in self._queues: - self._queues[context_id] = asyncio.Queue() - self._locks[context_id] = asyncio.Lock() - self._subscribers[context_id] = set() - logger.debug(f"Created event queue for context {context_id}") - return self._queues[context_id] - - def get_queue(self, context_id: str) -> Optional[asyncio.Queue]: - """ - Get existing event queue for a context. - - Args: - context_id: Context ID - - Returns: - Event queue if exists, None otherwise - """ - return self._queues.get(context_id) - - async def emit_event( - self, - context_id: str, - event_type: str, - data: Dict[str, Any], - event_category: str = "default", - message: Optional[str] = None, - step: Optional[str] = None - ): - """ - Emit an event to the context's event queue. - - Args: - context_id: Context ID (workflow_id, document_id, etc.) - event_type: Type of event ("message", "log", "status", "progress", "complete", "error", "chatdata") - data: Event data dictionary (will be included in event) - event_category: Category of event for filtering ("chat", "workflow", "document", etc.) - message: Optional event message (for backward compatibility) - step: Optional processing step (for backward compatibility) - """ - queue = self.get_queue(context_id) - if not queue: - logger.debug(f"No event queue found for context {context_id}, skipping event") - return - - event = { - "type": event_type, - "category": event_category, - "timestamp": datetime.now().timestamp(), - "data": data, - "message": message, # For backward compatibility - "step": step # For backward compatibility - } - - try: - await queue.put(event) - logger.debug(f"Emitted {event_type} event (category: {event_category}) for context {context_id}") - except Exception as e: - logger.error(f"Error emitting event for context {context_id}: {e}") - - async def stream_events( - self, - context_id: str, - event_categories: Optional[List[str]] = None, - timeout: Optional[float] = None - ) -> AsyncIterator[Dict[str, Any]]: - """ - Async generator for streaming events from a context. - - Args: - context_id: Context ID to stream events from - event_categories: Optional list of event categories to filter by - timeout: Optional timeout in seconds (None = no timeout, default: 300s for long-running streams) - - Yields: - Event dictionaries - """ - queue = self.get_queue(context_id) - if not queue: - logger.warning(f"No queue found for context {context_id}") - return - - # Default timeout of 5 minutes for long-running streams if not specified - effective_timeout = timeout if timeout is not None else 300.0 - start_time = asyncio.get_event_loop().time() - last_event_time = start_time - heartbeat_interval = 30.0 # Send heartbeat every 30 seconds to keep connection alive - - while True: - # Check timeout - elapsed = asyncio.get_event_loop().time() - start_time - if elapsed > effective_timeout: - logger.debug(f"Stream timeout for context {context_id} after {effective_timeout}s") - break - - try: - # Wait for event with longer timeout to avoid premature closure - wait_timeout = heartbeat_interval # Check every 30 seconds - if effective_timeout: - remaining = effective_timeout - elapsed - if remaining <= 0: - break - wait_timeout = min(wait_timeout, remaining) - - event = await asyncio.wait_for(queue.get(), timeout=wait_timeout) - last_event_time = asyncio.get_event_loop().time() - - # Filter by category if specified - if event_categories and event.get("category") not in event_categories: - continue - - yield event - - except asyncio.TimeoutError: - # Send heartbeat to keep connection alive if no events - time_since_last_event = asyncio.get_event_loop().time() - last_event_time - if time_since_last_event >= heartbeat_interval: - # Send heartbeat event to keep stream alive - heartbeat_event = { - "type": "heartbeat", - "category": "system", - "timestamp": datetime.now().timestamp(), - "data": {"status": "alive"}, - "message": None, - "step": None - } - yield heartbeat_event - last_event_time = asyncio.get_event_loop().time() - - # Check if we should continue or timeout - elapsed = asyncio.get_event_loop().time() - start_time - if elapsed >= effective_timeout: - break - continue - except Exception as e: - logger.error(f"Error in stream_events for context {context_id}: {e}") - break - - async def cleanup(self, context_id: str, delay: float = 60.0): - """ - Schedule cleanup of event queue after delay. - This allows time for any remaining events to be consumed. - - Args: - context_id: Context ID - delay: Delay in seconds before cleanup (default: 60 seconds) - """ - if context_id in self._cleanup_tasks: - # Cancel existing cleanup task - self._cleanup_tasks[context_id].cancel() - - async def _cleanup(): - try: - await asyncio.sleep(delay) - if context_id in self._queues: - # Drain remaining events - queue = self._queues[context_id] - while not queue.empty(): - try: - queue.get_nowait() - except asyncio.QueueEmpty: - break - - del self._queues[context_id] - if context_id in self._locks: - del self._locks[context_id] - if context_id in self._subscribers: - del self._subscribers[context_id] - logger.info(f"Cleaned up event queue for context {context_id}") - except asyncio.CancelledError: - pass - except Exception as e: - logger.error(f"Error during cleanup for context {context_id}: {e}") - finally: - if context_id in self._cleanup_tasks: - del self._cleanup_tasks[context_id] - - self._cleanup_tasks[context_id] = asyncio.create_task(_cleanup()) - - def has_queue(self, context_id: str) -> bool: - """ - Check if a queue exists for a context. - - Args: - context_id: Context ID - - Returns: - True if queue exists, False otherwise - """ - return context_id in self._queues - - -# Backward compatibility: ChatbotEventManager is an alias -ChatbotEventManager = StreamingEventManager - -# Global singleton instance -_event_manager = StreamingEventManager() - - -def get_event_manager() -> StreamingEventManager: - """Get the global event manager instance.""" - return _event_manager - - diff --git a/modules/features/chatbot/langgraphChatbot.py b/modules/features/chatbot/langgraphChatbot.py deleted file mode 100644 index 09154a1b..00000000 --- a/modules/features/chatbot/langgraphChatbot.py +++ /dev/null @@ -1,345 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -LangGraph-based chatbot implementation. -Uses LangGraph workflow with AI Center integration and connector tools. -""" - -import logging -from dataclasses import dataclass -from typing import Annotated, AsyncIterator, Any, Optional, List -from pydantic import BaseModel - -from langchain_core.messages import ( - BaseMessage, - HumanMessage, - SystemMessage, - trim_messages, -) -from langgraph.graph.message import add_messages -from langgraph.graph import StateGraph, START, END -from langgraph.graph.state import CompiledStateGraph -from langgraph.prebuilt import ToolNode -from langgraph.checkpoint.memory import MemorySaver - -from modules.features.chatbot.aiCenterAdapter import AICenterChatModel -from modules.features.chatbot.langgraphTools import ( - send_streaming_message, - create_sql_tool, - create_tavily_tools, -) -from modules.shared.configuration import APP_CONFIG - -logger = logging.getLogger(__name__) - - -class ChatState(BaseModel): - """Represents the state of a chat session.""" - - messages: Annotated[List[BaseMessage], add_messages] - - -@dataclass -class LangGraphChatbot: - """LangGraph-based chatbot with AI Center integration.""" - - model: AICenterChatModel - memory: Any - app: Optional[CompiledStateGraph] = None - system_prompt: str = "You are a helpful assistant." - - @classmethod - async def create( - cls, - services, - system_prompt: str, - connector_instance, - enable_web_research: bool = True, - tavily_api_key: Optional[str] = None, - context_window_size: int = 8000, - ) -> "LangGraphChatbot": - """ - Factory method to create and configure a LangGraphChatbot instance. - - Args: - services: Services instance with AI access - system_prompt: The system prompt to initialize the chatbot - connector_instance: Database connector instance (PreprocessorConnector) - enable_web_research: Whether to enable web research tools - tavily_api_key: Tavily API key for web research (if None, uses APP_CONFIG) - context_window_size: Maximum context window size in tokens - - Returns: - A configured LangGraphChatbot instance - """ - # Get Tavily API key from config if not provided - if tavily_api_key is None: - tavily_api_key = APP_CONFIG.get("Connector_AiTavily_API_SECRET") - - # Create AI Center chat model adapter - model = AICenterChatModel( - services=services, - system_prompt=system_prompt, - temperature=0.2 - ) - - # Create memory/checkpointer - memory = MemorySaver() - - instance = LangGraphChatbot( - model=model, - memory=memory, - system_prompt=system_prompt, - ) - - # Configure tools - configured_tools = await instance._configure_tools( - connector_instance, - enable_web_research, - tavily_api_key - ) - - # Build LangGraph app - instance.app = instance._build_app(memory, configured_tools, context_window_size) - - return instance - - async def _configure_tools( - self, - connector_instance, - enable_web_research: bool, - tavily_api_key: Optional[str] - ) -> List: - """ - Configure tools for the chatbot. - - Args: - connector_instance: Database connector instance - enable_web_research: Whether web research is enabled - tavily_api_key: Tavily API key - - Returns: - List of configured tools - """ - tools = [] - - # SQL tool using connector - sql_tool = create_sql_tool(connector_instance) - tools.append(sql_tool) - - # Streaming message tool - tools.append(send_streaming_message) - - # Tavily tools (if enabled) - if enable_web_research: - tavily_tools = create_tavily_tools(tavily_api_key, enable_web_research) - tools.extend(tavily_tools) - - logger.info(f"Configured {len(tools)} tools for LangGraph chatbot") - return tools - - def _build_app( - self, - memory: Any, - tools: List, - context_window_size: int - ) -> CompiledStateGraph[ChatState, None, ChatState, ChatState]: - """ - Builds the chatbot application workflow using LangGraph. - - Args: - memory: The chat memory/checkpointer to use - tools: The list of tools the chatbot can use - context_window_size: Maximum context window size - - Returns: - A compiled state graph representing the chatbot application - """ - # Bind tools to model - llm_with_tools = self.model.bind_tools(tools=tools) - - def select_window(msgs: List[BaseMessage]) -> List[BaseMessage]: - """Selects a window of messages that fit within the context window size. - - Args: - msgs: The list of messages to select from. - - Returns: - A list of messages that fit within the context window size. - """ - def approx_counter(items: List[BaseMessage]) -> int: - """Approximate token counter for messages. - - Args: - items: List of messages to count tokens for. - - Returns: - Approximate number of tokens in the messages. - """ - return sum(len(getattr(m, "content", "") or "") for m in items) - - return trim_messages( - msgs, - strategy="last", - token_counter=approx_counter, - max_tokens=context_window_size, - start_on="human", - end_on=("human", "tool"), - include_system=True, - ) - - def agent_node(state: ChatState) -> dict: - """Agent node for the chatbot workflow. - - Args: - state: The current chat state. - - Returns: - The updated chat state after processing. - """ - # Select the message window to fit in context (trim if needed) - window = select_window(state.messages) - - # Ensure the system prompt is present at the start - if not window or not isinstance(window[0], SystemMessage): - window = [SystemMessage(content=self.system_prompt)] + window - - # Call the LLM with tools - response = llm_with_tools.invoke(window) - - # Return the new state - return {"messages": [response]} - - def should_continue(state: ChatState) -> str: - """Determines whether to continue the workflow or end it. - - This conditional edge is called after the agent node to decide - whether to continue to the tools node (if the last message contains - tool calls) or to end the workflow (if no tool calls are present). - - Args: - state: The current chat state. - - Returns: - The next node to transition to ("tools" or END). - """ - # Get the last message - last_message = state.messages[-1] - - # Check if the last message contains tool calls - # If so, continue to the tools node; otherwise, end the workflow - return "tools" if getattr(last_message, "tool_calls", None) else END - - # Compose the workflow - workflow = StateGraph(ChatState) - workflow.add_node("agent", agent_node) - workflow.add_node("tools", ToolNode(tools=tools)) - workflow.add_edge(START, "agent") - workflow.add_conditional_edges("agent", should_continue) - workflow.add_edge("tools", "agent") - - return workflow.compile(checkpointer=memory) - - async def chat(self, message: str, chat_id: str = "default") -> List[BaseMessage]: - """ - Process a chat message by calling the LLM and tools and returns the chat history. - - Args: - message: The user message to process - chat_id: The chat thread ID - - Returns: - The list of messages in the chat history - """ - if not self.app: - raise RuntimeError("Chatbot app not initialized. Call create() first.") - - # Set the right thread ID for memory - config = {"configurable": {"thread_id": chat_id}} - - # Single-turn chat (non-streaming) - result = await self.app.ainvoke( - {"messages": [HumanMessage(content=message)]}, config=config - ) - - # Extract and return the messages from the result - return result["messages"] - - async def stream_events( - self, *, message: str, chat_id: str = "default" - ) -> AsyncIterator[dict]: - """ - Stream UI-focused events using astream_events v2. - - Args: - message: The user message to process - chat_id: Logical thread identifier; forwarded in the runnable config so - memory and tools are scoped per thread - - Yields: - dict: One of: - - ``{"type": "status", "label": str}`` for short progress updates. - - ``{"type": "final", "response": {"thread": str, "chat_history": list[dict]}}`` - where ``chat_history`` only includes ``user``/``assistant`` roles. - - ``{"type": "error", "message": str}`` if an exception occurs. - """ - if not self.app: - raise RuntimeError("Chatbot app not initialized. Call create() first.") - - # Thread-aware config for LangGraph/LangChain - config = {"configurable": {"thread_id": chat_id}} - - def _is_root(ev: dict) -> bool: - """Return True if the event is from the root run (v2: empty parent_ids).""" - return not ev.get("parent_ids") - - try: - async for event in self.app.astream_events( - {"messages": [HumanMessage(content=message)]}, - config=config, - version="v2", - ): - etype = event.get("event") - ename = event.get("name") or "" - edata = event.get("data") or {} - - # Stream human-readable progress via the special send_streaming_message tool - if etype == "on_tool_start" and ename == "send_streaming_message": - tool_in = edata.get("input") or {} - msg = tool_in.get("message") - if isinstance(msg, str) and msg.strip(): - yield {"type": "status", "label": msg.strip()} - continue - - # Emit the final payload when the root run finishes - if etype == "on_chain_end" and _is_root(event): - output_obj = edata.get("output") - - # Extract message list from the graph's final output - final_msgs = output_obj.get("messages", []) if isinstance(output_obj, dict) else [] - - # Normalize for the frontend (only user/assistant with text content) - chat_history_payload: List[dict] = [] - for m in final_msgs: - if isinstance(m, BaseMessage): - role = "user" if isinstance(m, HumanMessage) else "assistant" if isinstance(m, BaseMessage) else None - content = getattr(m, "content", "") - if role and content: - chat_history_payload.append({ - "role": role, - "content": content - }) - - yield { - "type": "final", - "response": { - "thread": chat_id, - "chat_history": chat_history_payload, - }, - } - return - - except Exception as exc: - # Emit a single error envelope and end the stream - logger.error(f"Exception in stream_events: {exc}", exc_info=True) - yield {"type": "error", "message": f"Fehler beim Verarbeiten: {exc}"} diff --git a/modules/features/chatbot/langgraphTools.py b/modules/features/chatbot/langgraphTools.py deleted file mode 100644 index 29773981..00000000 --- a/modules/features/chatbot/langgraphTools.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -LangGraph-compatible tools for chatbot. -Wraps connectors and external services as LangGraph tools. -""" - -import logging -import json -from typing import Optional - -from langchain_core.tools import tool - -logger = logging.getLogger(__name__) - - -@tool -def send_streaming_message(message: str) -> str: - """Send a streaming message to the user to provide updates during processing. - - Use this tool to send short status updates to the user while you are working - on their request. This helps keep the user informed about what you are doing. - - Args: - message: A short German message describing what you are currently doing. - Examples: "Durchsuche Datenbank nach Lampen, LED, Leuchten, und Ähnlichem." - "Suche im Internet nach Produktinformationen." - "Analysiere Suchergebnisse." - - Returns: - A confirmation that the message was sent. - """ - # This tool doesn't actually do anything - it's just for the AI to signal - # what it's doing to the frontend via the tool call mechanism - return f"Status-Update gesendet: {message}" - - -def create_sql_tool(connector_instance): - """ - Create a LangGraph-compatible SQL tool using a connector instance. - - Args: - connector_instance: PreprocessorConnector or similar connector instance - - Returns: - LangChain tool for SQL queries - """ - # Store connector in closure - connector = connector_instance - - @tool - async def execute_sql_query(query: str) -> str: - """Execute a SQL SELECT query on the database. - - This tool allows you to query the database to find articles, prices, - inventory levels, and other information. - - Args: - query: A valid SQL SELECT query. Only SELECT queries are allowed. - Use double quotes for column names with spaces or special characters. - Example: SELECT "Artikelnummer", "Artikelbezeichnung" FROM Artikel - WHERE "Artikelbezeichnung" LIKE '%Lampe%' LIMIT 20 - - Returns: - Query results as formatted string with data rows - """ - try: - logger.info(f"Executing SQL query via connector: {query[:100]}...") - - # Ensure connector is initialized - if connector is None: - return "Error: Database connector not initialized" - - # Execute query - result = await connector.executeQuery(query, return_json=True) - - if isinstance(result, dict): - # Return formatted text result - text_result = result.get("text", "Query executed successfully but returned no results.") - # Also include data count if available - data = result.get("data", []) - if data: - text_result += f"\n\nFound {len(data)} row(s)." - return text_result - else: - # Return string result directly - return str(result) - - except Exception as e: - error_msg = f"Error executing SQL query: {str(e)}" - logger.error(error_msg, exc_info=True) - return error_msg - - # Set tool metadata for better AI understanding - execute_sql_query.name = "execute_sql_query" - execute_sql_query.description = """Execute a SQL SELECT query on the database. - - Use this tool to search for articles, check prices, inventory levels, suppliers, etc. - Only SELECT queries are allowed. Use double quotes for column names with spaces. - - Database tables: Artikel, Einkaufspreis_neu, Lagerplatz_Artikel, Lagerplatz - - Example queries: - - SELECT "Artikelnummer", "Artikelbezeichnung" FROM Artikel WHERE "Artikelbezeichnung" LIKE '%Lampe%' - - SELECT a."Artikelnummer", e."EP_CHF" FROM Artikel a LEFT JOIN Einkaufspreis_neu e ON a."I_ID" = e."ARTIKEL" - - SELECT a."Artikelnummer", l."S_IST_BESTAND" FROM Artikel a LEFT JOIN Lagerplatz_Artikel l ON a."I_ID" = l."R_ARTIKEL" - """ - - return execute_sql_query - - -def create_tavily_tools(tavily_api_key: Optional[str] = None, enable_web_research: bool = True): - """ - Create Tavily search tools for web research. - - Args: - tavily_api_key: Tavily API key (if None, tools will return error messages) - enable_web_research: Whether web research is enabled - - Returns: - List of Tavily tools (search and extract) - """ - tools = [] - - if not enable_web_research or not tavily_api_key: - # Return dummy tools that explain web research is disabled - @tool - def tavily_search_disabled(query: str) -> str: - """Web research is disabled for this chatbot instance.""" - return "Web research is not enabled for this chatbot instance." - - @tool - def tavily_extract_disabled(urls: str) -> str: - """Web research is disabled for this chatbot instance.""" - return "Web research is not enabled for this chatbot instance." - - return [tavily_search_disabled, tavily_extract_disabled] - - try: - from langchain_tavily import TavilySearchResults, TavilyExtract - - # Create Tavily search tool - tavily_search = TavilySearchResults( - tavily_api_key=tavily_api_key, - max_results=5 - ) - - # Create Tavily extract tool - tavily_extract = TavilyExtract(tavily_api_key=tavily_api_key) - - return [tavily_search, tavily_extract] - - except ImportError: - logger.warning("langchain_tavily not available, creating dummy tools") - - @tool - def tavily_search_fallback(query: str) -> str: - """Tavily search tool (not available - langchain_tavily not installed).""" - return "Tavily search is not available. Please install langchain_tavily package." - - @tool - def tavily_extract_fallback(urls: str) -> str: - """Tavily extract tool (not available - langchain_tavily not installed).""" - return "Tavily extract is not available. Please install langchain_tavily package." - - return [tavily_search_fallback, tavily_extract_fallback] diff --git a/modules/features/chatbot/mainChatbot.py b/modules/features/chatbot/mainChatbot.py deleted file mode 100644 index b539ea9e..00000000 --- a/modules/features/chatbot/mainChatbot.py +++ /dev/null @@ -1,2722 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Simple chatbot feature - basic implementation. -User input is processed by AI to create list of needed queries. -Those queries get streamed back. - -This module also handles feature initialization and RBAC catalog registration. -""" - -import logging -from typing import Dict, List, Any - -# Feature metadata for RBAC catalog -FEATURE_CODE = "chatbot" -FEATURE_LABEL = {"en": "Chatbot", "de": "Chatbot", "fr": "Chatbot"} -FEATURE_ICON = "mdi-robot" - -# UI Objects for RBAC catalog -UI_OBJECTS = [ - { - "objectKey": "ui.feature.chatbot.conversations", - "label": {"en": "Conversations", "de": "Konversationen", "fr": "Conversations"}, - "meta": {"area": "conversations"} - }, - { - "objectKey": "ui.feature.chatbot.settings", - "label": {"en": "Settings", "de": "Einstellungen", "fr": "Paramètres"}, - "meta": {"area": "settings"} - }, -] - -# Resource Objects for RBAC catalog -RESOURCE_OBJECTS = [ - { - "objectKey": "resource.feature.chatbot.start", - "label": {"en": "Start Chatbot", "de": "Chatbot starten", "fr": "Démarrer chatbot"}, - "meta": {"endpoint": "/api/chatbot/{instanceId}/start/stream", "method": "POST"} - }, - { - "objectKey": "resource.feature.chatbot.stop", - "label": {"en": "Stop Chatbot", "de": "Chatbot stoppen", "fr": "Arrêter chatbot"}, - "meta": {"endpoint": "/api/chatbot/{instanceId}/stop/{workflowId}", "method": "POST"} - }, -] - -# DATA Objects for RBAC catalog (tables/entities) -# Used for AccessRules on data-level permissions -DATA_OBJECTS = [ - { - "objectKey": "data.feature.chatbot.ChatWorkflow", - "label": {"en": "Chat Workflow", "de": "Chat-Workflow", "fr": "Workflow de chat"}, - "meta": {"table": "ChatWorkflow", "fields": ["id", "name", "status", "mandateId", "featureInstanceId"]} - }, - { - "objectKey": "data.feature.chatbot.ChatMessage", - "label": {"en": "Chat Message", "de": "Chat-Nachricht", "fr": "Message de chat"}, - "meta": {"table": "ChatMessage", "fields": ["id", "workflowId", "message", "role", "publishedAt"]} - }, - { - "objectKey": "data.feature.chatbot.ChatLog", - "label": {"en": "Chat Log", "de": "Chat-Log", "fr": "Journal de chat"}, - "meta": {"table": "ChatLog", "fields": ["id", "workflowId", "message", "type", "timestamp"]} - }, - { - "objectKey": "data.feature.chatbot.ChatDocument", - "label": {"en": "Chat Document", "de": "Chat-Dokument", "fr": "Document de chat"}, - "meta": {"table": "ChatDocument", "fields": ["id", "messageId", "fileId", "fileName", "fileSize", "mimeType"]} - }, - { - "objectKey": "data.feature.chatbot.ChatStat", - "label": {"en": "Chat Statistics", "de": "Chat-Statistiken", "fr": "Statistiques de chat"}, - "meta": {"table": "ChatStat", "fields": ["id", "workflowId", "processingTime", "bytesSent", "bytesReceived", "errorCount"]} - }, - { - "objectKey": "data.feature.chatbot.*", - "label": {"en": "All Chatbot Data", "de": "Alle Chatbot-Daten", "fr": "Toutes les données chatbot"}, - "meta": {"wildcard": True, "description": "Wildcard for all chatbot data tables"} - }, -] - -# Template roles for this feature -TEMPLATE_ROLES = [ - { - "roleLabel": "chatbot-admin", - "description": { - "en": "Chatbot Administrator - Full access to chatbot settings and all conversations", - "de": "Chatbot-Administrator - Vollzugriff auf Chatbot-Einstellungen und alle Konversationen", - "fr": "Administrateur chatbot - Accès complet aux paramètres et conversations" - }, - "accessRules": [ - # Full UI access - {"context": "UI", "item": None, "view": True}, - # Full DATA access - {"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"}, - # Resource access - {"context": "RESOURCE", "item": "resource.feature.chatbot.start", "view": True}, - ] - }, - { - "roleLabel": "chatbot-user", - "description": { - "en": "Chatbot User - Use chatbot and view own conversations", - "de": "Chatbot-Benutzer - Chatbot nutzen und eigene Konversationen einsehen", - "fr": "Utilisateur chatbot - Utiliser le chatbot et consulter ses conversations" - }, - "accessRules": [ - # UI access to conversations - vollqualifizierte ObjectKeys - {"context": "UI", "item": "ui.feature.chatbot.conversations", "view": True}, - # Own DATA access (my level) - {"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"}, - # Resource access - {"context": "RESOURCE", "item": "resource.feature.chatbot.start", "view": True}, - ] - }, -] - - -def getFeatureDefinition(): - """Return the feature definition for registration.""" - return { - "code": FEATURE_CODE, - "label": FEATURE_LABEL, - "icon": FEATURE_ICON - } - - -def getUiObjects(): - """Return UI objects for RBAC catalog registration.""" - return UI_OBJECTS - - -def getResourceObjects(): - """Return resource objects for RBAC catalog registration.""" - return RESOURCE_OBJECTS - - -def getTemplateRoles(): - """Return template roles for this feature.""" - return TEMPLATE_ROLES - - -def getDataObjects(): - """Return DATA objects for RBAC catalog registration.""" - return DATA_OBJECTS - - -def registerFeature(catalogService) -> bool: - """ - Register this feature's RBAC objects in the catalog. - - Args: - catalogService: The RBAC catalog service instance - - Returns: - True if registration was successful - """ - try: - # Register UI objects - for uiObj in UI_OBJECTS: - catalogService.registerUiObject( - featureCode=FEATURE_CODE, - objectKey=uiObj["objectKey"], - label=uiObj["label"], - meta=uiObj.get("meta") - ) - - # Register Resource objects - for resObj in RESOURCE_OBJECTS: - catalogService.registerResourceObject( - featureCode=FEATURE_CODE, - objectKey=resObj["objectKey"], - label=resObj["label"], - meta=resObj.get("meta") - ) - - # Register DATA objects (tables/entities) - for dataObj in DATA_OBJECTS: - catalogService.registerDataObject( - featureCode=FEATURE_CODE, - objectKey=dataObj["objectKey"], - label=dataObj["label"], - meta=dataObj.get("meta") - ) - - # Sync template roles to database (with AccessRules) - _syncTemplateRolesToDb() - - logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI, {len(RESOURCE_OBJECTS)} resource, {len(DATA_OBJECTS)} data objects") - return True - - except Exception as e: - logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}") - return False - - -def _syncTemplateRolesToDb() -> int: - """ - Sync template roles and their AccessRules to the database. - Creates global template roles (mandateId=None) if they don't exist. - - Returns: - Number of roles created/updated - """ - try: - from modules.interfaces.interfaceDbApp import getRootInterface - from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext - - rootInterface = getRootInterface() - db = rootInterface.db - - # Get existing template roles for this feature - existingRoles = db.getRecordset( - Role, - recordFilter={"featureCode": FEATURE_CODE, "mandateId": None} - ) - existingRoleLabels = {r.get("roleLabel"): r.get("id") for r in existingRoles} - - createdCount = 0 - for roleTemplate in TEMPLATE_ROLES: - roleLabel = roleTemplate["roleLabel"] - - if roleLabel in existingRoleLabels: - roleId = existingRoleLabels[roleLabel] - logger.debug(f"Template role '{roleLabel}' already exists with ID {roleId}") - - # Ensure AccessRules exist for this role - _ensureAccessRulesForRole(db, roleId, roleTemplate.get("accessRules", [])) - else: - # Create new template role - newRole = Role( - roleLabel=roleLabel, - description=roleTemplate.get("description", {}), - featureCode=FEATURE_CODE, - mandateId=None, # Global template - featureInstanceId=None, - isSystemRole=False - ) - createdRole = db.recordCreate(Role, newRole.model_dump()) - roleId = createdRole.get("id") - - # Create AccessRules for this role - _ensureAccessRulesForRole(db, roleId, roleTemplate.get("accessRules", [])) - - logger.info(f"Created template role '{roleLabel}' with ID {roleId}") - createdCount += 1 - - if createdCount > 0: - logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles") - - # Repair instance-specific roles that are missing AccessRules - _repairInstanceRolesAccessRules(db, existingRoleLabels) - - return createdCount - - except Exception as e: - logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}") - return 0 - - -def _repairInstanceRolesAccessRules(db, templateRoleLabels: Dict[str, str]) -> int: - """ - Repair instance-specific roles by copying AccessRules from their template roles. - This ensures instance roles created before AccessRules were defined get updated. - - Args: - db: Database connector - templateRoleLabels: Dict mapping roleLabel to template role ID - - Returns: - Number of instance roles repaired - """ - from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext - - repairedCount = 0 - - # Get all instance-specific roles for this feature (mandateId is NOT None) - allRoles = db.getRecordset(Role, recordFilter={"featureCode": FEATURE_CODE}) - instanceRoles = [r for r in allRoles if r.get("mandateId") is not None] - - for instanceRole in instanceRoles: - roleLabel = instanceRole.get("roleLabel") - instanceRoleId = instanceRole.get("id") - - # Find matching template role - templateRoleId = templateRoleLabels.get(roleLabel) - if not templateRoleId: - continue - - # Check if instance role has AccessRules - existingRules = db.getRecordset(AccessRule, recordFilter={"roleId": instanceRoleId}) - if existingRules: - continue # Already has rules, skip - - # Copy AccessRules from template role - templateRules = db.getRecordset(AccessRule, recordFilter={"roleId": templateRoleId}) - if not templateRules: - continue # Template has no rules - - for rule in templateRules: - newRule = AccessRule( - roleId=instanceRoleId, - context=rule.get("context"), - item=rule.get("item"), - view=rule.get("view", False), - read=rule.get("read"), - create=rule.get("create"), - update=rule.get("update"), - delete=rule.get("delete"), - ) - db.recordCreate(AccessRule, newRule.model_dump()) - - logger.info(f"Repaired instance role '{roleLabel}' (ID: {instanceRoleId}): copied {len(templateRules)} AccessRules from template") - repairedCount += 1 - - if repairedCount > 0: - logger.info(f"Feature '{FEATURE_CODE}': Repaired {repairedCount} instance roles with missing AccessRules") - - return repairedCount - - -def _ensureAccessRulesForRole(db, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int: - """ - Ensure AccessRules exist for a role based on templates. - - Args: - db: Database connector - roleId: Role ID - ruleTemplates: List of rule templates - - Returns: - Number of rules created - """ - from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext - - # Get existing rules for this role - existingRules = db.getRecordset(AccessRule, recordFilter={"roleId": roleId}) - - # Create a set of existing rule signatures to avoid duplicates - existingSignatures = set() - for rule in existingRules: - sig = (rule.get("context"), rule.get("item")) - existingSignatures.add(sig) - - createdCount = 0 - for template in ruleTemplates: - context = template.get("context", "UI") - item = template.get("item") - sig = (context, item) - - if sig in existingSignatures: - continue - - # Map context string to enum - if context == "UI": - contextEnum = AccessRuleContext.UI - elif context == "DATA": - contextEnum = AccessRuleContext.DATA - elif context == "RESOURCE": - contextEnum = AccessRuleContext.RESOURCE - else: - contextEnum = context - - newRule = AccessRule( - roleId=roleId, - context=contextEnum, - item=item, - view=template.get("view", False), - read=template.get("read"), - create=template.get("create"), - update=template.get("update"), - delete=template.get("delete"), - ) - db.recordCreate(AccessRule, newRule.model_dump()) - createdCount += 1 - - if createdCount > 0: - logger.debug(f"Created {createdCount} AccessRules for role {roleId}") - - return createdCount -import json -import uuid -import asyncio -import re -from typing import Optional, Dict, Any, List - -from modules.features.chatbot.datamodelFeatureChatbot import ChatWorkflow, UserInputRequest, WorkflowModeEnum, ChatLog, ChatDocument -from modules.datamodels.datamodelUam import User -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum -from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference -from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp -from modules.services import getInterface as getServices -from modules.features.chatbot import interfaceFeatureChatbot -from modules.features.chatbot.eventManager import get_event_manager -from modules.features.chatbot.chatbotUtils import ( - generate_conversation_name, -) -from modules.features.chatbot.chatbotConfig import get_chatbot_config, ChatbotConfig -from modules.features.chatbot.langgraphChatbot import LangGraphChatbot -from langchain_core.messages import HumanMessage -import base64 - -logger = logging.getLogger(__name__) - - -def _extractJsonFromResponse(content: str) -> Optional[dict]: - """Extract JSON from AI response, handling markdown code blocks.""" - # Try direct JSON parse first - try: - return json.loads(content.strip()) - except json.JSONDecodeError: - pass - - # Try to extract JSON from markdown code blocks - json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL) - if json_match: - try: - return json.loads(json_match.group(1)) - except json.JSONDecodeError: - pass - - # Try to find JSON object in the text - json_match = re.search(r'\{.*\}', content, re.DOTALL) - if json_match: - try: - return json.loads(json_match.group(0)) - except json.JSONDecodeError: - pass - - return None - - -async def chatProcess( - currentUser: User, - mandateId: str, - userInput: UserInputRequest, - workflowId: Optional[str] = None, - featureInstanceId: Optional[str] = None -) -> ChatWorkflow: - """ - Simple chatbot processing - analyze user input and generate queries. - - Flow: - 1. Create or load workflow - 2. Store user message - 3. AI analyzes user input to create list of needed queries - 4. Stream queries back - - Args: - currentUser: Current user - mandateId: Mandate context (from RequestContext / X-Mandate-Id header) - userInput: User input request - workflowId: Optional workflow ID to continue existing conversation - featureInstanceId: Optional feature instance ID for instance-level isolation - - Returns: - ChatWorkflow instance - """ - try: - # Load chatbot configuration for this instance - chatbot_config = get_chatbot_config(featureInstanceId) - logger.info(f"Loaded chatbot config for instance {featureInstanceId}: connector={chatbot_config.connector_type}, maxQueries={chatbot_config.max_queries}") - - # Validate that required system prompt is configured - if not chatbot_config.custom_system_prompt: - error_msg = f"Chatbot instance {featureInstanceId} is missing required customSystemPrompt configuration" - logger.error(error_msg) - raise ValueError(error_msg) - - # Get services normally (for other services like chat, ai, etc.) - services = getServices(currentUser, None, mandateId=mandateId) - - # Replace interfaceDbChat with chatbot-specific interface that supports featureInstanceId - # This ensures instance-level data isolation - interfaceDbChat = interfaceFeatureChatbot.getInterface( - currentUser, - mandateId=mandateId, - featureInstanceId=featureInstanceId - ) - - # Update services to use the chatbot-specific interface - services.interfaceDbChat = interfaceDbChat - - # Get event manager and create queue if needed - event_manager = get_event_manager() - - # Create or load workflow - if workflowId: - workflow = interfaceDbChat.getWorkflow(workflowId) - if not workflow: - raise ValueError(f"Workflow {workflowId} not found") - - # Verify workflow belongs to this instance if instanceId is provided - if featureInstanceId and workflow.featureInstanceId != featureInstanceId: - raise ValueError(f"Workflow {workflowId} does not belong to instance '{featureInstanceId}'") - - # Resume workflow: increment round number - new_round = workflow.currentRound + 1 - interfaceDbChat.updateWorkflow(workflowId, { - "status": "running", - "currentRound": new_round, - "lastActivity": getUtcTimestamp() - }) - workflow = interfaceDbChat.getWorkflow(workflowId) - logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}") - - # Create event queue if it doesn't exist (for streaming) - if not event_manager.has_queue(workflowId): - event_manager.create_queue(workflowId) - else: - # Generate conversation name based on user's prompt - conversation_name = await generate_conversation_name( - services, - userInput.prompt, - userInput.userLanguage - ) - - # Create new workflow - workflowData = { - "id": str(uuid.uuid4()), - "mandateId": mandateId, - "featureInstanceId": featureInstanceId, - "status": "running", - "name": conversation_name, - "currentRound": 1, - "currentTask": 0, - "currentAction": 0, - "totalTasks": 0, - "totalActions": 0, - "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value, - "startedAt": getUtcTimestamp(), - "lastActivity": getUtcTimestamp() - } - workflow = interfaceDbChat.createWorkflow(workflowData) - logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}") - - # Create event queue for new workflow (for streaming) - event_manager.create_queue(workflow.id) - - # Reload workflow to get current message count - workflow_id = workflow.id - workflow = interfaceDbChat.getWorkflow(workflow_id) - if not workflow: - raise ValueError(f"Failed to reload workflow {workflow_id}") - - # Process uploaded files and create ChatDocuments - user_documents = [] - if userInput.listFileId and len(userInput.listFileId) > 0: - logger.info(f"Processing {len(userInput.listFileId)} uploaded file(s) for user message") - for fileId in userInput.listFileId: - try: - # Get file info from chat service - fileInfo = services.chat.getFileInfo(fileId) - if not fileInfo: - logger.warning(f"No file info found for file ID {fileId}") - continue - - originalFileName = fileInfo.get("fileName", "unknown") - originalMimeType = fileInfo.get("mimeType", "application/octet-stream") - fileSizeToUse = fileInfo.get("size", 0) - - # Create ChatDocument for the file - document = ChatDocument( - id=str(uuid.uuid4()), - messageId="", # Will be set when message is created - fileId=fileId, - fileName=originalFileName, - fileSize=fileSizeToUse, - mimeType=originalMimeType, - roundNumber=workflow.currentRound, - taskNumber=0, - actionNumber=0 - ) - user_documents.append(document) - logger.info(f"Created ChatDocument for file {fileId} -> {originalFileName}") - except Exception as e: - logger.error(f"Error processing file ID {fileId}: {e}", exc_info=True) - - # Store user message - # Get message count safely (workflow.messages might be None or empty) - message_count = len(workflow.messages) if workflow.messages else 0 - userMessageData = { - "id": f"msg_{uuid.uuid4()}", - "workflowId": workflow.id, - "message": userInput.prompt, - "role": "user", - "status": "first" if workflowId is None else "step", - "sequenceNr": message_count + 1, - "publishedAt": getUtcTimestamp(), - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - - userMessage = interfaceDbChat.createMessage(userMessageData) - logger.info(f"Stored user message: {userMessage.id} with {len(user_documents)} document(s)") - - # Emit message event for streaming (exact chatData format) - message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp()) - await event_manager.emit_event( - context_id=workflow.id, - event_type="chatdata", - data={ - "type": "message", - "createdAt": message_timestamp, - "item": userMessage.dict() - }, - event_category="chat" - ) - - # Update workflow status - interfaceDbChat.updateWorkflow(workflow.id, { - "status": "running", - "lastActivity": getUtcTimestamp() - }) - - # Process in background (async) - asyncio.create_task(_processChatbotMessage( - services, - workflow.id, - userInput, - userMessage.id, - chatbot_config - )) - - # Reload workflow to include new message - workflow = interfaceDbChat.getWorkflow(workflow.id) - return workflow - - except Exception as e: - logger.error(f"Error in chatProcess: {str(e)}", exc_info=True) - raise - - -async def _execute_queries_parallel(queries: List[Dict[str, Any]], chatbot_config: ChatbotConfig) -> Dict[str, Any]: - """ - Execute multiple SQL queries in parallel with shared connector. - - Args: - queries: List of query dictionaries, each containing: - - "query": SQL query string - - "purpose": Description of what the query retrieves - - "table": Primary table name - chatbot_config: ChatbotConfig instance for connector selection - - Returns: - Dictionary mapping query indices to results: - - "query_1", "query_2", etc.: Success result text - - "query_1_data", "query_2_data", etc.: Raw data arrays - - "query_1_error", "query_2_error", etc.: Error messages if query failed - """ - # Create connector instance based on configuration - connector = chatbot_config.get_connector_instance() - try: - async def execute_single_query(idx: int, query_info: Dict[str, Any]): - """Execute a single query using shared connector.""" - try: - query_text = query_info.get("query", "") - result = await connector.executeQuery(query_text, return_json=True) - return idx, result, None - except Exception as e: - return idx, None, str(e) - - # Execute all queries in parallel with shared connector - tasks = [execute_single_query(i, q) for i, q in enumerate(queries)] - results = await asyncio.gather(*tasks, return_exceptions=True) - finally: - # Close connector once after all queries complete - await connector.close() - - # Process results into dictionary - query_results = {} - for result in results: - if isinstance(result, Exception): - # Handle exceptions from gather - logger.error(f"Exception in parallel query execution: {result}") - continue - - idx, result_data, error = result - - if error: - query_results[f"query_{idx+1}_error"] = error - logger.error(f"Query {idx+1} failed: {error}") - else: - if result_data and not result_data.get("text", "").startswith(("Error:", "Query failed:")): - query_results[f"query_{idx+1}"] = result_data.get("text", "") - query_results[f"query_{idx+1}_data"] = result_data.get("data", []) - row_count = len(result_data.get('data', [])) - logger.info(f"Query {idx+1} executed successfully, returned {row_count} rows") - else: - error_text = result_data.get("text", "Query failed") if result_data else "Query failed: No response" - query_results[f"query_{idx+1}_error"] = error_text - logger.error(f"Query {idx+1} failed: {error_text}") - - return query_results - - -async def _emit_log_and_event( - interfaceDbChat, - workflowId: str, - event_manager, - message: str, - log_type: str = "info", - status: str = "running", - round_number: Optional[int] = None -) -> None: - """ - Store log in database and emit event for streaming. - - Args: - interfaceDbChat: Database interface - workflowId: Workflow ID - event_manager: Event manager for streaming - message: Log message - log_type: Log type (info, warning, error) - status: Status string - round_number: Optional round number (will be fetched from workflow if not provided) - """ - try: - # Get round number from workflow if not provided - if round_number is None: - workflow = interfaceDbChat.getWorkflow(workflowId) - if workflow: - round_number = workflow.currentRound - - log_timestamp = getUtcTimestamp() - log_data = { - "id": f"log_{uuid.uuid4()}", - "workflowId": workflowId, - "message": message, - "type": log_type, - "timestamp": log_timestamp, - "status": status, - "roundNumber": round_number - } - # Store log in database - created_log = interfaceDbChat.createLog(log_data) - - # Emit event directly for streaming (using correct signature) - if created_log and event_manager: - try: - # Convert to dict if it's a Pydantic model - if hasattr(created_log, "model_dump"): - log_dict = created_log.model_dump() - elif hasattr(created_log, "dict"): - log_dict = created_log.dict() - else: - log_dict = log_data - - await event_manager.emit_event( - context_id=workflowId, - event_type="chatdata", - data={ - "type": "log", - "createdAt": log_timestamp, - "item": log_dict - }, - event_category="chat", - message="New log", - step="log" - ) - except Exception as emit_error: - logger.warning(f"Error emitting log event: {emit_error}") - except Exception as e: - logger.error(f"Error storing log: {e}", exc_info=True) - - -async def _check_workflow_stopped(interfaceDbChat, workflowId: str) -> bool: - """ - Check if workflow was stopped. - - Args: - interfaceDbChat: Database interface - workflowId: Workflow ID - - Returns: - True if workflow is stopped, False otherwise - """ - try: - workflow = interfaceDbChat.getWorkflow(workflowId) - return workflow and workflow.status == "stopped" - except Exception as e: - logger.warning(f"Error checking workflow status: {e}") - return False - - -def _build_final_answer_prompt_with_results( - system_prompt: str, - user_prompt: str, - context: str, - db_results_part: str, - web_results_part: str, - is_resumed: bool = False, - has_db_results: bool = False, - has_web_results: bool = False -) -> str: - """ - Build the complete prompt for generating the final answer with database and web results. - Uses the provided system_prompt from configuration instead of hardcoded prompts. - - Args: - system_prompt: System prompt from chatbot configuration - user_prompt: User's original prompt - context: Conversation context - db_results_part: Formatted database results section - web_results_part: Formatted web research results section - is_resumed: If True, exclude system prompt (already in context from previous messages) - has_db_results: Whether database results are available - has_web_results: Whether web research results are available - - Returns: - Complete formatted prompt string - """ - if is_resumed: - # System prompt already in context, don't repeat it - # Emphasize that the current question is primary - if context: - context_section = f""" -⚠️⚠️⚠️ KONTEXT (NUR FÜR REFERENZ - IGNORIEREN WENN NICHT BENÖTIGT) ⚠️⚠️⚠️ -{context} -⚠️⚠️⚠️ ENDE KONTEXT ⚠️⚠️⚠️ - -""" - else: - context_section = "" - - # Build instructions based on what data sources are available - if has_web_results and not has_db_results: - # Only web research - emphasize web research - instructions = f"""⚠️⚠️⚠️ WICHTIG - NUR INTERNET-RECHERCHE VERFÜGBAR ⚠️⚠️⚠️ -- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss -- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf -- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext - -{db_results_part}{web_results_part} - -KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte. - -⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️ -- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben -- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..." -- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL)) -- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert -- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind -- ❌ ABSOLUT VERBOTEN: Daten erfinden - -WICHTIG: -- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..." -- Klare, strukturierte Antwort mit Quellenangaben -- Präsentiere die gefundenen Informationen ausführlich""" - elif has_db_results and not has_web_results: - # Only database - use existing database-focused instructions - instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️ -- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss -- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf -- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext - -{db_results_part}{web_results_part} - -KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar". - -⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️ -- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen -- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen -- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben -- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle) -- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden -- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen -- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen - -WICHTIG: -- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!) -- Klare, strukturierte Antwort -- Markdown-Tabellen (max 20 Zeilen) -- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)""" - elif not has_db_results and not has_web_results: - # No results from either source - but database query was executed - instructions = f"""⚠️⚠️⚠️ KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️ -- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss -- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf -- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext - -{db_results_part}{web_results_part} - -⚠️⚠️⚠️ ABSOLUT KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️ -Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber keine Ergebnisse zurückgegeben. -DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt! - -VERBOTEN - NIEMALS SAGEN: -- "Ich habe keinen Zugriff auf die Datenbank" -- "Ich kann nicht auf die Datenbank zugreifen" -- "Es tut mir leid, aber ich habe keinen Zugriff" -- "Ich habe keinen Zugriff auf Echtzeit-Datenbanken" -- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast! - -RICHTIG - SAGE STATTDESSEN: -- "Es wurden keine Artikel gefunden" -- "Keine passenden Artikel in der Datenbank gefunden" -- "Die Datenbanksuche ergab keine Treffer" -- "Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden" - -WICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden! -Beginne deine Antwort mit: "Ich habe in der Datenbank gesucht, aber..." oder "Es wurden keine Artikel gefunden..." oder ähnlich.""" - else: - # Both database and web research - instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️ -- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss -- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf -- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext - -{db_results_part}{web_results_part} - -KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar". - -⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️ -- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE -- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse -- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen -- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL)) -- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle) - -WICHTIG: -- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse -- Dann "Aus meiner Web-Recherche..." für Internet-Informationen -- Klare, strukturierte Antwort mit Quellenangaben""" - - return f"""⚠️⚠️⚠️ AKTUELLE FRAGE (PRIMÄR - DIESE MUSS BEANTWORTET WERDEN) ⚠️⚠️⚠️ -Antworte auf die folgende Frage des Nutzers: {user_prompt} -{context_section}{instructions}""" - else: - # New chat: include system prompt - # Build instructions based on what data sources are available - if has_web_results and not has_db_results: - # Only web research - emphasize web research - return f"""{system_prompt} - -Antworte auf die folgende Frage des Nutzers: {user_prompt}{context} - -{db_results_part}{web_results_part} - -KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte. - -⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️ -- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben -- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..." -- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL)) -- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert -- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind -- ❌ ABSOLUT VERBOTEN: Daten erfinden - -WICHTIG: -- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..." -- Klare, strukturierte Antwort mit Quellenangaben -- Präsentiere die gefundenen Informationen ausführlich""" - elif has_db_results and not has_web_results: - # Only database - use existing database-focused instructions - return f"""{system_prompt} - -Antworte auf die folgende Frage des Nutzers: {user_prompt}{context} - -{db_results_part}{web_results_part} - -KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar". - -⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️ -- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen -- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen -- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben -- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle) -- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden -- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen -- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen - -WICHTIG: -- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!) -- Klare, strukturierte Antwort -- Markdown-Tabellen (max 20 Zeilen) -- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)""" - elif not has_db_results and not has_web_results: - # No results from either source - but database query was executed - return f"""{system_prompt} - -Antworte auf die folgende Frage des Nutzers: {user_prompt}{context} - -{db_results_part}{web_results_part} - -⚠️⚠️⚠️ KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️ -Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber keine Ergebnisse zurückgegeben. -DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt! - -VERBOTEN - NIEMALS SAGEN: -- "Ich habe keinen Zugriff auf die Datenbank" -- "Ich kann nicht auf die Datenbank zugreifen" -- "Es tut mir leid, aber ich habe keinen Zugriff" -- "Ich habe keinen Zugriff auf Echtzeit-Datenbanken" -- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast! - -RICHTIG - SAGE STATTDESSEN: -- "Es wurden keine Artikel gefunden" -- "Keine passenden Artikel in der Datenbank gefunden" -- "Die Datenbanksuche ergab keine Treffer" -- "Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden" - -WICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden! -Beginne deine Antwort mit: "Ich habe in der Datenbank gesucht, aber..." oder "Es wurden keine Artikel gefunden..." oder ähnlich.""" - else: - # Both database and web research - return f"""{system_prompt} - -Antworte auf die folgende Frage des Nutzers: {user_prompt}{context} - -{db_results_part}{web_results_part} - -KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar". - -⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️ -- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE -- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse -- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen -- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL)) -- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle) - -WICHTIG: -- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse -- Dann "Aus meiner Web-Recherche..." für Internet-Informationen -- Klare, strukturierte Antwort mit Quellenangaben""" - - -def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults: Optional[Dict[str, Any]] = None) -> str: - """ - Build enriched web research query by extracting product context from conversation history and current prompt. - - Extracts product information from: - 1. Current user prompt (article numbers, product mentions) - 2. Database query results (if available) - 3. Previous assistant messages (conversation history) - - Args: - userPrompt: Current user prompt - workflowMessages: List of workflow messages (conversation history) - queryResults: Optional database query results to extract product info from - - Returns: - Enriched search query string - """ - # Normalize user prompt for detection - prompt_lower = userPrompt.lower().strip() - - # Patterns that indicate a search request - search_patterns = [ - "ja", "yes", "oui", "si", - "such", "suche", "search", "recherche", "recherchier", - "internet", "web", "online", - "datenblatt", "datasheet", "fiche technique", - "mehr informationen", "more information", "plus d'information", - "weitere informationen", "further information", "additional information" - ] - - # Certification patterns that require web research - certification_patterns = [ - "ul", "ce", "tüv", "vde", "iec", "en", "iso", - "zertifiziert", "certified", "certification", "zertifizierung", - "geprüft", "approved", "compliance" - ] - - # Check if current prompt contains search-related keywords - has_search_intent = any(pattern in prompt_lower for pattern in search_patterns) - - # Check if prompt contains certification-related keywords - has_certification_intent = any(pattern in prompt_lower for pattern in certification_patterns) - - # Extract product information - try multiple sources - article_number = None - article_description = None - supplier = None - - # Pattern for article numbers like "6AV2 181-8XP00-0AX0" or "6AV2181-8XP00-0AX0" - article_patterns = [ - r'\b[A-Z0-9]{2,}\s+[0-9]{3,}-[A-Z0-9-]+\b', # With space: "6AV2 181-8XP00-0AX0" - r'\b[A-Z0-9]{4,}[\s-][A-Z0-9-]{6,}\b', # General pattern - r'\b[A-Z]{2,}[0-9]+\s+[0-9]+-[A-Z0-9-]+\b', # Specific Siemens pattern - ] - - # 1. First, try to extract from current user prompt - for pattern in article_patterns: - matches = re.findall(pattern, userPrompt) - if matches: - article_number = matches[0] - logger.info(f"Extracted article number from user prompt: {article_number}") - break - - # 2. Try to extract from database query results if available - # Always check queryResults to enrich with product description and supplier, even if article_number was already found - if queryResults: - # Look for article numbers in query result text (if not already found) - if not article_number: - for key in queryResults.keys(): - if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"): - result_text = queryResults.get(key, "") - if isinstance(result_text, str): - for pattern in article_patterns: - matches = re.findall(pattern, result_text) - if matches: - article_number = matches[0] - logger.info(f"Extracted article number from query results: {article_number}") - break - if article_number: - break - - # Always check data arrays for product description and supplier (even if article_number already found) - for key in queryResults.keys(): - if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"): - data_key = f"{key}_data" - if data_key in queryResults: - data_array = queryResults[data_key] - if isinstance(data_array, list) and len(data_array) > 0: - # Look for article number in first row (if not already found) - first_row = data_array[0] - if isinstance(first_row, dict): - # Check common article number fields (if not already found) - if not article_number: - for field in ["Artikelnummer", "Artikelkürzel", "article_number", "articleNumber"]: - if field in first_row and first_row[field]: - article_number = str(first_row[field]) - logger.info(f"Extracted article number from query data: {article_number}") - break - - # Always check article description (can enrich even if article_number already found) - if not article_description: - for field in ["Artikelbezeichnung", "Bezeichnung", "article_description", "description"]: - if field in first_row and first_row[field]: - article_description = str(first_row[field]) - logger.info(f"Extracted article description from query data: {article_description}") - break - - # Always check supplier (can enrich even if article_number already found) - if not supplier: - for field in ["Lieferant", "Supplier", "supplier"]: - if field in first_row and first_row[field]: - supplier = str(first_row[field]) - logger.info(f"Extracted supplier from query data: {supplier}") - break - - # If we found all needed info, we can stop - if article_number and article_description and supplier: - break - - # Check if current prompt is an explicit search request that should NOT use context - # If user explicitly asks to search for something, prioritize that over previous messages - explicit_search_patterns = [ - r"recherchier\s+(?:im\s+internet\s+)?nach\s+(.+)", - r"suche\s+(?:im\s+internet\s+)?nach\s+(.+)", - r"search\s+(?:the\s+internet\s+)?for\s+(.+)", - r"find\s+(?:information\s+)?(?:about\s+)?(.+)", - r"recherche\s+(?:sur\s+internet\s+)?(.+)" - ] - - explicit_search_term = None - for pattern in explicit_search_patterns: - match = re.search(pattern, userPrompt, re.IGNORECASE) - if match: - explicit_search_term = match.group(1).strip() - logger.info(f"Found explicit search term in prompt: '{explicit_search_term}'") - break - - # 3. Extract from previous assistant messages (conversation history) - # ONLY if there's no explicit search term (to avoid using old context for new searches) - if not explicit_search_term and (not article_number or not article_description): - for msg in reversed(workflowMessages[-10:]): - if msg.role == "assistant": - message_text = msg.message - - # Extract article number if not found yet - if not article_number: - for pattern in article_patterns: - matches = re.findall(pattern, message_text) - if matches: - article_number = matches[0] - break - - # Extract article description if not found yet - if not article_description: - description_patterns = [ - r'Es handelt sich um\s+([^\.]+)', - r'It is a\s+([^\.]+)', - r'C\'est\s+([^\.]+)', - r'Bezeichnung:\s*([^\n]+)', - r'Description:\s*([^\n]+)', - r'Artikelbezeichnung:\s*([^\n]+)', - r'Artikelbezeichnung:\s*([^\n]+)' - ] - for pattern in description_patterns: - match = re.search(pattern, message_text, re.IGNORECASE) - if match: - article_description = match.group(1).strip() - break - - # Extract supplier if not found yet - if not supplier: - supplier_patterns = [ - r'von\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)', - r'from\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)', - r'Lieferant:\s*([^\n]+)', - r'Supplier:\s*([^\n]+)' - ] - for pattern in supplier_patterns: - match = re.search(pattern, message_text, re.IGNORECASE) - if match: - supplier = match.group(1).strip() - break - - # Stop if we found everything - if article_number and article_description and supplier: - break - - # Build enriched search query - query_parts = [] - - # If we have an explicit search term, use it as the primary query - if explicit_search_term: - query_parts.append(explicit_search_term) - logger.info(f"Using explicit search term as primary query: '{explicit_search_term}'") - # If we have search intent but no product info, try to use the user prompt intelligently - elif has_search_intent and not article_number and not article_description: - # Try to extract meaningful parts from the prompt - # Remove common search phrases and keep the product-related parts - cleaned_prompt = userPrompt - for phrase in ["recherchier", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information", "im internet", "the internet", "sur internet"]: - cleaned_prompt = re.sub(phrase, "", cleaned_prompt, flags=re.IGNORECASE) - cleaned_prompt = cleaned_prompt.strip() - - # Use cleaned prompt if it has meaningful content - if cleaned_prompt and len(cleaned_prompt) > 2: - query_parts.append(cleaned_prompt) - - # Add article description if found (but NOT if we have an explicit search term) - if article_description and not explicit_search_term: - query_parts.append(article_description) - - # Add article number if found (but NOT if we have an explicit search term) - if article_number and not explicit_search_term: - query_parts.append(article_number) - - # Add supplier if found (but NOT if we have an explicit search term) - if supplier and not explicit_search_term: - query_parts.append(supplier) - - # Extract certification information from prompt if present - certification_terms = [] - if has_certification_intent: - # Extract specific certification mentions - cert_keywords = { - "ul": "UL certification", - "ce": "CE certification", - "tüv": "TÜV certification", - "vde": "VDE certification", - "iec": "IEC certification", - "iso": "ISO certification" - } - for cert_key, cert_term in cert_keywords.items(): - if cert_key in prompt_lower: - certification_terms.append(cert_term) - - # If no specific certification found but certification intent detected, add generic term - if not certification_terms: - certification_terms.append("certification") - - # Add certification terms to query if found - if certification_terms: - query_parts.extend(certification_terms) - - # Add "Datenblatt" or "datasheet" if user requested it or if we have product info - # But NOT if we have an explicit search term (user wants to search for something specific) - if not explicit_search_term: - if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower: - query_parts.append("Datenblatt") - elif query_parts and (article_number or article_description): - # If we have product info but no explicit request for datasheet, add it anyway - query_parts.append("Datenblatt") - - # If we found product information or built a meaningful query, use it - if query_parts: - enriched_query = " ".join(query_parts) - logger.info(f"Built enriched search query: '{enriched_query}' from context (original: '{userPrompt}')") - return enriched_query - else: - # Fall back to original prompt, but try to clean it up - logger.info(f"No product context found, using original prompt: '{userPrompt}'") - return userPrompt - - -async def _convert_file_ids_to_document_references( - services, - file_ids: List[str] -) -> DocumentReferenceList: - """ - Convert file IDs to DocumentReferenceList for use with ai.process. - - Args: - services: Services instance - file_ids: List of file IDs to convert - - Returns: - DocumentReferenceList with docItem references - """ - references = [] - - # Get workflow to search for ChatDocuments - workflow = services.workflow - if not workflow: - logger.error("Cannot convert file IDs to document references: workflow not set in services") - return DocumentReferenceList(references=[]) - - for file_id in file_ids: - try: - # Get file info to verify it exists - file_info = services.chat.getFileInfo(file_id) - if not file_info: - logger.warning(f"File {file_id} not found, skipping") - continue - - # Find ChatDocument that has this fileId - document_id = None - if workflow.messages: - for message in workflow.messages: - if hasattr(message, 'documents') and message.documents: - for doc in message.documents: - if getattr(doc, 'fileId', None) == file_id: - document_id = getattr(doc, 'id', None) - break - if document_id: - break - - # Search database if not found in messages - if not document_id: - try: - from modules.interfaces.interfaceRbac import getRecordsetWithRBAC - documents = getRecordsetWithRBAC( - services.interfaceDbChat.db, - ChatDocument, - services.user, - recordFilter={"fileId": file_id}, - mandateId=services.mandateId - ) - if documents: - workflow_message_ids = {msg.id for msg in workflow.messages} if workflow.messages else set() - for doc in documents: - if doc.get("messageId") in workflow_message_ids: - document_id = doc.get("id") - break - except Exception: - pass # Fallback to fileId - - # Use ChatDocument ID if found, otherwise use fileId as fallback - ref = DocumentItemReference(documentId=document_id if document_id else file_id) - references.append(ref) - except Exception as e: - logger.error(f"Error converting fileId {file_id}: {e}", exc_info=True) - - logger.info(f"Converted {len(references)} file IDs to document references") - return DocumentReferenceList(references=references) - - -def _format_query_results_as_lookup(query_data: Dict[str, List[Dict]]) -> str: - """ - Format database query results as JSON lookup table for Excel matching. - Converts query result data into structured JSON format: {Artikelnummer: {columns...}} - - Args: - query_data: Dict with query_key -> list of row dicts (from connector with return_json=True) - - Returns: - JSON string formatted as lookup table - """ - lookup_table = {} - - for query_key, rows in query_data.items(): - if query_key == "error" or not rows: - logger.warning(f"Skipping query key '{query_key}' - no rows or error") - continue - - logger.info(f"Processing {len(rows)} rows from query '{query_key}'") - - for row in rows: - if not isinstance(row, dict): - logger.warning(f"Skipping non-dict row: {type(row)}") - continue - - # Find Artikelnummer field (case-insensitive) - artikelnummer = None - for key in row.keys(): - if key.lower() in ['artikelnummer', 'artikel_nummer', 'art_nr', 'part_number']: - artikelnummer = str(row[key]) - break - - if artikelnummer: - lookup_table[artikelnummer] = row - else: - logger.warning(f"No Artikelnummer found in row with keys: {list(row.keys())}") - - logger.info(f"Generated lookup table with {len(lookup_table)} entries") - if lookup_table: - sample_keys = list(lookup_table.keys())[:3] - logger.info(f"Sample Artikelnummern: {sample_keys}") - if sample_keys: - sample_entry = lookup_table[sample_keys[0]] - logger.info(f"Sample entry keys: {list(sample_entry.keys())}") - - return json.dumps(lookup_table, ensure_ascii=False, indent=2) - - -async def _create_chat_document_from_action_document( - services, - action_document, - message_id: str, - workflow_id: str, - round_number: int -) -> ChatDocument: - """ - Create a ChatDocument from an ActionDocument by storing the file data. - - Args: - services: Services instance - action_document: ActionDocument from ai.process result - message_id: ID of the message to attach to - workflow_id: Workflow ID - round_number: Round number - - Returns: - ChatDocument instance - """ - try: - # Get file data (could be bytes or string) - document_data = action_document.documentData - - # Convert to bytes if needed - if isinstance(document_data, str): - # Check if it's base64 encoded - try: - # Try to decode as base64 first - file_bytes = base64.b64decode(document_data) - except Exception: - # Not base64, encode as UTF-8 - file_bytes = document_data.encode('utf-8') - elif isinstance(document_data, bytes): - file_bytes = document_data - else: - # Try to convert to bytes - try: - file_bytes = bytes(document_data) - except Exception: - # Last resort: convert to string then encode - file_bytes = str(document_data).encode('utf-8') - - # Get MIME type (default to Excel) - mime_type = action_document.mimeType or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - - # Get file name - file_name = action_document.documentName or "data_export.xlsx" - # Ensure it has .xlsx extension - if not file_name.lower().endswith('.xlsx'): - # Remove any existing extension and add .xlsx - file_name = file_name.rsplit('.', 1)[0] + '.xlsx' - - # Store file using component interface - file_item = services.interfaceDbComponent.createFile( - name=file_name, - mimeType=mime_type, - content=file_bytes - ) - - # Store file data - success = services.interfaceDbComponent.createFileData(file_item.id, file_bytes) - if not success: - logger.warning(f"Failed to store file data for {file_item.id}, but continuing...") - - # Create ChatDocument - chat_document = ChatDocument( - id=str(uuid.uuid4()), - messageId=message_id, - fileId=file_item.id, - fileName=file_name, - fileSize=len(file_bytes), - mimeType=mime_type, - roundNumber=round_number, - taskNumber=0, - actionNumber=0 - ) - - logger.info(f"Created ChatDocument {chat_document.id} from ActionDocument {file_name} (size: {len(file_bytes)} bytes)") - return chat_document - - except Exception as e: - logger.error(f"Error creating ChatDocument from ActionDocument: {e}", exc_info=True) - raise - - -async def _processChatbotMessage( - services, - workflowId: str, - userInput: UserInputRequest, - userMessageId: str, - chatbot_config: ChatbotConfig -): - """ - Process chatbot message using LangGraph workflow. - Uses LangGraph to handle the conversation flow with tools (SQL, Tavily, streaming). - """ - event_manager = get_event_manager() - - try: - interfaceDbChat = services.interfaceDbChat - - # Reload workflow to get current messages - workflow = interfaceDbChat.getWorkflow(workflowId) - if not workflow: - logger.error(f"Workflow {workflowId} not found during processing") - await event_manager.emit_event( - context_id=workflowId, - event_type="error", - data={"error": f"Workflow {workflowId} nicht gefunden"}, - event_category="workflow", - message=f"Workflow {workflowId} nicht gefunden", - step="error" - ) - return - - # Check if workflow was stopped before starting - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped, aborting processing") - return - - await services.ai.ensureAiObjectsInitialized() - - # Get connector instance - connector = chatbot_config.get_connector_instance() - - # Get system prompt - system_prompt = chatbot_config.custom_system_prompt - if not system_prompt: - raise ValueError(f"System prompt not configured for chatbot instance") - - # Create LangGraph chatbot instance - logger.info(f"Creating LangGraph chatbot for workflow {workflowId}") - chatbot = await LangGraphChatbot.create( - services=services, - system_prompt=system_prompt, - connector_instance=connector, - enable_web_research=chatbot_config.enable_web_research, - context_window_size=8000 - ) - - # Process message using LangGraph streaming - logger.info(f"Processing message with LangGraph for workflow {workflowId}") - final_answer = None - chat_history = [] - - async for event in chatbot.stream_events(message=userInput.prompt, chat_id=workflowId): - # Check if workflow was stopped - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped during processing") - return - - event_type = event.get("type") - - if event_type == "status": - # Emit status update - label = event.get("label", "") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, label, log_type="info") - - elif event_type == "final": - # Final response received - response_data = event.get("response", {}) - chat_history = response_data.get("chat_history", []) - # Extract final answer from chat history (last assistant message) - for msg in reversed(chat_history): - if msg.get("role") == "assistant": - final_answer = msg.get("content", "") - break - - elif event_type == "error": - # Error occurred - error_msg = event.get("message", "Unknown error") - logger.error(f"LangGraph error: {error_msg}") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: {error_msg}", log_type="error") - final_answer = f"Entschuldigung, ein Fehler ist aufgetreten: {error_msg}" - - # Close connector - try: - await connector.close() - except Exception as e: - logger.warning(f"Error closing connector: {e}") - - # Check if workflow was stopped before storing answer - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped, not storing final message") - return - - # Store final answer if we have one - if final_answer: - workflow = interfaceDbChat.getWorkflow(workflowId) - message_id = f"msg_{uuid.uuid4()}" - assistantMessageData = { - "id": message_id, - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": final_answer, - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": True, - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - - assistantMessage = interfaceDbChat.createMessage(assistantMessageData) - logger.info(f"Stored assistant message: {assistantMessage.id}") - - # Emit message event for streaming - message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp()) - await event_manager.emit_event( - context_id=workflowId, - event_type="chatdata", - data={ - "type": "message", - "createdAt": message_timestamp, - "item": assistantMessage.dict() - }, - event_category="chat" - ) - - # Update workflow status to completed - if not await _check_workflow_stopped(interfaceDbChat, workflowId): - interfaceDbChat.updateWorkflow(workflowId, { - "status": "completed", - "lastActivity": getUtcTimestamp() - }) - - await event_manager.emit_event( - context_id=workflowId, - event_type="complete", - data={"workflowId": workflowId}, - event_category="workflow", - message="Chatbot-Verarbeitung abgeschlossen", - step="complete" - ) - - # Schedule cleanup - await event_manager.cleanup(workflowId, delay=300.0) - - logger.info(f"LangGraph processing completed for workflow {workflowId}") - - except Exception as e: - logger.info("Analyzing user input to generate queries...") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...") - - # Use custom prompt from configuration (already validated at start of chatProcess) - analysisPrompt = chatbot_config.custom_analysis_prompt.replace("{userPrompt}", userInput.prompt).replace("{context}", context or "") - - # CRITICAL: Add explicit JSON format requirement to ensure AI returns JSON - json_format_instruction = """ - -⚠️⚠️⚠️ ABSOLUT KRITISCH - JSON-FORMAT ERFORDERLICH ⚠️⚠️⚠️ -DU MUSST DEINE ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT GEBEN! -ANTWORTE NICHT MIT NORMALEM TEXT ODER EINER CHAT-ANTWORT! -DEINE ANTWORT MUSS EIN GÜLTIGES JSON-OBJEKT SEIN! - -Erforderliches JSON-Format: -{ - "needsDatabaseQuery": true/false, - "needsWebResearch": true/false, - "sqlQueries": [ - { - "query": "SQL-Abfrage hier", - "purpose": "Zweck der Abfrage", - "table": "Haupttabelle" - } - ], - "reasoning": "Begründung für die Abfragen" -} - -⚠️⚠️⚠️ KRITISCH - WANN DATENBANKABFRAGE ERFORDERLICH ⚠️⚠️⚠️ -SETZE "needsDatabaseQuery": true, WENN: -- Der Nutzer nach Artikeln, Produkten, Preisen, Lagerbeständen, Lieferanten fragt -- Der Nutzer nach Informationen aus der Datenbank fragt (auch allgemeine Fragen!) -- Der Nutzer eine Frage stellt, die mit Datenbank-Daten beantwortet werden kann -- Du dir nicht sicher bist - dann setze "needsDatabaseQuery": true und führe eine allgemeine Abfrage durch! - -VERBOTEN: -- "needsDatabaseQuery": false setzen, nur weil die Frage allgemein klingt -- "needsDatabaseQuery": false setzen, ohne zu prüfen, ob Datenbank-Daten helfen könnten -- Chat-Antworten geben statt Datenbankabfragen durchzuführen - -WICHTIG: -- Antworte NUR mit dem JSON-Objekt, KEIN zusätzlicher Text davor oder danach! -- KEINE Erklärungen, KEINE Begrüßungen, KEINE Chat-Antworten! -- NUR das JSON-Objekt! -- Bei Unsicherheit: IMMER "needsDatabaseQuery": true setzen! -""" - analysisPrompt = analysisPrompt + json_format_instruction - logger.info("Using custom analysis prompt from instance config with JSON format requirement") - - # AI call for analysis - method_ai = MethodAi(services) - analysis_result = await method_ai.process({ - "aiPrompt": analysisPrompt, - "documentList": None, - "resultType": "json", - "simpleMode": True - }) - - # Check if workflow was stopped during analysis - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped during analysis, aborting processing") - return - - # Retry logic for failed analysis (max 3 attempts) - max_analysis_retries = 3 - analysis_retry_count = 0 - analysis = None - analysis_content = None - - while analysis_retry_count < max_analysis_retries: - # Extract content from ActionResult - analysis_content = None - if analysis_result.success and analysis_result.documents: - analysis_content = analysis_result.documents[0].documentData - if isinstance(analysis_content, bytes): - analysis_content = analysis_content.decode('utf-8') - - # Validate analysis was successful - if not analysis_content: - analysis_retry_count += 1 - if analysis_retry_count < max_analysis_retries: - logger.warning(f"Analysis failed (attempt {analysis_retry_count}/{max_analysis_retries}): No content returned from AI, retrying...") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Analyse fehlgeschlagen, Versuch {analysis_retry_count}/{max_analysis_retries}...", log_type="warning") - # Retry analysis - analysis_result = await method_ai.process({ - "aiPrompt": analysisPrompt, - "documentList": None, - "resultType": "json", - "simpleMode": True - }) - continue - else: - error_msg = "Die Analyse Ihrer Anfrage ist nach mehreren Versuchen fehlgeschlagen. Bitte versuchen Sie es später erneut oder formulieren Sie Ihre Frage anders." - logger.error(f"Analysis failed after {max_analysis_retries} attempts: No content returned from AI") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Analyse nach {max_analysis_retries} Versuchen fehlgeschlagen", log_type="error") - # Store error message as assistant response - workflow = interfaceDbChat.getWorkflow(workflowId) - message_id = f"msg_{uuid.uuid4()}" - assistantMessageData = { - "id": message_id, - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": error_msg, - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": False, - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - assistantMessage = interfaceDbChat.createMessage(assistantMessageData) - logger.info(f"Stored error message due to failed analysis after {max_analysis_retries} attempts: {assistantMessage.id}") - return - - analysis = _extractJsonFromResponse(analysis_content) - if analysis is None: - analysis_retry_count += 1 - if analysis_retry_count < max_analysis_retries: - logger.warning(f"Failed to extract JSON from analysis response (attempt {analysis_retry_count}/{max_analysis_retries}), retrying...") - logger.debug(f"Analysis content: {analysis_content[:500] if analysis_content else 'None'}") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"JSON-Extraktion fehlgeschlagen, Versuch {analysis_retry_count}/{max_analysis_retries}...", log_type="warning") - # Retry analysis - analysis_result = await method_ai.process({ - "aiPrompt": analysisPrompt, - "documentList": None, - "resultType": "json", - "simpleMode": True - }) - continue - else: - error_msg = "Die Analyse Ihrer Anfrage konnte nach mehreren Versuchen nicht verarbeitet werden. Bitte versuchen Sie es später erneut oder formulieren Sie Ihre Frage anders." - logger.error(f"Failed to extract JSON from analysis response after {max_analysis_retries} attempts. Content: {analysis_content[:500] if analysis_content else 'None'}") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: JSON-Extraktion nach {max_analysis_retries} Versuchen fehlgeschlagen", log_type="error") - # Store error message as assistant response - workflow = interfaceDbChat.getWorkflow(workflowId) - message_id = f"msg_{uuid.uuid4()}" - assistantMessageData = { - "id": message_id, - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": error_msg, - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": False, - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - assistantMessage = interfaceDbChat.createMessage(assistantMessageData) - logger.info(f"Stored error message due to failed JSON extraction after {max_analysis_retries} attempts: {assistantMessage.id}") - return - - # Successfully extracted analysis, break retry loop - break - - # Extract analysis results - needsDatabaseQuery = analysis.get("needsDatabaseQuery", False) if analysis else False - needsWebResearch = analysis.get("needsWebResearch", False) if analysis else False - sql_queries = analysis.get("sqlQueries", []) if analysis else [] - # Support legacy single query format for backward compatibility - if not sql_queries and analysis and analysis.get("sqlQuery"): - sql_queries = [{ - "query": analysis.get("sqlQuery", ""), - "purpose": "Database query", - "table": "Unknown" - }] - reasoning = analysis.get("reasoning", "") if analysis else "" - - # CRITICAL: If connectors are configured, ALWAYS use database if user asks about products/articles/inventory - # Override AI decision if it says "no database query" but connectors are available - if chatbot_config.connector_types and len(chatbot_config.connector_types) > 0: - user_prompt_lower = userInput.prompt.lower() - # Keywords that indicate database query is needed - db_keywords = [ - "artikel", "produkt", "ware", "lager", "bestand", "preis", "lieferant", - "led", "lampe", "motor", "kabel", "schraube", "sensor", "netzteil", - "wie viele", "zeig mir", "suche", "finde", "gibt es", "haben wir", - "article", "product", "inventory", "stock", "price", "supplier", - "how many", "show me", "search", "find", "do we have" - ] - has_db_intent = any(keyword in user_prompt_lower for keyword in db_keywords) - - # If user asks about database-related topics but AI said no query needed, force it - if has_db_intent and not needsDatabaseQuery: - logger.warning(f"User asked about database-related topic but AI returned needsDatabaseQuery=false. Forcing needsDatabaseQuery=true because connectors are configured.") - needsDatabaseQuery = True - # Generate a default query if none were provided - if not sql_queries: - # Extract main search term from user prompt - search_terms = [] - for keyword in db_keywords: - if keyword in user_prompt_lower: - # Try to extract the actual product/article name - words = user_prompt_lower.split() - keyword_idx = words.index(keyword) if keyword in words else -1 - if keyword_idx >= 0 and keyword_idx < len(words) - 1: - # Take next word as potential product name - next_word = words[keyword_idx + 1] - if len(next_word) > 2: # Ignore short words like "die", "der", etc. - search_terms.append(next_word) - - # Create a general search query - if search_terms: - search_term = search_terms[0] - else: - # Use the whole prompt as search term (limited) - search_term = userInput.prompt[:50] # Limit length - - sql_queries = [{ - "query": f'SELECT a."Artikelnummer", a."Artikelbezeichnung", a."Lieferant", a."Artikelkürzel" FROM Artikel a WHERE a."Artikelbezeichnung" LIKE \'%{search_term}%\' OR a."Artikelnummer" LIKE \'%{search_term}%\' OR a."Artikelkürzel" LIKE \'%{search_term}%\' LIMIT 20', - "purpose": f"Suche nach Artikeln die '{search_term}' enthalten", - "table": "Artikel" - }] - logger.info(f"Generated default database query for search term: {search_term}") - - # Check if we need web research for certifications (only if enabled in config) - if chatbot_config.enable_web_research: - user_prompt_lower = userInput.prompt.lower() - certification_keywords = ["ul", "ce", "tüv", "vde", "iec", "iso", "zertifiziert", "certified", "certification"] - has_certification = any(keyword in user_prompt_lower for keyword in certification_keywords) - if has_certification and not needsWebResearch: - logger.warning("Certification detected but needsWebResearch is false - forcing to true") - needsWebResearch = True - else: - # Web research disabled in config - if needsWebResearch: - logger.info("Web research disabled in instance config, skipping") - needsWebResearch = False - - # Limit query count based on configuration - max_queries_allowed = chatbot_config.max_queries - if needsDatabaseQuery and len(sql_queries) > max_queries_allowed: - logger.info(f"Limiting queries from {len(sql_queries)} to {max_queries_allowed} for performance") - sql_queries = sql_queries[:max_queries_allowed] - - logger.info(f"Analysis: DB={needsDatabaseQuery}, Web={needsWebResearch}, SQL queries={len(sql_queries)}") - - # Build initial enriched web research query if needed (for logging, will be rebuilt after DB queries) - # Only if web research is enabled in config - enriched_web_query = None - if needsWebResearch and chatbot_config.enable_web_research: - enriched_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages) - - # Build list of queries to stream back - queries = [] - - if needsDatabaseQuery and sql_queries: - for i, sql_query_info in enumerate(sql_queries, 1): - queries.append({ - "type": "database", - "query": sql_query_info.get("query", ""), - "purpose": sql_query_info.get("purpose", f"Query {i}"), - "table": sql_query_info.get("table", "Unknown"), - "reasoning": reasoning - }) - - if needsWebResearch and chatbot_config.enable_web_research: - queries.append({ - "type": "web", - "query": enriched_web_query or userInput.prompt, - "reasoning": reasoning - }) - - # Format queries as log text - log_lines = [] - if queries: - db_queries = [q for q in queries if q["type"] == "database"] - log_lines.append(f"Generiert: {len(db_queries)} Datenbankabfrage(n) und {len(queries) - len(db_queries)} Web-Recherche(n)\n\n") - for i, q in enumerate(queries, 1): - if q["type"] == "database": - log_lines.append(f"{i}. Datenbankabfrage ({q.get('table', 'Unknown')}):\n") - log_lines.append(f" Zweck: {q.get('purpose', 'Nicht angegeben')}\n") - log_lines.append(f"```sql\n{q['query']}\n```\n") - elif q["type"] == "web": - log_lines.append(f"{i}. Web-Recherche:\n") - log_lines.append(f" Suchbegriff: {q['query']}\n") - if q.get("reasoning"): - log_lines.append(f" Begründung: {q['reasoning']}\n") - log_lines.append("\n") - else: - log_lines.append("Keine Abfragen erforderlich.") - - log_text = "".join(log_lines) - - # Stream queries as a log - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, log_text) - - # Check if workflow was stopped before executing queries - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped, aborting query execution") - return - - # Step 2: Execute queries - queryResults = {} - webResearchResults = "" - - # Start web research early in parallel with DB queries if needed (only if enabled) - web_research_task = None - if needsWebResearch and chatbot_config.enable_web_research: - # Start with basic query (will enrich later with DB results if available) - basic_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages, None) - logger.info(f"Starting web research in parallel with DB queries using basic query: '{basic_web_query}'") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Suche im Internet nach Informationen...") - - async def perform_web_research(): - """Perform web research and return results.""" - try: - researchResult = await services.web.performWebResearch( - prompt=basic_web_query, - urls=[], - country=None, - language=userInput.userLanguage or "de", - researchDepth="general", - operationId=None - ) - return json.dumps(researchResult, ensure_ascii=False, indent=2) if isinstance(researchResult, dict) else str(researchResult) - except Exception as e: - logger.error(f"Web research failed: {e}", exc_info=True) - return f"Web research error: {str(e)}" - - web_research_task = asyncio.create_task(perform_web_research()) - - # Check if connector is working before executing queries - if needsDatabaseQuery and sql_queries: - logger.info(f"Checking database connector before executing {len(sql_queries)} queries...") - try: - # Test connector with a simple query - test_connector = chatbot_config.get_connector_instance() - try: - # Try a simple test query to verify connector works - test_result = await test_connector.executeQuery("SELECT 1", return_json=True) - await test_connector.close() - if not test_result or test_result.get("text", "").startswith(("Error:", "Query failed:")): - raise Exception("Connector test query failed") - logger.info("Database connector test successful") - except Exception as connector_error: - await test_connector.close() - error_msg = f"Die Datenbankverbindung funktioniert derzeit nicht. Bitte versuchen Sie es später erneut. Fehler: {str(connector_error)}" - logger.error(f"Database connector test failed: {connector_error}", exc_info=True) - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Datenbankverbindung fehlgeschlagen", log_type="error") - # Store error message as assistant response - workflow = interfaceDbChat.getWorkflow(workflowId) - message_id = f"msg_{uuid.uuid4()}" - assistantMessageData = { - "id": message_id, - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": error_msg, - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": False, - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - assistantMessage = interfaceDbChat.createMessage(assistantMessageData) - logger.info(f"Stored error message due to connector failure: {assistantMessage.id}") - return - except Exception as e: - error_msg = f"Die Datenbankverbindung konnte nicht hergestellt werden. Bitte versuchen Sie es später erneut. Fehler: {str(e)}" - logger.error(f"Failed to initialize database connector: {e}", exc_info=True) - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Datenbankverbindung konnte nicht hergestellt werden", log_type="error") - # Store error message as assistant response - workflow = interfaceDbChat.getWorkflow(workflowId) - message_id = f"msg_{uuid.uuid4()}" - assistantMessageData = { - "id": message_id, - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": error_msg, - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": False, - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - assistantMessage = interfaceDbChat.createMessage(assistantMessageData) - logger.info(f"Stored error message due to connector initialization failure: {assistantMessage.id}") - return - - # Execute database queries in parallel - if needsDatabaseQuery and sql_queries: - logger.info(f"Executing {len(sql_queries)} database queries in parallel...") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Führe {len(sql_queries)} Datenbankabfrage(n) parallel aus...") - - try: - queryResults = await _execute_queries_parallel(sql_queries, chatbot_config) - - # Log results summary - successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")] - failed_queries = [k for k in queryResults.keys() if k.endswith("_error")] - - if successful_queries: - total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries) - logger.info(f"Successfully executed {len(successful_queries)} query/queries, total {total_rows} rows") - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Abgeschlossen: {len(successful_queries)} Abfrage(n) erfolgreich, {total_rows} Ergebnis{'e' if total_rows != 1 else ''} gefunden" - ) - - if failed_queries: - logger.warning(f"{len(failed_queries)} query/queries failed") - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Warnung: {len(failed_queries)} Abfrage(n) fehlgeschlagen", - log_type="warning" - ) - - # Check if we got empty results and need to retry with alternative strategies - # Robust calculation: check all successful queries for empty data - total_rows = 0 - queries_with_results = 0 - queries_with_empty_results = 0 - - if successful_queries: - for query_key in successful_queries: - data_key = f"{query_key}_data" - if data_key in queryResults: - row_count = len(queryResults[data_key]) - total_rows += row_count - if row_count > 0: - queries_with_results += 1 - else: - queries_with_empty_results += 1 - else: - # Query succeeded but no data key - treat as empty - queries_with_empty_results += 1 - logger.debug(f"Query {query_key} succeeded but has no _data key") - else: - # No successful queries at all - logger.debug("No successful queries found") - - # Also check if we have any query results at all - has_any_results = total_rows > 0 - - # Debug logging - logger.info(f"Query results analysis: total_rows={total_rows}, successful_queries={len(successful_queries)}, " - f"queries_with_results={queries_with_results}, queries_with_empty_results={queries_with_empty_results}, " - f"failed_queries={len(failed_queries)}") - - # Trigger retry if: no results AND we have database queries AND we executed at least one query - # Also trigger if all successful queries returned empty results - # Only retry if enabled in config - should_retry = ( - chatbot_config.enable_retry_on_empty and - not has_any_results and - needsDatabaseQuery and - len(sql_queries) > 0 and - (len(successful_queries) > 0 or len(failed_queries) == 0) # Either we have successful queries or no failures (queries executed but empty) - ) - - # Iterative retry loop: try up to configured max attempts with different strategies - max_empty_retry_attempts = chatbot_config.max_retry_attempts if chatbot_config.enable_retry_on_empty else 0 - empty_retry_attempt = 0 - original_sql_queries_count = len(sql_queries) - previous_retry_rows = 0 - - while should_retry and empty_retry_attempt < max_empty_retry_attempts: - empty_retry_attempt += 1 - logger.info(f"No results found (attempt {empty_retry_attempt}/{max_empty_retry_attempts}), retrying with alternative query strategies...") - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Keine Ergebnisse gefunden ({len(successful_queries)} erfolgreiche Abfrage(n), {total_rows} Zeilen). Versuch {empty_retry_attempt}/{max_empty_retry_attempts}: Versuche alternative Abfrage-Strategien...", - log_type="info" - ) - - # Retry analysis with empty results context - create NEW analysis with alternative strategies - - # Build retry prompt with progressively different strategies - empty_count = len(sql_queries) - empty_results_instructions = get_empty_results_retry_instructions(empty_count) - - retry_context = f"{context}\n\n" - if empty_retry_attempt == 1: - retry_context += "⚠️⚠️⚠️ WICHTIG - ALTERNATIVE STRATEGIEN ERFORDERLICH ⚠️⚠️⚠️\n" - retry_context += "Strategie: Breitere Suche, weniger Filter\n" - elif empty_retry_attempt == 2: - retry_context += "⚠️⚠️⚠️ KRITISCH - IMMER NOCH KEINE ERGEBNISSE ⚠️⚠️⚠️\n" - retry_context += "Strategie: Entferne spezifische Filter komplett, verwende nur Hauptkriterien\n" - else: - retry_context += "⚠️⚠️⚠️ LETZTER VERSUCH - MINIMALE FILTER ⚠️⚠️⚠️\n" - retry_context += "Strategie: Nur Hauptbegriffe, keine spezifischen Filter\n" - - retry_context += f"Die bisherigen {len(sql_queries)} Abfragen haben 0 Zeilen zurückgegeben.\n" - retry_context += f"{empty_results_instructions}\n" - retry_context += f"Dies ist bereits Versuch {empty_retry_attempt} von {max_empty_retry_attempts}!\n" - retry_context += "Erstelle JETZT MAXIMAL 5 alternative SQL-Queries mit komplett anderen Strategien (für Performance):\n" - - if empty_retry_attempt == 1: - retry_context += "- Breitere Suche ohne zu spezifische Filter\n" - retry_context += "- Suche ohne Zertifizierungsfilter (falls Zertifizierung nicht in DB)\n" - retry_context += "- Suche nur nach Hauptkriterien (z.B. nur Netzgerät + 10A, ohne einphasig)\n" - retry_context += "- Suche nach alternativen Begriffen (Netzteil statt Netzgerät, etc.)\n" - retry_context += "- COUNT-Queries für Statistik\n" - retry_context += "- Fallback-Queries mit minimalen Filtern\n" - elif empty_retry_attempt == 2: - retry_context += "- ENTFERNE alle Zertifizierungsfilter komplett\n" - retry_context += "- ENTFERNE Phasen-Filter (einphasig/dreiphasig)\n" - retry_context += "- Suche NUR nach: Netzgerät/Netzteil + Ampere-Angaben\n" - retry_context += "- Verwende breitere Ampere-Patterns (5A, 6A, 8A, 10A, 12A, 15A, 20A, etc.)\n" - retry_context += "- Suche auch in Keywords-Feld\n" - else: - retry_context += "- MINIMALE Filter: Nur 'Netzgerät' ODER 'Netzteil' ODER 'Power Supply'\n" - retry_context += "- KEINE spezifischen Filter auf Ampere, Phasen oder Zertifizierung\n" - retry_context += "- COUNT-Query: Wie viele Netzgeräte gibt es insgesamt?\n" - retry_context += "- Suche nach ALLEN verfügbaren Netzgeräten\n" - - # Retry analysis - use custom prompt from configuration (already validated at start of chatProcess) - retry_analysis_prompt = chatbot_config.custom_analysis_prompt.replace("{userPrompt}", userInput.prompt).replace("{context}", retry_context or "") - - # CRITICAL: Add explicit JSON format requirement to ensure AI returns JSON - json_format_instruction = """ - -⚠️⚠️⚠️ ABSOLUT KRITISCH - JSON-FORMAT ERFORDERLICH ⚠️⚠️⚠️ -DU MUSST DEINE ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT GEBEN! -ANTWORTE NICHT MIT NORMALEM TEXT ODER EINER CHAT-ANTWORT! -DEINE ANTWORT MUSS EIN GÜLTIGES JSON-OBJEKT SEIN! - -Erforderliches JSON-Format: -{ - "needsDatabaseQuery": true/false, - "needsWebResearch": true/false, - "sqlQueries": [ - { - "query": "SQL-Abfrage hier", - "purpose": "Zweck der Abfrage", - "table": "Haupttabelle" - } - ], - "reasoning": "Begründung für die Abfragen" -} - -⚠️⚠️⚠️ KRITISCH - WANN DATENBANKABFRAGE ERFORDERLICH ⚠️⚠️⚠️ -SETZE "needsDatabaseQuery": true, WENN: -- Der Nutzer nach Artikeln, Produkten, Preisen, Lagerbeständen, Lieferanten fragt -- Der Nutzer nach Informationen aus der Datenbank fragt (auch allgemeine Fragen!) -- Der Nutzer eine Frage stellt, die mit Datenbank-Daten beantwortet werden kann -- Du dir nicht sicher bist - dann setze "needsDatabaseQuery": true und führe eine allgemeine Abfrage durch! - -VERBOTEN: -- "needsDatabaseQuery": false setzen, nur weil die Frage allgemein klingt -- "needsDatabaseQuery": false setzen, ohne zu prüfen, ob Datenbank-Daten helfen könnten -- Chat-Antworten geben statt Datenbankabfragen durchzuführen - -WICHTIG: -- Antworte NUR mit dem JSON-Objekt, KEIN zusätzlicher Text davor oder danach! -- KEINE Erklärungen, KEINE Begrüßungen, KEINE Chat-Antworten! -- NUR das JSON-Objekt! -- Bei Unsicherheit: IMMER "needsDatabaseQuery": true setzen! -""" - retry_analysis_prompt = retry_analysis_prompt + json_format_instruction - logger.info("Using custom analysis prompt for retry from instance config with JSON format requirement") - - # AI call for retry analysis - retry_analysis_result = await method_ai.process({ - "aiPrompt": retry_analysis_prompt, - "documentList": None, - "resultType": "json", - "simpleMode": True - }) - - # Extract retry analysis - retry_analysis_content = None - if retry_analysis_result.success and retry_analysis_result.documents: - retry_analysis_content = retry_analysis_result.documents[0].documentData - if isinstance(retry_analysis_content, bytes): - retry_analysis_content = retry_analysis_content.decode('utf-8') - - if retry_analysis_content: - retry_analysis = _extractJsonFromResponse(retry_analysis_content) - if retry_analysis is None: - logger.warning("Failed to extract JSON from retry analysis response") - retry_analysis = {} - if retry_analysis and retry_analysis.get("needsDatabaseQuery", False): - retry_sql_queries = retry_analysis.get("sqlQueries", []) - # Limit to maximum 5 queries for performance - if len(retry_sql_queries) > 5: - logger.info(f"Limiting retry queries from {len(retry_sql_queries)} to 5 for performance") - retry_sql_queries = retry_sql_queries[:5] - if retry_sql_queries: - logger.info(f"Executing {len(retry_sql_queries)} retry queries (attempt {empty_retry_attempt}) with alternative strategies...") - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Führe {len(retry_sql_queries)} alternative Abfrage(n) mit anderen Strategien aus (Versuch {empty_retry_attempt})...", - log_type="info" - ) - - # Execute retry queries - try: - retry_results = await _execute_queries_parallel(retry_sql_queries, chatbot_config) - - # Merge retry results into main results (renumber to continue sequence) - base_query_num = len(sql_queries) - for key, value in retry_results.items(): - if key.startswith("query_"): - # Extract query number from retry result - try: - query_num = int(key.split("_")[1]) - new_query_num = base_query_num + query_num - new_key = f"query_{new_query_num}" - - if not key.endswith("_data") and not key.endswith("_error"): - queryResults[new_key] = value - if f"{key}_data" in retry_results: - queryResults[f"{new_key}_data"] = retry_results[f"{key}_data"] - elif key.endswith("_error"): - queryResults[f"{new_key}_error"] = value - except (ValueError, IndexError): - # Fallback if parsing fails - new_key = f"query_{base_query_num + 1}" - if not key.endswith("_data") and not key.endswith("_error"): - queryResults[new_key] = value - - # Recalculate results after retry - retry_successful = [k for k in retry_results.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")] - retry_rows = sum(len(retry_results.get(f"{k}_data", [])) for k in retry_successful) if retry_successful else 0 - - # Update successful_queries list to include retry results - successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")] - total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries) - - logger.info(f"Retry attempt {empty_retry_attempt}: Found {retry_rows} rows from {len(retry_successful)} queries. Total: {total_rows} rows from {len(successful_queries)} queries") - - if retry_rows > 0: - # Success! Found results - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Alternative Abfragen erfolgreich: {len(retry_successful)} Abfrage(n) mit {retry_rows} Ergebnis{'en' if retry_rows != 1 else ''} gefunden", - log_type="info" - ) - should_retry = False # Stop retry loop, we found results - break - elif retry_rows > previous_retry_rows: - # Made some progress (found more rows than before) - continue - previous_retry_rows = retry_rows - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Versuch {empty_retry_attempt}: Fortschritt erzielt ({retry_rows} Zeilen gefunden). Versuche weitere Strategie...", - log_type="info" - ) - else: - # No progress made - stop retrying - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"Versuch {empty_retry_attempt}: Keine Ergebnisse gefunden. Beende Retry-Versuche.", - log_type="warning" - ) - should_retry = False # Stop retry loop, no progress - break - except Exception as retry_error: - logger.error(f"Error executing retry queries (attempt {empty_retry_attempt}): {retry_error}", exc_info=True) - # Continue to next attempt even on error - - # Check if we should continue retrying (already handled in break conditions above) - if empty_retry_attempt >= max_empty_retry_attempts: - logger.warning(f"Reached maximum empty retry attempts ({max_empty_retry_attempts}), stopping retry loop") - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - f"⚠️ Maximale Anzahl Retry-Versuche ({max_empty_retry_attempts}) erreicht. Keine Ergebnisse gefunden.", - log_type="warning" - ) - should_retry = False - except Exception as e: - logger.error(f"Error executing parallel queries: {e}") - queryResults["error"] = f"Error executing queries: {str(e)}" - await _emit_log_and_event( - interfaceDbChat, - workflowId, - event_manager, - "Fehler bei parallelen Datenbankabfragen", - log_type="error" - ) - - # Wait for web research to complete (if it was started in parallel) - if web_research_task: - try: - webResearchResults = await web_research_task - if webResearchResults and not webResearchResults.startswith("Web research error"): - logger.info("Web research completed successfully") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche abgeschlossen") - else: - logger.warning("Web research completed with errors") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning") - except Exception as e: - logger.error(f"Error waiting for web research: {e}", exc_info=True) - webResearchResults = f"Web research error: {str(e)}" - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning") - - # Check if workflow was stopped before generating final answer - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped, aborting final answer generation") - return - - # Step 3: Generate final answer using AI - logger.info("Generating final answer with AI...") - await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Formuliere finale Antwort...") - - # Build prompt for final answer - use custom prompt from configuration (already validated at start of chatProcess) - system_prompt = chatbot_config.custom_final_answer_prompt - logger.info("Using custom final answer prompt from instance config") - - # Build answer context with query results using efficient list-based building - answer_context_parts = [f"User question: {userInput.prompt}{context}\n"] - - # Add database results - organize by query with metadata - db_results_parts = [] - if queryResults: - successful_results = [] - error_results = [] - - # Extract query metadata from sql_queries if available - query_metadata = {} - if sql_queries: - for i, q_info in enumerate(sql_queries, 1): - query_metadata[f"query_{i}"] = { - "purpose": q_info.get("purpose", f"Query {i}"), - "table": q_info.get("table", "Unknown") - } - - # Organize results by query number - query_numbers = set() - for key in queryResults.keys(): - if key.startswith("query_") and not key.endswith("_data"): - # Extract query number (e.g., "query_1" -> 1) - try: - num = int(key.split("_")[1]) - query_numbers.add(num) - except (ValueError, IndexError): - pass - - # Build results with metadata - for query_num in sorted(query_numbers): - query_key = f"query_{query_num}" - error_key = f"{query_key}_error" - - if error_key in queryResults: - error_msg = queryResults[error_key] - metadata = query_metadata.get(query_key, {}) - purpose = metadata.get("purpose", f"Query {query_num}") - table = metadata.get("table", "Unknown") - error_results.append(f"Abfrage {query_num} ({table} - {purpose}): {error_msg}") - elif query_key in queryResults: - result_text = queryResults[query_key] - metadata = query_metadata.get(query_key, {}) - purpose = metadata.get("purpose", f"Query {query_num}") - table = metadata.get("table", "Unknown") - successful_results.append(f"=== Abfrage {query_num}: {purpose} (Tabelle: {table}) ===\n{result_text}") - - # Handle general error if present - if "error" in queryResults: - error_results.append(f"Allgemeiner Fehler: {queryResults['error']}") - - # Build db_results_part efficiently - if successful_results: - db_results_parts.append("\n\nDATENBANK-ERGEBNISSE:\n") - db_results_parts.append("\n\n".join(successful_results)) - answer_context_parts.append("DATENBANK-ERGEBNISSE:\n") - answer_context_parts.append("\n\n".join(successful_results)) - answer_context_parts.append("\n") - - if error_results: - db_results_parts.append("\n\nDATENBANK-FEHLER:\n") - db_results_parts.append("\n".join(error_results)) - answer_context_parts.append("DATENBANK-FEHLER:\n") - answer_context_parts.append("\n".join(error_results)) - answer_context_parts.append("\n") - - db_results_part = "".join(db_results_parts) - - # Add web research results - web_results_part = "" - # Check if web research results exist and are valid (not empty and not an error) - if webResearchResults and webResearchResults.strip() and not webResearchResults.startswith("Web research error"): - web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}" - answer_context_parts.append(f"INTERNET-RECHERCHE:\n{webResearchResults}\n") - - # Join answer context efficiently - answerContext = "".join(answer_context_parts) - - # Check if we have any actual data - successful_query_keys = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")] - has_query_results = bool(successful_query_keys) - error_query_keys = [k for k in queryResults.keys() if k.endswith("_error")] - has_only_errors = bool(error_query_keys and not successful_query_keys) - - # Count total number of articles found across all queries - total_articles_found = 0 - if successful_query_keys: - for query_key in successful_query_keys: - data_key = f"{query_key}_data" - if data_key in queryResults: - article_count = len(queryResults[data_key]) - total_articles_found += article_count - logger.info(f"Query {query_key} returned {article_count} articles") - - logger.info(f"Total articles found across all queries: {total_articles_found}") - - # Add explicit article count information to prompt (using efficient list building) - if total_articles_found > 0: - article_count_parts = [ - "\n\n⚠️⚠️⚠️ WICHTIG - ARTIKELANZAHL ⚠️⚠️⚠️\n", - f"In den DATENBANK-ERGEBNISSEN oben wurden INSGESAMT {total_articles_found} Artikel gefunden.\n", - f"DU MUSST ALLE {total_articles_found} Artikel in deiner Antwort zeigen!\n" - ] - if total_articles_found <= 20: - article_count_parts.append(f"Zeige ALLE {total_articles_found} Artikel in einer Tabelle.\n") - else: - article_count_parts.append(f"Zeige die ersten 20 Artikel in einer Tabelle + Hinweis auf weitere {total_articles_found - 20} Artikel.\n") - article_count_parts.extend([ - f"❌ VERBOTEN: Nur einen Artikel zu zeigen, wenn {total_articles_found} gefunden wurden!\n", - f"✓ OBLIGATORISCH: Zeige ALLE {total_articles_found} Artikel!\n" - ]) - article_count_info = "".join(article_count_parts) - - if db_results_part: - db_results_part = article_count_info + db_results_part - else: - db_results_part = article_count_info - - # Add warning messages if needed (using efficient list building) - warning_parts = [] - if not has_query_results and needsDatabaseQuery: - warning_parts.append("\n\n⚠️⚠️⚠️ WICHTIG - DATENBANKABFRAGE AUSGEFÜHRT ⚠️⚠️⚠️\n") - warning_parts.append("Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber KEINE Ergebnisse zurückgegeben.\n") - warning_parts.append("DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!\n") - warning_parts.append("Antworte dem Nutzer: 'Es wurden keine Artikel gefunden' oder 'Keine passenden Artikel in der Datenbank gefunden'\n") - warning_parts.append("VERBOTEN: Sage NIEMALS 'Ich habe keinen Zugriff' oder 'Ich kann nicht auf die Datenbank zugreifen'!\n") - warning_parts.append("VERBOTEN: Sage NIEMALS 'Es tut mir leid, aber ich habe keinen Zugriff auf die Datenbank'!\n") - warning_parts.append("Die Datenbank wurde durchsucht, es wurden nur keine passenden Artikel gefunden.\n") - - if has_only_errors: - warning_parts.extend([ - "\n\n⚠️⚠️⚠️ KRITISCH - ALLE QUERIES FEHLGESCHLAGEN ⚠️⚠️⚠️\n", - "ALLE Datenbankabfragen sind fehlgeschlagen. Es gibt KEINE gültigen Daten aus der Datenbank.\n", - "DU DARFST KEINE DATEN ERFINDEN! Schreibe stattdessen: 'Es wurden keine Artikel gefunden' oder 'Die Datenbankabfrage ist fehlgeschlagen'." - ]) - - if warning_parts: - db_results_part = db_results_part + "".join(warning_parts) if db_results_part else "".join(warning_parts) - - # Determine if we have database results, web results, or both - has_db_results = bool(db_results_part and db_results_part.strip()) - has_web_results = bool(web_results_part and web_results_part.strip()) - - # Add explicit instruction that AI has database access (if connectors are configured) - access_instruction = "" - if chatbot_config.connector_types and len(chatbot_config.connector_types) > 0: - access_instruction = "\n\n⚠️⚠️⚠️ ABSOLUT KRITISCH - DATENBANK-ZUGRIFF VERFÜGBAR ⚠️⚠️⚠️\n" - access_instruction += "DU HAST ZUGRIFF AUF DIE DATENBANK UND KANNST SQL-ABFRAGEN AUSFÜHREN!\n" - access_instruction += "Die Datenbankabfrage wurde bereits AUSGEFÜHRT (siehe DATENBANK-ERGEBNISSE oben)!\n" - access_instruction += "\nVERBOTEN - NIEMALS SAGEN:\n" - access_instruction += "- 'Ich habe keinen Zugriff auf die Datenbank'\n" - access_instruction += "- 'Ich kann nicht auf die Datenbank zugreifen'\n" - access_instruction += "- 'Es tut mir leid, aber ich habe keinen Zugriff'\n" - access_instruction += "- 'Ich habe keinen Zugriff auf Echtzeit-Datenbanken'\n" - access_instruction += "- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!\n" - access_instruction += "\nRICHTIG - WENN KEINE ERGEBNISSE:\n" - access_instruction += "- 'Es wurden keine Artikel gefunden'\n" - access_instruction += "- 'Keine passenden Artikel in der Datenbank gefunden'\n" - access_instruction += "- 'Die Datenbanksuche ergab keine Treffer'\n" - access_instruction += "- 'Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden'\n" - access_instruction += "\nWICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!\n" - - if chatbot_config.enable_web_research: - access_instruction += "\n⚠️⚠️⚠️ ABSOLUT KRITISCH - INTERNET-ZUGRIFF VERFÜGBAR ⚠️⚠️⚠️\n" - access_instruction += "DU HAST ZUGRIFF AUF DAS INTERNET (Tavily)!\n" - access_instruction += "VERBOTEN: Sage NIEMALS, dass du keinen Zugriff auf das Internet hast!\n" - access_instruction += "VERBOTEN: Sage NIEMALS 'Ich habe keinen Zugriff auf das Internet'!\n" - - # Build the final answer prompt using custom system prompt from config - answer_prompt = _build_final_answer_prompt_with_results( - system_prompt + access_instruction, - userInput.prompt, - context, - db_results_part, - web_results_part, - is_resumed, - has_db_results, - has_web_results - ) - - answerRequest = AiCallRequest( - prompt=answer_prompt, - context=answerContext if (queryResults or webResearchResults) else None, - options=AiCallOptions( - resultFormat="txt", - operationType=OperationTypeEnum.DATA_ANALYSE, - processingMode=ProcessingModeEnum.DETAILED - ) - ) - - # Double-check workflow wasn't stopped right before AI call - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped before final answer AI call, aborting") - return - - answerResponse = await services.ai.callAi(answerRequest) - - # Check immediately after AI call completes - if stopped, abort without processing or storing - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped during final answer AI call, aborting without storing message") - return - - # Check for errors in AI response - if answerResponse.errorCount > 0: - logger.error(f"AI call failed with errorCount={answerResponse.errorCount}: {answerResponse.content}") - finalAnswer = "Entschuldigung, ich konnte Ihre Anfrage derzeit nicht verarbeiten. Bitte versuchen Sie es später erneut." - else: - finalAnswer = answerResponse.content - logger.info("Final answer generated") - - # Check again after generating answer (in case it was stopped while generating) - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped after final answer generation, not storing message") - return - - # Reload workflow to get current message count - workflow = interfaceDbChat.getWorkflow(workflowId) - - # Double-check workflow wasn't stopped while we were reloading - if workflow and workflow.status == "stopped": - logger.info(f"Workflow {workflowId} was stopped, not storing final message") - return - - # Create assistant message with final answer - message_id = f"msg_{uuid.uuid4()}" - assistantMessageData = { - "id": message_id, - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": finalAnswer, - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": answerResponse.errorCount == 0 if answerResponse else True, - "roundNumber": workflow.currentRound, - "taskNumber": 0, - "actionNumber": 0 - } - - assistantMessage = interfaceDbChat.createMessage(assistantMessageData) - logger.info(f"Stored assistant message with final answer: {assistantMessage.id}") - - # Emit message event for streaming (exact chatData format) - message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp()) - await event_manager.emit_event( - context_id=workflowId, - event_type="chatdata", - data={ - "type": "message", - "createdAt": message_timestamp, - "item": assistantMessage.dict() - }, - event_category="chat" - ) - - # Update workflow status to completed (only if not stopped) - if not await _check_workflow_stopped(interfaceDbChat, workflowId): - interfaceDbChat.updateWorkflow(workflowId, { - "status": "completed", - "lastActivity": getUtcTimestamp() - }) - else: - logger.info(f"Workflow {workflowId} was stopped, not updating status to completed") - - logger.info(f"Chatbot processing completed for workflow {workflowId}, generated {len(queries)} queries and final answer") - - # Emit completion event only if workflow wasn't stopped - if not await _check_workflow_stopped(interfaceDbChat, workflowId): - await event_manager.emit_event( - context_id=workflowId, - event_type="complete", - data={"workflowId": workflowId}, - event_category="workflow", - message="Chatbot-Verarbeitung abgeschlossen", - step="complete" - ) - - # Schedule cleanup with longer delay to allow stream to stay open - await event_manager.cleanup(workflowId, delay=300.0) # 5 minutes delay - - except Exception as e: - logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True) - - # Check if workflow was stopped - if so, don't store error message - if await _check_workflow_stopped(interfaceDbChat, workflowId): - logger.info(f"Workflow {workflowId} was stopped, not storing error message") - return - - # Store error message - try: - # Reload workflow to get current message count - workflow = interfaceDbChat.getWorkflow(workflowId) - - # Double-check workflow wasn't stopped while we were reloading - if workflow and workflow.status == "stopped": - logger.info(f"Workflow {workflowId} was stopped, not storing error message") - return - - errorMessageData = { - "id": f"msg_{uuid.uuid4()}", - "workflowId": workflowId, - "parentMessageId": userMessageId, - "message": f"Sorry, I encountered an error: {str(e)}", - "role": "assistant", - "status": "last", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": getUtcTimestamp(), - "success": False, - "roundNumber": workflow.currentRound if workflow else 1, - "taskNumber": 0, - "actionNumber": 0 - } - errorMessage = interfaceDbChat.createMessage(errorMessageData) - - # Emit message event for streaming (exact chatData format) - message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp()) - await event_manager.emit_event( - context_id=workflowId, - event_type="chatdata", - data={ - "type": "message", - "createdAt": message_timestamp, - "item": errorMessage.dict() - }, - event_category="chat" - ) - - # Update workflow status to error (only if not stopped) - if not await _check_workflow_stopped(interfaceDbChat, workflowId): - interfaceDbChat.updateWorkflow(workflowId, { - "status": "error", - "lastActivity": getUtcTimestamp() - }) - else: - logger.info(f"Workflow {workflowId} was stopped, not updating status to error") - - # Schedule cleanup - await event_manager.cleanup(workflowId) - except Exception as storeError: - logger.error(f"Error storing error message: {storeError}") diff --git a/modules/features/chatbot/routeFeatureChatbot.py b/modules/features/chatbot/routeFeatureChatbot.py index e6e9c626..290df48e 100644 --- a/modules/features/chatbot/routeFeatureChatbot.py +++ b/modules/features/chatbot/routeFeatureChatbot.py @@ -2,7 +2,7 @@ # All rights reserved. """ Chatbot routes for the backend API. -Implements simple chatbot endpoints using direct AI center calls via chatbot feature. +Implements chatbot endpoints using LangGraph-based conversation workflows. """ import logging @@ -32,9 +32,6 @@ from modules.datamodels.datamodelPagination import PaginationParams, PaginatedRe from . import chatProcess from .eventManager import get_event_manager -# Import workflow control functions -from modules.workflows.automation import chatStop - # Configure logger logger = logging.getLogger(__name__) @@ -378,83 +375,9 @@ async def stop_chatbot( detail=str(e) ) -# Delete chatbot workflow endpoint -@router.delete("/{instanceId}/{workflowId}", response_model=Dict[str, Any]) -@limiter.limit("120/minute") -async def delete_chatbot( - request: Request, - instanceId: str = Path(..., description="Feature Instance ID"), - workflowId: str = Path(..., description="ID of the workflow to delete"), - context: RequestContext = Depends(getRequestContext) -) -> Dict[str, Any]: - """Deletes a chatbot workflow and its associated data.""" - # Validate instance access - mandateId = await _validateInstanceAccess(instanceId, context) - - try: - # Get service center - interfaceDbChat = _getServiceChat(context, instanceId) - - # Check workflow access and permission using RBAC - workflows = getRecordsetWithRBAC( - interfaceDbChat.db, - ChatWorkflow, - context.user, - recordFilter={"id": workflowId} - ) - if not workflows: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"Workflow with ID {workflowId} not found" - ) - - workflow_data = workflows[0] - - # Check if workflow is a chatbot workflow - if workflow_data.get("workflowMode") != WorkflowModeEnum.WORKFLOW_CHATBOT.value: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=f"Workflow {workflowId} is not a chatbot workflow" - ) - - # Verify workflow belongs to this instance - workflow_instance_id = workflow_data.get("featureInstanceId") - if workflow_instance_id != instanceId: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=f"Workflow {workflowId} does not belong to instance '{instanceId}'" - ) - - # Check if user has permission to delete using RBAC - if not interfaceDbChat.checkRbacPermission(ChatWorkflow, "delete", workflowId): - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="You don't have permission to delete this workflow" - ) - - # Delete workflow - success = interfaceDbChat.deleteWorkflow(workflowId) - - if not success: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Failed to delete workflow" - ) - - return { - "id": workflowId, - "message": "Chatbot workflow and associated data deleted successfully" - } - except HTTPException: - raise - except Exception as e: - logger.error(f"Error in delete_chatbot: {str(e)}", exc_info=True) - raise HTTPException( - status_code=500, - detail=f"Error deleting chatbot workflow: {str(e)}" - ) - # List chatbot threads/workflows or get specific thread details +# NOTE: This route MUST be defined BEFORE /{instanceId}/{workflowId} routes +# to prevent "threads" from being matched as a workflowId @router.get("/{instanceId}/threads") @limiter.limit("120/minute") async def get_chatbot_threads( @@ -583,3 +506,80 @@ async def get_chatbot_threads( status_code=500, detail=f"Error getting chatbot threads: {str(e)}" ) + +# Delete chatbot workflow endpoint +# NOTE: This catch-all route MUST be defined AFTER more specific routes like /threads +@router.delete("/{instanceId}/{workflowId}", response_model=Dict[str, Any]) +@limiter.limit("120/minute") +async def delete_chatbot( + request: Request, + instanceId: str = Path(..., description="Feature Instance ID"), + workflowId: str = Path(..., description="ID of the workflow to delete"), + context: RequestContext = Depends(getRequestContext) +) -> Dict[str, Any]: + """Deletes a chatbot workflow and its associated data.""" + # Validate instance access + mandateId = await _validateInstanceAccess(instanceId, context) + + try: + # Get service center + interfaceDbChat = _getServiceChat(context, instanceId) + + # Check workflow access and permission using RBAC + workflows = getRecordsetWithRBAC( + interfaceDbChat.db, + ChatWorkflow, + context.user, + recordFilter={"id": workflowId} + ) + if not workflows: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Workflow with ID {workflowId} not found" + ) + + workflow_data = workflows[0] + + # Check if workflow is a chatbot workflow + if workflow_data.get("workflowMode") != WorkflowModeEnum.WORKFLOW_CHATBOT.value: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Workflow {workflowId} is not a chatbot workflow" + ) + + # Verify workflow belongs to this instance + workflow_instance_id = workflow_data.get("featureInstanceId") + if workflow_instance_id != instanceId: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=f"Workflow {workflowId} does not belong to instance '{instanceId}'" + ) + + # Check if user has permission to delete using RBAC + if not interfaceDbChat.checkRbacPermission(ChatWorkflow, "delete", workflowId): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You don't have permission to delete this workflow" + ) + + # Delete workflow + success = interfaceDbChat.deleteWorkflow(workflowId) + + if not success: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to delete workflow" + ) + + return { + "id": workflowId, + "message": "Chatbot workflow and associated data deleted successfully" + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Error in delete_chatbot: {str(e)}", exc_info=True) + raise HTTPException( + status_code=500, + detail=f"Error deleting chatbot workflow: {str(e)}" + ) diff --git a/requirements.txt b/requirements.txt index 63856d09..5c018257 100644 --- a/requirements.txt +++ b/requirements.txt @@ -114,4 +114,5 @@ fiona>=1.9.0 # Required by geopandas for reading GeoPackage files langchain>=0.1.0 langchain-core>=0.1.0 langgraph>=0.0.20 -langchain-tavily>=0.0.1 \ No newline at end of file +langchain-tavily>=0.0.1 +nest-asyncio>=1.6.0 # For running async code in sync context (LangGraph compatibility) \ No newline at end of file