# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Simple chatbot feature - basic implementation.

User input is processed by the AI to create a list of needed queries.
Those queries are then streamed back.
"""

import logging
import json
import uuid
import asyncio
import re
from typing import Optional, Dict, Any, List

from modules.datamodels.datamodelChat import ChatWorkflow, UserInputRequest, WorkflowModeEnum, ChatLog
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.services import getInterface as getServices
from modules.features.chatbot.eventManager import get_event_manager
from modules.workflows.methods.methodAi.methodAi import MethodAi
from modules.connectors.connectorPreprocessor import PreprocessorConnector
from modules.features.chatbot.chatbotConstants import (
    get_initial_analysis_prompt,
    generate_conversation_name,
    get_final_answer_system_prompt
)

logger = logging.getLogger(__name__)

def _extractJsonFromResponse(content: str) -> Optional[dict]:
    """Extract JSON from an AI response, handling markdown code blocks."""
    # Try direct JSON parse first
    try:
        return json.loads(content.strip())
    except json.JSONDecodeError:
        pass

    # Try to extract JSON from markdown code blocks
    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            pass

    # Try to find a bare JSON object in the text
    json_match = re.search(r'\{.*\}', content, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(0))
        except json.JSONDecodeError:
            pass

    return None
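
# A minimal usage sketch for _extractJsonFromResponse (hypothetical inputs):
#   _extractJsonFromResponse('{"needsDatabaseQuery": true}')      -> dict
#   _extractJsonFromResponse('```json\n{"sqlQueries": []}\n```')  -> dict
#   _extractJsonFromResponse('Sorry, no JSON here.')              -> None
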
async def chatProcess(
    currentUser: User,
    userInput: UserInputRequest,
    workflowId: Optional[str] = None
) -> ChatWorkflow:
    """
    Simple chatbot processing - analyze user input and generate queries.

    Flow:
    1. Create or load workflow
    2. Store user message
    3. AI analyzes user input to create a list of needed queries
    4. Stream queries back

    Args:
        currentUser: Current user
        userInput: User input request
        workflowId: Optional workflow ID to continue an existing conversation

    Returns:
        ChatWorkflow instance
    """
    try:
        # Get services
        services = getServices(currentUser, None)
        interfaceDbChat = services.interfaceDbChat

        # Get event manager and create queue if needed
        event_manager = get_event_manager()

        # Create or load workflow
        if workflowId:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")

            # Resume workflow: increment round number
            new_round = workflow.currentRound + 1
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "running",
                "currentRound": new_round,
                "lastActivity": getUtcTimestamp()
            })
            workflow = interfaceDbChat.getWorkflow(workflowId)
            logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}")

            # Create event queue if it doesn't exist (for streaming)
            if not event_manager.has_queue(workflowId):
                event_manager.create_queue(workflowId)
        else:
            # Generate conversation name based on the user's prompt
            conversation_name = await generate_conversation_name(
                services,
                userInput.prompt,
                userInput.userLanguage
            )

            # Create new workflow
            workflowData = {
                "id": str(uuid.uuid4()),
                "mandateId": currentUser.mandateId,
                "status": "running",
                "name": conversation_name,
                "currentRound": 1,
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0,
                "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value,
                "startedAt": getUtcTimestamp(),
                "lastActivity": getUtcTimestamp()
            }
            workflow = interfaceDbChat.createWorkflow(workflowData)
            logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}")

            # Create event queue for new workflow (for streaming)
            event_manager.create_queue(workflow.id)

        # Reload workflow to get the current message count
        workflow = interfaceDbChat.getWorkflow(workflow.id)

        # Store user message
        userMessageData = {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": workflow.id,
            "message": userInput.prompt,
            "role": "user",
            "status": "first" if workflowId is None else "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        userMessage = interfaceDbChat.createMessage(userMessageData)
        logger.info(f"Stored user message: {userMessage.id}")

        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            context_id=workflow.id,
            event_type="chatdata",
            data={
                "type": "message",
                "createdAt": message_timestamp,
                "item": userMessage.dict()
            },
            event_category="chat"
        )

        # Update workflow status
        interfaceDbChat.updateWorkflow(workflow.id, {
            "status": "running",
            "lastActivity": getUtcTimestamp()
        })

        # Process in background (async)
        asyncio.create_task(_processChatbotMessage(
            services,
            workflow.id,
            userInput,
            userMessage.id
        ))

        # Reload workflow to include the new message
        workflow = interfaceDbChat.getWorkflow(workflow.id)
        return workflow

    except Exception as e:
        logger.error(f"Error in chatProcess: {str(e)}", exc_info=True)
        raise
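
# Note on the fire-and-forget task above: the result of asyncio.create_task()
# is not stored, so the event loop holds the only strong reference to the
# background task. A sketch of how a caller could keep tasks alive explicitly
# (the names here are illustrative, not part of this module):
#
#   background_tasks: set = set()
#   task = asyncio.create_task(_processChatbotMessage(...))
#   background_tasks.add(task)
#   task.add_done_callback(background_tasks.discard)
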
async def _execute_queries_parallel(queries: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Execute multiple SQL queries in parallel.

    Args:
        queries: List of query dictionaries, each containing:
            - "query": SQL query string
            - "purpose": Description of what the query retrieves
            - "table": Primary table name

    Returns:
        Dictionary mapping query indices to results:
            - "query_1", "query_2", etc.: Success result text
            - "query_1_data", "query_2_data", etc.: Raw data arrays
            - "query_1_error", "query_2_error", etc.: Error messages if a query failed
    """
    async def execute_single_query(idx: int, query_info: Dict[str, Any]):
        """Execute a single query and return the result."""
        connector = PreprocessorConnector()
        try:
            query_text = query_info.get("query", "")
            result = await connector.executeQuery(query_text, return_json=True)
            await connector.close()
            return idx, result, None
        except Exception as e:
            await connector.close()
            return idx, None, str(e)

    # Execute all queries in parallel
    tasks = [execute_single_query(i, q) for i, q in enumerate(queries)]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # Process results into a dictionary
    query_results = {}
    for result in results:
        if isinstance(result, Exception):
            # Handle exceptions from gather
            logger.error(f"Exception in parallel query execution: {result}")
            continue

        idx, result_data, error = result

        if error:
            query_results[f"query_{idx+1}_error"] = error
            logger.error(f"Query {idx+1} failed: {error}")
        else:
            if result_data and not result_data.get("text", "").startswith(("Error:", "Query failed:")):
                query_results[f"query_{idx+1}"] = result_data.get("text", "")
                query_results[f"query_{idx+1}_data"] = result_data.get("data", [])
                row_count = len(result_data.get('data', []))
                logger.info(f"Query {idx+1} executed successfully, returned {row_count} rows")
            else:
                error_text = result_data.get("text", "Query failed") if result_data else "Query failed: No response"
                query_results[f"query_{idx+1}_error"] = error_text
                logger.error(f"Query {idx+1} failed: {error_text}")

    return query_results
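
# Illustrative shape of the returned mapping for two queries where the second
# one fails (all values are hypothetical):
#   {
#       "query_1": "2 rows ...",
#       "query_1_data": [{"Artikelnummer": "..."}, {"Artikelnummer": "..."}],
#       "query_2_error": "syntax error at or near ...",
#   }
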
async def _emit_log_and_event(
    interfaceDbChat,
    workflowId: str,
    event_manager,
    message: str,
    log_type: str = "info",
    status: str = "running",
    round_number: Optional[int] = None
) -> None:
    """
    Store a log in the database. The route's periodic chat data fetch will
    handle emitting it; this avoids duplicate log emissions.

    Args:
        interfaceDbChat: Database interface
        workflowId: Workflow ID
        event_manager: Event manager (unused, kept for compatibility)
        message: Log message
        log_type: Log type (info, warning, error)
        status: Status string
        round_number: Optional round number (fetched from the workflow if not provided)
    """
    try:
        # Get round number from workflow if not provided
        if round_number is None:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if workflow:
                round_number = workflow.currentRound

        log_timestamp = getUtcTimestamp()
        log_data = {
            "id": f"log_{uuid.uuid4()}",
            "workflowId": workflowId,
            "message": message,
            "type": log_type,
            "timestamp": log_timestamp,
            "status": status,
            "roundNumber": round_number
        }
        # Only store in database - the route's periodic fetch will emit it
        interfaceDbChat.createLog(log_data)
    except Exception as e:
        logger.error(f"Error storing log: {e}")
async def _check_workflow_stopped(interfaceDbChat, workflowId: str) -> bool:
    """
    Check if the workflow was stopped.

    Args:
        interfaceDbChat: Database interface
        workflowId: Workflow ID

    Returns:
        True if the workflow is stopped, False otherwise
    """
    try:
        workflow = interfaceDbChat.getWorkflow(workflowId)
        # bool() so a missing workflow yields False rather than None
        return bool(workflow and workflow.status == "stopped")
    except Exception as e:
        logger.warning(f"Error checking workflow status: {e}")
        return False
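
# Cooperative cancellation: _processChatbotMessage polls this helper between
# processing steps instead of cancelling the asyncio task, so a stop request
# takes effect at the next checkpoint rather than immediately.
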
def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults: Optional[Dict[str, Any]] = None) -> str:
    """
    Build an enriched web research query by extracting product context from the
    conversation history and the current prompt.

    Extracts product information from:
    1. Current user prompt (article numbers, product mentions)
    2. Database query results (if available)
    3. Previous assistant messages (conversation history)

    Args:
        userPrompt: Current user prompt
        workflowMessages: List of workflow messages (conversation history)
        queryResults: Optional database query results to extract product info from

    Returns:
        Enriched search query string
    """
    # Normalize user prompt for detection
    prompt_lower = userPrompt.lower().strip()

    # Patterns that indicate a search request
    search_patterns = [
        "ja", "yes", "oui", "si",
        "such", "suche", "search", "recherche", "recherchier",
        "internet", "web", "online",
        "datenblatt", "datasheet", "fiche technique",
        "mehr informationen", "more information", "plus d'information",
        "weitere informationen", "further information", "additional information"
    ]

    # Check if the current prompt contains search-related keywords
    has_search_intent = any(pattern in prompt_lower for pattern in search_patterns)

    # Extract product information - try multiple sources
    article_number = None
    article_description = None
    supplier = None

    # Patterns for article numbers like "6AV2 181-8XP00-0AX0" or "6AV2181-8XP00-0AX0"
    article_patterns = [
        r'\b[A-Z0-9]{2,}\s+[0-9]{3,}-[A-Z0-9-]+\b',  # With space: "6AV2 181-8XP00-0AX0"
        r'\b[A-Z0-9]{4,}[\s-][A-Z0-9-]{6,}\b',  # General pattern
        r'\b[A-Z]{2,}[0-9]+\s+[0-9]+-[A-Z0-9-]+\b',  # Specific Siemens pattern
    ]

    # 1. First, try to extract from the current user prompt
    for pattern in article_patterns:
        matches = re.findall(pattern, userPrompt)
        if matches:
            article_number = matches[0]
            logger.info(f"Extracted article number from user prompt: {article_number}")
            break

    # 2. Try to extract from database query results if available.
    # Always check queryResults to enrich with product description and supplier,
    # even if article_number was already found.
    if queryResults:
        # Look for article numbers in query result text (if not already found)
        if not article_number:
            for key in queryResults.keys():
                if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                    result_text = queryResults.get(key, "")
                    if isinstance(result_text, str):
                        for pattern in article_patterns:
                            matches = re.findall(pattern, result_text)
                            if matches:
                                article_number = matches[0]
                                logger.info(f"Extracted article number from query results: {article_number}")
                                break
                    if article_number:
                        break

        # Always check data arrays for product description and supplier
        # (even if article_number was already found)
        for key in queryResults.keys():
            if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                data_key = f"{key}_data"
                if data_key in queryResults:
                    data_array = queryResults[data_key]
                    if isinstance(data_array, list) and len(data_array) > 0:
                        # Look for product info in the first row
                        first_row = data_array[0]
                        if isinstance(first_row, dict):
                            # Check common article number fields (if not already found)
                            if not article_number:
                                for field in ["Artikelnummer", "Artikelkürzel", "article_number", "articleNumber"]:
                                    if field in first_row and first_row[field]:
                                        article_number = str(first_row[field])
                                        logger.info(f"Extracted article number from query data: {article_number}")
                                        break

                            # Check article description (can enrich even if article_number was already found)
                            if not article_description:
                                for field in ["Artikelbezeichnung", "Bezeichnung", "article_description", "description"]:
                                    if field in first_row and first_row[field]:
                                        article_description = str(first_row[field])
                                        logger.info(f"Extracted article description from query data: {article_description}")
                                        break

                            # Check supplier (can enrich even if article_number was already found)
                            if not supplier:
                                for field in ["Lieferant", "Supplier", "supplier"]:
                                    if field in first_row and first_row[field]:
                                        supplier = str(first_row[field])
                                        logger.info(f"Extracted supplier from query data: {supplier}")
                                        break

            # If we found all needed info, we can stop
            if article_number and article_description and supplier:
                break

    # 3. Extract from previous assistant messages (conversation history)
    if not article_number or not article_description:
        for msg in reversed(workflowMessages[-10:]):
            if msg.role == "assistant":
                message_text = msg.message

                # Extract article number if not found yet
                if not article_number:
                    for pattern in article_patterns:
                        matches = re.findall(pattern, message_text)
                        if matches:
                            article_number = matches[0]
                            break

                # Extract article description if not found yet
                if not article_description:
                    description_patterns = [
                        r'Es handelt sich um\s+([^\.]+)',
                        r'It is a\s+([^\.]+)',
                        r'C\'est\s+([^\.]+)',
                        r'Bezeichnung:\s*([^\n]+)',
                        r'Description:\s*([^\n]+)',
                        r'Artikelbezeichnung:\s*([^\n]+)'
                    ]
                    for pattern in description_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            article_description = match.group(1).strip()
                            break

                # Extract supplier if not found yet
                if not supplier:
                    supplier_patterns = [
                        r'von\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'from\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'Lieferant:\s*([^\n]+)',
                        r'Supplier:\s*([^\n]+)'
                    ]
                    for pattern in supplier_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            supplier = match.group(1).strip()
                            break

            # Stop if we found everything
            if article_number and article_description and supplier:
                break

    # Build the enriched search query
    query_parts = []

    # If we have search intent but no product info, try to use the user prompt intelligently
    if has_search_intent and not article_number and not article_description:
        # Remove common search phrases and keep the product-related parts
        cleaned_prompt = userPrompt
        for phrase in ["recherchier nach", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information"]:
            cleaned_prompt = re.sub(phrase, "", cleaned_prompt, flags=re.IGNORECASE)
        cleaned_prompt = cleaned_prompt.strip()

        # If the cleaned prompt still has content and differs from the original, use it
        if cleaned_prompt and cleaned_prompt != userPrompt and len(cleaned_prompt) > 10:
            query_parts.append(cleaned_prompt)

    # Add article description if found
    if article_description:
        query_parts.append(article_description)

    # Add article number if found
    if article_number:
        query_parts.append(article_number)

    # Add supplier if found
    if supplier:
        query_parts.append(supplier)

    # Add "Datenblatt" if the user requested a datasheet, or by default if we have product info
    if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower:
        query_parts.append("Datenblatt")
    elif query_parts:
        # We have product info but no explicit datasheet request - add it anyway
        query_parts.append("Datenblatt")

    # If we found product information or built a meaningful query, use it
    if query_parts:
        enriched_query = " ".join(query_parts)
        logger.info(f"Built enriched search query: '{enriched_query}' from context (original: '{userPrompt}')")
        return enriched_query
    else:
        # Fall back to the original prompt
        logger.info(f"No product context found, using original prompt: '{userPrompt}'")
        return userPrompt
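
# Illustrative enrichment (hypothetical values): for the prompt
# "Ja, such bitte das Datenblatt" and a prior query result row
# {"Artikelnummer": "6AV2181-8XP00-0AX0", "Artikelbezeichnung": "SIMATIC HMI KTP700",
#  "Lieferant": "Siemens AG"}, the function returns roughly
# "SIMATIC HMI KTP700 6AV2181-8XP00-0AX0 Siemens AG Datenblatt".
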
async def _processChatbotMessage(
    services,
    workflowId: str,
    userInput: UserInputRequest,
    userMessageId: str
):
    """
    Process a chatbot message in the background.

    Analyzes the user input and generates a list of queries, then streams
    them back.
    """
    event_manager = get_event_manager()

    try:
        interfaceDbChat = services.interfaceDbChat

        # Reload workflow to get current messages
        workflow = interfaceDbChat.getWorkflow(workflowId)
        if not workflow:
            logger.error(f"Workflow {workflowId} not found during processing")
            await event_manager.emit_event(
                context_id=workflowId,
                event_type="error",
                data={"error": f"Workflow {workflowId} nicht gefunden"},
                event_category="workflow",
                message=f"Workflow {workflowId} nicht gefunden",
                step="error"
            )
            return

        # Check if the workflow was stopped before starting
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, aborting processing")
            return

        # Build conversation context from history
        context = ""
        if workflow.messages:
            recent_messages = workflow.messages[-5:]
            context = "\n\nPrevious conversation:\n"
            for msg in recent_messages:
                if msg.role == "user":
                    context += f"User: {msg.message}\n"
                elif msg.role == "assistant":
                    context += f"Assistant: {msg.message}\n"

        await services.ai.ensureAiObjectsInitialized()

        # Step 1: Analyze user input to generate queries
        logger.info("Analyzing user input to generate queries...")
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...")

        analysisPrompt = get_initial_analysis_prompt(userInput.prompt, context)

        # AI call for analysis
        method_ai = MethodAi(services)
        analysis_result = await method_ai.process({
            "aiPrompt": analysisPrompt,
            "documentList": None,
            "resultType": "json",
            "simpleMode": True
        })

        # Extract content from ActionResult
        analysis_content = None
        if analysis_result.success and analysis_result.documents:
            analysis_content = analysis_result.documents[0].documentData
            if isinstance(analysis_content, bytes):
                analysis_content = analysis_content.decode('utf-8')

        if not analysis_content:
            logger.warning("Analysis failed, using fallback")
            analysis = {}
        else:
            # Fall back to an empty dict if no JSON could be extracted,
            # so the .get() calls below cannot fail on None
            analysis = _extractJsonFromResponse(analysis_content) or {}

        # Extract analysis results
        needsDatabaseQuery = analysis.get("needsDatabaseQuery", False)
        needsWebResearch = analysis.get("needsWebResearch", False)
        sql_queries = analysis.get("sqlQueries", [])
        # Support the legacy single-query format for backward compatibility
        if not sql_queries and analysis.get("sqlQuery"):
            sql_queries = [{
                "query": analysis.get("sqlQuery", ""),
                "purpose": "Database query",
                "table": "Unknown"
            }]
        reasoning = analysis.get("reasoning", "")

        logger.info(f"Analysis: DB={needsDatabaseQuery}, Web={needsWebResearch}, SQL queries={len(sql_queries)}")
        # Build an initial enriched web research query if needed
        # (for logging; it is rebuilt after the DB queries run)
        enriched_web_query = None
        if needsWebResearch:
            enriched_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages)

        # Build the list of queries to stream back
        queries = []

        if needsDatabaseQuery and sql_queries:
            for i, sql_query_info in enumerate(sql_queries, 1):
                queries.append({
                    "type": "database",
                    "query": sql_query_info.get("query", ""),
                    "purpose": sql_query_info.get("purpose", f"Query {i}"),
                    "table": sql_query_info.get("table", "Unknown"),
                    "reasoning": reasoning
                })

        if needsWebResearch:
            queries.append({
                "type": "web",
                "query": enriched_web_query or userInput.prompt,
                "reasoning": reasoning
            })

        # Format queries as log text
        log_lines = []
        if queries:
            db_queries = [q for q in queries if q["type"] == "database"]
            log_lines.append(f"Generiert: {len(db_queries)} Datenbankabfrage(n) und {len(queries) - len(db_queries)} Web-Recherche(n)\n\n")
            for i, q in enumerate(queries, 1):
                if q["type"] == "database":
                    log_lines.append(f"{i}. Datenbankabfrage ({q.get('table', 'Unknown')}):\n")
                    log_lines.append(f" Zweck: {q.get('purpose', 'Nicht angegeben')}\n")
                    log_lines.append(f"```sql\n{q['query']}\n```\n")
                elif q["type"] == "web":
                    log_lines.append(f"{i}. Web-Recherche:\n")
                    log_lines.append(f" Suchbegriff: {q['query']}\n")
                if q.get("reasoning"):
                    log_lines.append(f" Begründung: {q['reasoning']}\n")
                log_lines.append("\n")
        else:
            log_lines.append("Keine Abfragen erforderlich.")

        log_text = "".join(log_lines)

        # Stream the queries as a log
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, log_text)
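
        # Illustrative log_text for one database query plus one web search
        # (hypothetical values):
        #   Generiert: 1 Datenbankabfrage(n) und 1 Web-Recherche(n)
        #
        #   1. Datenbankabfrage (Artikel):
        #    Zweck: Artikeldaten laden
        #   ```sql
        #   SELECT * FROM Artikel
        #   ```
        #   2. Web-Recherche:
        #    Suchbegriff: SIMATIC HMI KTP700 Datenblatt
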
        # Check if the workflow was stopped before executing queries
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, aborting query execution")
            return

        # Step 2: Execute queries
        queryResults = {}
        webResearchResults = ""

        # Execute database queries in parallel
        if needsDatabaseQuery and sql_queries:
            logger.info(f"Executing {len(sql_queries)} database queries in parallel...")
            await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Führe {len(sql_queries)} Datenbankabfrage(n) parallel aus...")

            try:
                queryResults = await _execute_queries_parallel(sql_queries)

                # Log a results summary
                successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
                failed_queries = [k for k in queryResults.keys() if k.endswith("_error")]

                if successful_queries:
                    total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)
                    logger.info(f"Successfully executed {len(successful_queries)} query/queries, total {total_rows} rows")
                    await _emit_log_and_event(
                        interfaceDbChat,
                        workflowId,
                        event_manager,
                        f"Abgeschlossen: {len(successful_queries)} Abfrage(n) erfolgreich, {total_rows} Ergebnis{'se' if total_rows != 1 else ''} gefunden"
                    )

                if failed_queries:
                    logger.warning(f"{len(failed_queries)} query/queries failed")
                    await _emit_log_and_event(
                        interfaceDbChat,
                        workflowId,
                        event_manager,
                        f"Warnung: {len(failed_queries)} Abfrage(n) fehlgeschlagen",
                        log_type="warning"
                    )
            except Exception as e:
                logger.error(f"Error executing parallel queries: {e}")
                queryResults["error"] = f"Error executing queries: {str(e)}"
                await _emit_log_and_event(
                    interfaceDbChat,
                    workflowId,
                    event_manager,
                    "Fehler bei parallelen Datenbankabfragen",
                    log_type="error"
                )

        # Execute web research
        if needsWebResearch:
            logger.info("Performing web research...")
            await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Suche im Internet nach Informationen...")

            try:
                # Rebuild the enriched query with database results if available (better product context)
                web_research_query = _buildWebResearchQuery(
                    userInput.prompt,
                    workflow.messages,
                    queryResults if queryResults else None
                )

                logger.info(f"Using enriched web research query: '{web_research_query}'")

                researchResult = await services.web.performWebResearch(
                    prompt=web_research_query,
                    urls=[],
                    country=None,
                    language=userInput.userLanguage or "de",
                    researchDepth="general",
                    operationId=None
                )
                webResearchResults = json.dumps(researchResult, ensure_ascii=False, indent=2) if isinstance(researchResult, dict) else str(researchResult)
                await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche abgeschlossen")
            except Exception as e:
                logger.error(f"Web research failed: {e}", exc_info=True)
                webResearchResults = f"Web research error: {str(e)}"
                await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")

        # Check if the workflow was stopped before generating the final answer
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, aborting final answer generation")
            return
        # Step 3: Generate the final answer using AI
        logger.info("Generating final answer with AI...")
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Formuliere finale Antwort...")

        # Build prompt for the final answer
        system_prompt = get_final_answer_system_prompt()

        # Build answer context with query results
        answerContext = f"User question: {userInput.prompt}{context}\n\n"

        # Add database results - organize by query with metadata
        db_results_part = ""
        if queryResults:
            successful_results = []
            error_results = []

            # Extract query metadata from sql_queries if available
            query_metadata = {}
            if sql_queries:
                for i, q_info in enumerate(sql_queries, 1):
                    query_metadata[f"query_{i}"] = {
                        "purpose": q_info.get("purpose", f"Query {i}"),
                        "table": q_info.get("table", "Unknown")
                    }

            # Organize results by query number
            query_numbers = set()
            for key in queryResults.keys():
                if key.startswith("query_") and not key.endswith("_data"):
                    # Extract the query number (e.g., "query_1" -> 1)
                    try:
                        num = int(key.split("_")[1])
                        query_numbers.add(num)
                    except (ValueError, IndexError):
                        pass

            # Build results with metadata
            for query_num in sorted(query_numbers):
                query_key = f"query_{query_num}"
                error_key = f"{query_key}_error"

                if error_key in queryResults:
                    error_msg = queryResults[error_key]
                    metadata = query_metadata.get(query_key, {})
                    purpose = metadata.get("purpose", f"Query {query_num}")
                    table = metadata.get("table", "Unknown")
                    error_results.append(f"Abfrage {query_num} ({table} - {purpose}): {error_msg}")
                elif query_key in queryResults:
                    result_text = queryResults[query_key]
                    metadata = query_metadata.get(query_key, {})
                    purpose = metadata.get("purpose", f"Query {query_num}")
                    table = metadata.get("table", "Unknown")
                    successful_results.append(f"=== Abfrage {query_num}: {purpose} (Tabelle: {table}) ===\n{result_text}")

            # Handle a general error if present
            if "error" in queryResults:
                error_results.append(f"Allgemeiner Fehler: {queryResults['error']}")

            if successful_results:
                db_results_part = "\n\nDATENBANK-ERGEBNISSE:\n" + "\n\n".join(successful_results)
                answerContext += "DATENBANK-ERGEBNISSE:\n" + "\n\n".join(successful_results) + "\n\n"

            if error_results:
                db_results_part += "\n\nDATENBANK-FEHLER:\n" + "\n".join(error_results)
                answerContext += "DATENBANK-FEHLER:\n" + "\n".join(error_results) + "\n\n"

        # Add web research results
        web_results_part = ""
        if webResearchResults:
            web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}"
            answerContext += f"INTERNET-RECHERCHE:\n{webResearchResults}\n\n"

        # Check if we have any actual data
        successful_query_keys = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
        has_query_results = bool(successful_query_keys)
        error_query_keys = [k for k in queryResults.keys() if k.endswith("_error")]
        has_only_errors = bool(error_query_keys and not successful_query_keys)

        if not has_query_results and needsDatabaseQuery:
            db_results_part = "\n\nWICHTIG: Es wurden KEINE Datenbank-Ergebnisse gefunden. Die Datenbankabfrage wurde nicht ausgeführt oder hat keine Ergebnisse zurückgegeben."

            if has_only_errors:
                db_results_part += "\n\n⚠️⚠️⚠️ KRITISCH - ALLE QUERIES FEHLGESCHLAGEN ⚠️⚠️⚠️\n" + \
                    "ALLE Datenbankabfragen sind fehlgeschlagen. Es gibt KEINE gültigen Daten aus der Datenbank.\n" + \
                    "DU DARFST KEINE DATEN ERFINDEN! Schreibe stattdessen: 'Es wurden keine Artikel gefunden' oder 'Die Datenbankabfrage ist fehlgeschlagen'."

        answer_prompt = f"""{system_prompt}

Antworte auf die folgende Frage des Nutzers: {userInput.prompt}{context}

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".

WICHTIG - MEHRERE ABFRAGEN:
Die oben angegebenen DATENBANK-ERGEBNISSE können aus mehreren separaten Abfragen stammen. Jede Abfrage ist mit "=== Abfrage X ===" markiert und enthält Informationen zu einem spezifischen Aspekt (z.B. Artikel-Informationen, Lagerbestände, etc.).
- Kombiniere die Informationen aus ALLEN erfolgreichen Abfragen zu einer umfassenden Antwort
- Beispiel: Wenn Abfrage 1 Artikel-Informationen liefert und Abfrage 2 Lagerbestände liefert, kombiniere beide in deiner Antwort
- Verwende ALLE verfügbaren Informationen aus den verschiedenen Abfragen

⚠️⚠️⚠️ ABSOLUT VERBOTEN - KEINE DATEN ERFINDEN ⚠️⚠️⚠️
Wenn KEINE Datenbank-Ergebnisse vorhanden sind, dann:
- ❌ ERFINDE KEINE Artikelnummern, Artikelbezeichnungen, Preise oder Lagerbestände!
- ❌ ERFINDE KEINE Beispielartikel!
- ✓ Schreibe stattdessen: "Es wurden keine Artikel in der Datenbank gefunden." oder "Die Datenbankabfrage ist fehlgeschlagen."

WICHTIG: Deine Antwort soll NUR die finale Antwort enthalten - KEINE Planungsschritte, KEINE SQL-Queries, KEINE Zwischenschritte!
Beginne DIREKT mit "Aus der Datenbank habe ich..." (wenn Daten vorhanden) oder "Es wurden keine Artikel gefunden" (wenn keine Daten vorhanden)."""
        answerRequest = AiCallRequest(
            prompt=answer_prompt,
            context=answerContext if (queryResults or webResearchResults) else None,
            options=AiCallOptions(
                resultFormat="txt",
                operationType=OperationTypeEnum.DATA_ANALYSE,
                processingMode=ProcessingModeEnum.DETAILED
            )
        )

        answerResponse = await services.ai.callAi(answerRequest)
        finalAnswer = answerResponse.content

        logger.info("Final answer generated")

        # Check if the workflow was stopped during the AI call - if so, don't store the message
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped during final answer generation, not storing message")
            return

        # Reload workflow to get the current message count
        workflow = interfaceDbChat.getWorkflow(workflowId)

        # Double-check the workflow wasn't stopped while we were reloading
        if workflow and workflow.status == "stopped":
            logger.info(f"Workflow {workflowId} was stopped, not storing final message")
            return

        # Create assistant message with the final answer
        assistantMessageData = {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": workflowId,
            "parentMessageId": userMessageId,
            "message": finalAnswer,
            "role": "assistant",
            "status": "last",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
            "success": True,
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
        logger.info(f"Stored assistant message with final answer: {assistantMessage.id}")

        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            context_id=workflowId,
            event_type="chatdata",
            data={
                "type": "message",
                "createdAt": message_timestamp,
                "item": assistantMessage.dict()
            },
            event_category="chat"
        )

        # Update workflow status to completed (only if not stopped)
        if not await _check_workflow_stopped(interfaceDbChat, workflowId):
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "completed",
                "lastActivity": getUtcTimestamp()
            })
        else:
            logger.info(f"Workflow {workflowId} was stopped, not updating status to completed")

        logger.info(f"Chatbot processing completed for workflow {workflowId}, generated {len(queries)} queries and final answer")

        # Emit completion event only if the workflow wasn't stopped
        if not await _check_workflow_stopped(interfaceDbChat, workflowId):
            await event_manager.emit_event(
                context_id=workflowId,
                event_type="complete",
                data={"workflowId": workflowId},
                event_category="workflow",
                message="Chatbot-Verarbeitung abgeschlossen",
                step="complete"
            )

        # Schedule cleanup
        await event_manager.cleanup(workflowId)
    except Exception as e:
        logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True)

        # Check if the workflow was stopped - if so, don't store an error message
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, not storing error message")
            return

        # Store error message
        try:
            # Reload workflow to get the current message count
            workflow = interfaceDbChat.getWorkflow(workflowId)

            # Double-check the workflow wasn't stopped while we were reloading
            if workflow and workflow.status == "stopped":
                logger.info(f"Workflow {workflowId} was stopped, not storing error message")
                return

            errorMessageData = {
                "id": f"msg_{uuid.uuid4()}",
                "workflowId": workflowId,
                "parentMessageId": userMessageId,
                "message": f"Sorry, I encountered an error: {str(e)}",
                "role": "assistant",
                "status": "last",
                "sequenceNr": len(workflow.messages) + 1 if workflow else 1,
                "publishedAt": getUtcTimestamp(),
                "success": False,
                "roundNumber": workflow.currentRound if workflow else 1,
                "taskNumber": 0,
                "actionNumber": 0
            }
            errorMessage = interfaceDbChat.createMessage(errorMessageData)

            # Emit message event for streaming (exact chatData format)
            message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp())
            await event_manager.emit_event(
                context_id=workflowId,
                event_type="chatdata",
                data={
                    "type": "message",
                    "createdAt": message_timestamp,
                    "item": errorMessage.dict()
                },
                event_category="chat"
            )

            # Update workflow status to error (only if not stopped)
            if not await _check_workflow_stopped(interfaceDbChat, workflowId):
                interfaceDbChat.updateWorkflow(workflowId, {
                    "status": "error",
                    "lastActivity": getUtcTimestamp()
                })
            else:
                logger.info(f"Workflow {workflowId} was stopped, not updating status to error")

            # Schedule cleanup
            await event_manager.cleanup(workflowId)
        except Exception as storeError:
            logger.error(f"Error storing error message: {storeError}")