# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Simple chatbot feature - basic implementation.
User input is processed by AI to create a list of needed queries.
Those queries get streamed back.
"""

import logging
import json
import uuid
import asyncio
import re
import base64
from typing import Optional, Dict, Any, List

from modules.datamodels.datamodelChat import ChatWorkflow, UserInputRequest, WorkflowModeEnum, ChatLog, ChatDocument
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum
from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.services import getInterface as getServices
from modules.features.chatbot.eventManager import get_event_manager
from modules.workflows.methods.methodAi.methodAi import MethodAi
from modules.connectors.connectorPreprocessor import PreprocessorConnector
from modules.features.chatbot.chatbotConstants import (
    get_initial_analysis_prompt,
    generate_conversation_name,
    get_final_answer_system_prompt,
    get_final_answer_prompt_with_results
)

logger = logging.getLogger(__name__)

def _extractJsonFromResponse(content: str) -> Optional[dict]:
    """Extract JSON from AI response, handling markdown code blocks."""
    # Try direct JSON parse first
    try:
        return json.loads(content.strip())
    except json.JSONDecodeError:
        pass

    # Try to extract JSON from markdown code blocks
    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            pass

    # Try to find JSON object in the text
    json_match = re.search(r'\{.*\}', content, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(0))
        except json.JSONDecodeError:
            pass

    return None

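# Illustrative examples (not part of the original module) of the three formats
# the helper above handles, plus the failure case:
#   _extractJsonFromResponse('{"a": 1}')                        -> {"a": 1}
#   _extractJsonFromResponse('```json\n{"a": 1}\n```')          -> {"a": 1}
#   _extractJsonFromResponse('Reasoning ... {"a": 1} ... done') -> {"a": 1}
#   _extractJsonFromResponse('no json here')                    -> None
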
async def chatProcess(
    currentUser: User,
    mandateId: str,
    userInput: UserInputRequest,
    workflowId: Optional[str] = None
) -> ChatWorkflow:
    """
    Simple chatbot processing - analyze user input and generate queries.

    Flow:
    1. Create or load workflow
    2. Store user message
    3. AI analyzes user input to create a list of needed queries
    4. Stream queries back

    Args:
        currentUser: Current user
        mandateId: Mandate context (from RequestContext / X-Mandate-Id header)
        userInput: User input request
        workflowId: Optional workflow ID to continue existing conversation

    Returns:
        ChatWorkflow instance
    """
    try:
        # Get services with mandate context
        services = getServices(currentUser, None, mandateId=mandateId)
        interfaceDbChat = services.interfaceDbChat

        # Get event manager and create queue if needed
        event_manager = get_event_manager()

        # Create or load workflow
        if workflowId:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")

            # Resume workflow: increment round number
            new_round = workflow.currentRound + 1
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "running",
                "currentRound": new_round,
                "lastActivity": getUtcTimestamp()
            })
            workflow = interfaceDbChat.getWorkflow(workflowId)
            logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}")

            # Create event queue if it doesn't exist (for streaming)
            if not event_manager.has_queue(workflowId):
                event_manager.create_queue(workflowId)
        else:
            # Generate conversation name based on user's prompt
            conversation_name = await generate_conversation_name(
                services,
                userInput.prompt,
                userInput.userLanguage
            )

            # Create new workflow
            workflowData = {
                "id": str(uuid.uuid4()),
                "mandateId": mandateId,
                "status": "running",
                "name": conversation_name,
                "currentRound": 1,
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0,
                "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value,
                "startedAt": getUtcTimestamp(),
                "lastActivity": getUtcTimestamp()
            }
            workflow = interfaceDbChat.createWorkflow(workflowData)
            logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}")

            # Create event queue for new workflow (for streaming)
            event_manager.create_queue(workflow.id)

        # Reload workflow to get current message count
        workflow = interfaceDbChat.getWorkflow(workflow.id)

        # Process uploaded files and create ChatDocuments
        user_documents = []
        if userInput.listFileId and len(userInput.listFileId) > 0:
            logger.info(f"Processing {len(userInput.listFileId)} uploaded file(s) for user message")
            for fileId in userInput.listFileId:
                try:
                    # Get file info from chat service
                    fileInfo = services.chat.getFileInfo(fileId)
                    if not fileInfo:
                        logger.warning(f"No file info found for file ID {fileId}")
                        continue

                    originalFileName = fileInfo.get("fileName", "unknown")
                    originalMimeType = fileInfo.get("mimeType", "application/octet-stream")
                    fileSizeToUse = fileInfo.get("size", 0)

                    # Create ChatDocument for the file
                    document = ChatDocument(
                        id=str(uuid.uuid4()),
                        messageId="",  # Will be set when message is created
                        fileId=fileId,
                        fileName=originalFileName,
                        fileSize=fileSizeToUse,
                        mimeType=originalMimeType,
                        roundNumber=workflow.currentRound,
                        taskNumber=0,
                        actionNumber=0
                    )
                    user_documents.append(document)
                    logger.info(f"Created ChatDocument for file {fileId} -> {originalFileName}")
                except Exception as e:
                    logger.error(f"Error processing file ID {fileId}: {e}", exc_info=True)

        # Store user message
        userMessageData = {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": workflow.id,
            "message": userInput.prompt,
            "role": "user",
            "status": "first" if workflowId is None else "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        userMessage = interfaceDbChat.createMessage(userMessageData)
        logger.info(f"Stored user message: {userMessage.id} with {len(user_documents)} document(s)")

        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            context_id=workflow.id,
            event_type="chatdata",
            data={
                "type": "message",
                "createdAt": message_timestamp,
                "item": userMessage.dict()
            },
            event_category="chat"
        )

        # Update workflow status
        interfaceDbChat.updateWorkflow(workflow.id, {
            "status": "running",
            "lastActivity": getUtcTimestamp()
        })

        # Process in background (async)
        asyncio.create_task(_processChatbotMessage(
            services,
            workflow.id,
            userInput,
            userMessage.id
        ))

        # Reload workflow to include new message
        workflow = interfaceDbChat.getWorkflow(workflow.id)
        return workflow

    except Exception as e:
        logger.error(f"Error in chatProcess: {str(e)}", exc_info=True)
        raise

async def _execute_queries_parallel(queries: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Execute multiple SQL queries in parallel with shared connector.

    Args:
        queries: List of query dictionaries, each containing:
            - "query": SQL query string
            - "purpose": Description of what the query retrieves
            - "table": Primary table name

    Returns:
        Dictionary mapping query indices to results:
            - "query_1", "query_2", etc.: Success result text
            - "query_1_data", "query_2_data", etc.: Raw data arrays
            - "query_1_error", "query_2_error", etc.: Error messages if query failed
    """
    # Create single connector instance to reuse across all queries
    connector = PreprocessorConnector()
    try:
        async def execute_single_query(idx: int, query_info: Dict[str, Any]):
            """Execute a single query using shared connector."""
            try:
                query_text = query_info.get("query", "")
                result = await connector.executeQuery(query_text, return_json=True)
                return idx, result, None
            except Exception as e:
                return idx, None, str(e)

        # Execute all queries in parallel with shared connector
        tasks = [execute_single_query(i, q) for i, q in enumerate(queries)]
        results = await asyncio.gather(*tasks, return_exceptions=True)
    finally:
        # Close connector once after all queries complete
        await connector.close()

    # Process results into dictionary
    query_results = {}
    for result in results:
        if isinstance(result, Exception):
            # Handle exceptions from gather
            logger.error(f"Exception in parallel query execution: {result}")
            continue

        idx, result_data, error = result

        if error:
            query_results[f"query_{idx+1}_error"] = error
            logger.error(f"Query {idx+1} failed: {error}")
        else:
            if result_data and not result_data.get("text", "").startswith(("Error:", "Query failed:")):
                query_results[f"query_{idx+1}"] = result_data.get("text", "")
                query_results[f"query_{idx+1}_data"] = result_data.get("data", [])
                row_count = len(result_data.get('data', []))
                logger.info(f"Query {idx+1} executed successfully, returned {row_count} rows")
            else:
                error_text = result_data.get("text", "Query failed") if result_data else "Query failed: No response"
                query_results[f"query_{idx+1}_error"] = error_text
                logger.error(f"Query {idx+1} failed: {error_text}")

    return query_results

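# Illustrative shape of the returned mapping (not part of the original module),
# for two queries where the second one fails:
#   {
#       "query_1": "<result text>",
#       "query_1_data": [{"Artikelnummer": "...", ...}],
#       "query_2_error": "<error message>",
#   }
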
async def _emit_log_and_event(
    interfaceDbChat,
    workflowId: str,
    event_manager,
    message: str,
    log_type: str = "info",
    status: str = "running",
    round_number: Optional[int] = None
) -> None:
    """
    Store log in database and emit event for streaming.

    Args:
        interfaceDbChat: Database interface
        workflowId: Workflow ID
        event_manager: Event manager for streaming
        message: Log message
        log_type: Log type (info, warning, error)
        status: Status string
        round_number: Optional round number (will be fetched from workflow if not provided)
    """
    try:
        # Get round number from workflow if not provided
        if round_number is None:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if workflow:
                round_number = workflow.currentRound

        log_timestamp = getUtcTimestamp()
        log_data = {
            "id": f"log_{uuid.uuid4()}",
            "workflowId": workflowId,
            "message": message,
            "type": log_type,
            "timestamp": log_timestamp,
            "status": status,
            "roundNumber": round_number
        }
        # Store log in database
        created_log = interfaceDbChat.createLog(log_data)

        # Emit event directly for streaming (using correct signature)
        if created_log and event_manager:
            try:
                # Convert to dict if it's a Pydantic model
                # (ChatLog is already imported at module level; no local import needed)
                if hasattr(created_log, "model_dump"):
                    log_dict = created_log.model_dump()
                elif hasattr(created_log, "dict"):
                    log_dict = created_log.dict()
                else:
                    log_dict = log_data

                await event_manager.emit_event(
                    context_id=workflowId,
                    event_type="chatdata",
                    data={
                        "type": "log",
                        "createdAt": log_timestamp,
                        "item": log_dict
                    },
                    event_category="chat",
                    message="New log",
                    step="log"
                )
            except Exception as emit_error:
                logger.warning(f"Error emitting log event: {emit_error}")
    except Exception as e:
        logger.error(f"Error storing log: {e}", exc_info=True)

async def _check_workflow_stopped(interfaceDbChat, workflowId: str) -> bool:
    """
    Check if workflow was stopped.

    Args:
        interfaceDbChat: Database interface
        workflowId: Workflow ID

    Returns:
        True if workflow is stopped, False otherwise
    """
    try:
        workflow = interfaceDbChat.getWorkflow(workflowId)
        # Wrap in bool() so a missing workflow yields False rather than None
        return bool(workflow and workflow.status == "stopped")
    except Exception as e:
        logger.warning(f"Error checking workflow status: {e}")
        return False

def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults: Optional[Dict[str, Any]] = None) -> str:
    """
    Build enriched web research query by extracting product context from conversation history and current prompt.

    Extracts product information from:
    1. Current user prompt (article numbers, product mentions)
    2. Database query results (if available)
    3. Previous assistant messages (conversation history)

    Args:
        userPrompt: Current user prompt
        workflowMessages: List of workflow messages (conversation history)
        queryResults: Optional database query results to extract product info from

    Returns:
        Enriched search query string
    """
    # Normalize user prompt for detection
    prompt_lower = userPrompt.lower().strip()

    # Patterns that indicate a search request
    search_patterns = [
        "ja", "yes", "oui", "si",
        "such", "suche", "search", "recherche", "recherchier",
        "internet", "web", "online",
        "datenblatt", "datasheet", "fiche technique",
        "mehr informationen", "more information", "plus d'information",
        "weitere informationen", "further information", "additional information"
    ]

    # Certification patterns that require web research
    certification_patterns = [
        "ul", "ce", "tüv", "vde", "iec", "en", "iso",
        "zertifiziert", "certified", "certification", "zertifizierung",
        "geprüft", "approved", "compliance"
    ]

    # Check if current prompt contains search-related keywords
    has_search_intent = any(pattern in prompt_lower for pattern in search_patterns)

    # Check if prompt contains certification-related keywords
    has_certification_intent = any(pattern in prompt_lower for pattern in certification_patterns)

    # Extract product information - try multiple sources
    article_number = None
    article_description = None
    supplier = None

    # Patterns for article numbers like "6AV2 181-8XP00-0AX0" or "6AV2181-8XP00-0AX0"
    article_patterns = [
        r'\b[A-Z0-9]{2,}\s+[0-9]{3,}-[A-Z0-9-]+\b',  # With space: "6AV2 181-8XP00-0AX0"
        r'\b[A-Z0-9]{4,}[\s-][A-Z0-9-]{6,}\b',  # General pattern
        r'\b[A-Z]{2,}[0-9]+\s+[0-9]+-[A-Z0-9-]+\b',  # Specific Siemens pattern
    ]

    # 1. First, try to extract from current user prompt
    for pattern in article_patterns:
        matches = re.findall(pattern, userPrompt)
        if matches:
            article_number = matches[0]
            logger.info(f"Extracted article number from user prompt: {article_number}")
            break

    # 2. Try to extract from database query results if available
    # Always check queryResults to enrich with product description and supplier, even if article_number was already found
    if queryResults:
        # Look for article numbers in query result text (if not already found)
        if not article_number:
            for key in queryResults.keys():
                if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                    result_text = queryResults.get(key, "")
                    if isinstance(result_text, str):
                        for pattern in article_patterns:
                            matches = re.findall(pattern, result_text)
                            if matches:
                                article_number = matches[0]
                                logger.info(f"Extracted article number from query results: {article_number}")
                                break
                if article_number:
                    break

        # Always check data arrays for product description and supplier (even if article_number already found)
        for key in queryResults.keys():
            if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                data_key = f"{key}_data"
                if data_key in queryResults:
                    data_array = queryResults[data_key]
                    if isinstance(data_array, list) and len(data_array) > 0:
                        # Look for article number in first row (if not already found)
                        first_row = data_array[0]
                        if isinstance(first_row, dict):
                            # Check common article number fields (if not already found)
                            if not article_number:
                                for field in ["Artikelnummer", "Artikelkürzel", "article_number", "articleNumber"]:
                                    if field in first_row and first_row[field]:
                                        article_number = str(first_row[field])
                                        logger.info(f"Extracted article number from query data: {article_number}")
                                        break

                            # Always check article description (can enrich even if article_number already found)
                            if not article_description:
                                for field in ["Artikelbezeichnung", "Bezeichnung", "article_description", "description"]:
                                    if field in first_row and first_row[field]:
                                        article_description = str(first_row[field])
                                        logger.info(f"Extracted article description from query data: {article_description}")
                                        break

                            # Always check supplier (can enrich even if article_number already found)
                            if not supplier:
                                for field in ["Lieferant", "Supplier", "supplier"]:
                                    if field in first_row and first_row[field]:
                                        supplier = str(first_row[field])
                                        logger.info(f"Extracted supplier from query data: {supplier}")
                                        break

            # If we found all needed info, we can stop
            if article_number and article_description and supplier:
                break

    # Check if current prompt is an explicit search request that should NOT use context
    # If user explicitly asks to search for something, prioritize that over previous messages
    explicit_search_patterns = [
        r"recherchier\s+(?:im\s+internet\s+)?nach\s+(.+)",
        r"suche\s+(?:im\s+internet\s+)?nach\s+(.+)",
        r"search\s+(?:the\s+internet\s+)?for\s+(.+)",
        r"find\s+(?:information\s+)?(?:about\s+)?(.+)",
        r"recherche\s+(?:sur\s+internet\s+)?(.+)"
    ]

    explicit_search_term = None
    for pattern in explicit_search_patterns:
        match = re.search(pattern, userPrompt, re.IGNORECASE)
        if match:
            explicit_search_term = match.group(1).strip()
            logger.info(f"Found explicit search term in prompt: '{explicit_search_term}'")
            break

    # 3. Extract from previous assistant messages (conversation history)
    # ONLY if there's no explicit search term (to avoid using old context for new searches)
    if not explicit_search_term and (not article_number or not article_description):
        for msg in reversed(workflowMessages[-10:]):
            if msg.role == "assistant":
                message_text = msg.message

                # Extract article number if not found yet
                if not article_number:
                    for pattern in article_patterns:
                        matches = re.findall(pattern, message_text)
                        if matches:
                            article_number = matches[0]
                            break

                # Extract article description if not found yet
                if not article_description:
                    description_patterns = [
                        r'Es handelt sich um\s+([^\.]+)',
                        r'It is a\s+([^\.]+)',
                        r'C\'est\s+([^\.]+)',
                        r'Bezeichnung:\s*([^\n]+)',
                        r'Description:\s*([^\n]+)',
                        r'Artikelbezeichnung:\s*([^\n]+)'
                    ]
                    for pattern in description_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            article_description = match.group(1).strip()
                            break

                # Extract supplier if not found yet
                if not supplier:
                    supplier_patterns = [
                        r'von\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'from\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'Lieferant:\s*([^\n]+)',
                        r'Supplier:\s*([^\n]+)'
                    ]
                    for pattern in supplier_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            supplier = match.group(1).strip()
                            break

            # Stop if we found everything
            if article_number and article_description and supplier:
                break

    # Build enriched search query
    query_parts = []

    # If we have an explicit search term, use it as the primary query
    if explicit_search_term:
        query_parts.append(explicit_search_term)
        logger.info(f"Using explicit search term as primary query: '{explicit_search_term}'")
    # If we have search intent but no product info, try to use the user prompt intelligently
    elif has_search_intent and not article_number and not article_description:
        # Try to extract meaningful parts from the prompt
        # Remove common search phrases and keep the product-related parts
        cleaned_prompt = userPrompt
        for phrase in ["recherchier", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information", "im internet", "the internet", "sur internet"]:
            cleaned_prompt = re.sub(phrase, "", cleaned_prompt, flags=re.IGNORECASE)
        cleaned_prompt = cleaned_prompt.strip()

        # Use cleaned prompt if it has meaningful content
        if cleaned_prompt and len(cleaned_prompt) > 2:
            query_parts.append(cleaned_prompt)

    # Add article description if found (but NOT if we have an explicit search term)
    if article_description and not explicit_search_term:
        query_parts.append(article_description)

    # Add article number if found (but NOT if we have an explicit search term)
    if article_number and not explicit_search_term:
        query_parts.append(article_number)

    # Add supplier if found (but NOT if we have an explicit search term)
    if supplier and not explicit_search_term:
        query_parts.append(supplier)

    # Extract certification information from prompt if present
    certification_terms = []
    if has_certification_intent:
        # Extract specific certification mentions
        cert_keywords = {
            "ul": "UL certification",
            "ce": "CE certification",
            "tüv": "TÜV certification",
            "vde": "VDE certification",
            "iec": "IEC certification",
            "iso": "ISO certification"
        }
        for cert_key, cert_term in cert_keywords.items():
            if cert_key in prompt_lower:
                certification_terms.append(cert_term)

        # If no specific certification found but certification intent detected, add generic term
        if not certification_terms:
            certification_terms.append("certification")

    # Add certification terms to query if found
    if certification_terms:
        query_parts.extend(certification_terms)

    # Add "Datenblatt" or "datasheet" if user requested it or if we have product info
    # But NOT if we have an explicit search term (user wants to search for something specific)
    if not explicit_search_term:
        if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower:
            query_parts.append("Datenblatt")
        elif query_parts and (article_number or article_description):
            # If we have product info but no explicit request for datasheet, add it anyway
            query_parts.append("Datenblatt")

    # If we found product information or built a meaningful query, use it
    if query_parts:
        enriched_query = " ".join(query_parts)
        logger.info(f"Built enriched search query: '{enriched_query}' from context (original: '{userPrompt}')")
        return enriched_query
    else:
        # Fall back to the original prompt
        logger.info(f"No product context found, using original prompt: '{userPrompt}'")
        return userPrompt

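# Illustrative example (hypothetical values): for the prompt "Suche das Datenblatt"
# with query data containing {"Artikelnummer": "6AV2181-8XP00-0AX0", "Lieferant": "Siemens"},
# the parts are joined in order (description, number, supplier, "Datenblatt"),
# yielding roughly "6AV2181-8XP00-0AX0 Siemens Datenblatt".
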
async def _convert_file_ids_to_document_references(
    services,
    file_ids: List[str]
) -> DocumentReferenceList:
    """
    Convert file IDs to DocumentReferenceList for use with ai.process.

    Args:
        services: Services instance
        file_ids: List of file IDs to convert

    Returns:
        DocumentReferenceList with docItem references
    """
    references = []

    # Get workflow to search for ChatDocuments
    workflow = services.workflow
    if not workflow:
        logger.error("Cannot convert file IDs to document references: workflow not set in services")
        return DocumentReferenceList(references=[])

    for file_id in file_ids:
        try:
            # Get file info to verify it exists
            file_info = services.chat.getFileInfo(file_id)
            if not file_info:
                logger.warning(f"File {file_id} not found, skipping")
                continue

            # Find ChatDocument that has this fileId
            document_id = None
            if workflow.messages:
                for message in workflow.messages:
                    if hasattr(message, 'documents') and message.documents:
                        for doc in message.documents:
                            if getattr(doc, 'fileId', None) == file_id:
                                document_id = getattr(doc, 'id', None)
                                break
                    if document_id:
                        break

            # Search database if not found in messages
            if not document_id:
                try:
                    from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
                    documents = getRecordsetWithRBAC(
                        services.interfaceDbChat.db,
                        ChatDocument,
                        services.user,
                        recordFilter={"fileId": file_id},
                        mandateId=services.mandateId
                    )
                    if documents:
                        workflow_message_ids = {msg.id for msg in workflow.messages} if workflow.messages else set()
                        for doc in documents:
                            if doc.get("messageId") in workflow_message_ids:
                                document_id = doc.get("id")
                                break
                except Exception:
                    pass  # Fallback to fileId

            # Use ChatDocument ID if found, otherwise use fileId as fallback
            ref = DocumentItemReference(documentId=document_id if document_id else file_id)
            references.append(ref)
        except Exception as e:
            logger.error(f"Error converting fileId {file_id}: {e}", exc_info=True)

    logger.info(f"Converted {len(references)} file IDs to document references")
    return DocumentReferenceList(references=references)

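# Note (illustrative): when no matching ChatDocument is found, the reference above
# falls back to the raw file ID, i.e. DocumentItemReference(documentId="<fileId>").
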
def _format_query_results_as_lookup(query_data: Dict[str, List[Dict]]) -> str:
    """
    Format database query results as JSON lookup table for Excel matching.
    Converts query result data into structured JSON format: {Artikelnummer: {columns...}}

    Args:
        query_data: Dict with query_key -> list of row dicts (from connector with return_json=True)

    Returns:
        JSON string formatted as lookup table
    """
    lookup_table = {}

    for query_key, rows in query_data.items():
        if query_key == "error" or not rows:
            logger.warning(f"Skipping query key '{query_key}' - no rows or error")
            continue

        logger.info(f"Processing {len(rows)} rows from query '{query_key}'")

        for row in rows:
            if not isinstance(row, dict):
                logger.warning(f"Skipping non-dict row: {type(row)}")
                continue

            # Find Artikelnummer field (case-insensitive)
            artikelnummer = None
            for key in row.keys():
                if key.lower() in ['artikelnummer', 'artikel_nummer', 'art_nr', 'part_number']:
                    artikelnummer = str(row[key])
                    break

            if artikelnummer:
                lookup_table[artikelnummer] = row
            else:
                logger.warning(f"No Artikelnummer found in row with keys: {list(row.keys())}")

    logger.info(f"Generated lookup table with {len(lookup_table)} entries")
    if lookup_table:
        sample_keys = list(lookup_table.keys())[:3]
        logger.info(f"Sample Artikelnummern: {sample_keys}")
        if sample_keys:
            sample_entry = lookup_table[sample_keys[0]]
            logger.info(f"Sample entry keys: {list(sample_entry.keys())}")

    return json.dumps(lookup_table, ensure_ascii=False, indent=2)

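# Illustrative input/output (hypothetical rows): a row with an "Artikelnummer"-like
# column becomes a lookup key pointing at the full row dict:
#   _format_query_results_as_lookup({"query_1": [{"Artikelnummer": "6AV2181", "Preis": "99.00"}]})
#   -> an indented JSON string mapping "6AV2181" to the full row dict
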
async def _create_chat_document_from_action_document(
    services,
    action_document,
    message_id: str,
    workflow_id: str,
    round_number: int
) -> ChatDocument:
    """
    Create a ChatDocument from an ActionDocument by storing the file data.

    Args:
        services: Services instance
        action_document: ActionDocument from ai.process result
        message_id: ID of the message to attach to
        workflow_id: Workflow ID
        round_number: Round number

    Returns:
        ChatDocument instance
    """
    try:
        # Get file data (could be bytes or string)
        document_data = action_document.documentData

        # Convert to bytes if needed
        if isinstance(document_data, str):
            # Check if it's base64 encoded
            try:
                # Try to decode as base64 first
                file_bytes = base64.b64decode(document_data)
            except Exception:
                # Not base64, encode as UTF-8
                file_bytes = document_data.encode('utf-8')
        elif isinstance(document_data, bytes):
            file_bytes = document_data
        else:
            # Try to convert to bytes
            try:
                file_bytes = bytes(document_data)
            except Exception:
                # Last resort: convert to string then encode
                file_bytes = str(document_data).encode('utf-8')

        # Get MIME type (default to Excel)
        mime_type = action_document.mimeType or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        # Get file name
        file_name = action_document.documentName or "data_export.xlsx"
        # Ensure it has .xlsx extension
        if not file_name.lower().endswith('.xlsx'):
            # Remove any existing extension and add .xlsx
            file_name = file_name.rsplit('.', 1)[0] + '.xlsx'

        # Store file using component interface
        file_item = services.interfaceDbComponent.createFile(
            name=file_name,
            mimeType=mime_type,
            content=file_bytes
        )

        # Store file data
        success = services.interfaceDbComponent.createFileData(file_item.id, file_bytes)
        if not success:
            logger.warning(f"Failed to store file data for {file_item.id}, but continuing...")

        # Create ChatDocument
        chat_document = ChatDocument(
            id=str(uuid.uuid4()),
            messageId=message_id,
            fileId=file_item.id,
            fileName=file_name,
            fileSize=len(file_bytes),
            mimeType=mime_type,
            roundNumber=round_number,
            taskNumber=0,
            actionNumber=0
        )

        logger.info(f"Created ChatDocument {chat_document.id} from ActionDocument {file_name} (size: {len(file_bytes)} bytes)")
        return chat_document

    except Exception as e:
        logger.error(f"Error creating ChatDocument from ActionDocument: {e}", exc_info=True)
        raise

async def _processChatbotMessage(
    services,
    workflowId: str,
    userInput: UserInputRequest,
    userMessageId: str
):
    """
    Process a chatbot message in the background.
    Analyzes the user input and generates a list of queries, then streams them back.
    """
    event_manager = get_event_manager()
    # Resolve the DB interface before the try block so the error handler below can use it
    interfaceDbChat = services.interfaceDbChat

    try:
        # Reload workflow to get current messages
        workflow = interfaceDbChat.getWorkflow(workflowId)
        if not workflow:
            logger.error(f"Workflow {workflowId} not found during processing")
            await event_manager.emit_event(
                context_id=workflowId,
                event_type="error",
                data={"error": f"Workflow {workflowId} nicht gefunden"},
                event_category="workflow",
                message=f"Workflow {workflowId} nicht gefunden",
                step="error"
            )
            return

        # Check if workflow was stopped before starting
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, aborting processing")
            return

        # Build conversation context from history
        # Only include context if the new question might need it (e.g., references to previous messages)
        context = ""
        is_resumed = len(workflow.messages) > 0 if workflow.messages else False

        # Check if the current question might need context (references like "it", "that", "previous", "earlier", etc.)
        needs_context = False
        if is_resumed:
            current_prompt_lower = userInput.prompt.lower()
            # NOTE: substring matching, so short keywords like "es" match broadly
            context_keywords = ["es", "das", "dieses", "jenes", "vorherige", "frühere", "vorhin", "oben",
                                "it", "that", "this", "previous", "earlier", "above", "mentioned", "before",
                                "davor", "dazu", "darauf", "damit", "davon"]
            needs_context = any(keyword in current_prompt_lower for keyword in context_keywords)

        if is_resumed and needs_context:
            recent_messages = workflow.messages[-3:]  # Reduced from 5 to 3 for less distraction
            context = "\n\n⚠️ WICHTIG - KONTEXT NUR FÜR REFERENZ ⚠️\n"
            context += "Die folgende Konversation ist nur als Referenz, falls die aktuelle Frage darauf Bezug nimmt.\n"
            context += "FOKUSSIERE AUF DIE AKTUELLE FRAGE OBEN!\n\n"
            context += "Vorherige Konversation:\n"
            for msg in recent_messages:
                if msg.role == "user":
                    context += f"User: {msg.message}\n"
                elif msg.role == "assistant":
                    context += f"Assistant: {msg.message}\n"

        await services.ai.ensureAiObjectsInitialized()

        # Step 1: Analyze user input to generate queries
        logger.info("Analyzing user input to generate queries...")
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...")

        analysisPrompt = get_initial_analysis_prompt(userInput.prompt, context, is_resumed)

        # AI call for analysis
        method_ai = MethodAi(services)
        analysis_result = await method_ai.process({
            "aiPrompt": analysisPrompt,
            "documentList": None,
            "resultType": "json",
            "simpleMode": True
        })

        # Extract content from ActionResult
        analysis_content = None
        if analysis_result.success and analysis_result.documents:
            analysis_content = analysis_result.documents[0].documentData
            if isinstance(analysis_content, bytes):
                analysis_content = analysis_content.decode('utf-8')

        if not analysis_content:
            logger.warning("Analysis failed, using fallback")
            analysis = {}
        else:
            # Fall back to an empty dict if no JSON could be extracted
            analysis = _extractJsonFromResponse(analysis_content) or {}

        # Extract analysis results
        needsDatabaseQuery = analysis.get("needsDatabaseQuery", False)
        needsWebResearch = analysis.get("needsWebResearch", False)
        sql_queries = analysis.get("sqlQueries", [])
        # Support legacy single query format for backward compatibility
        if not sql_queries and analysis.get("sqlQuery"):
            sql_queries = [{
                "query": analysis.get("sqlQuery", ""),
                "purpose": "Database query",
                "table": "Unknown"
            }]
        reasoning = analysis.get("reasoning", "")

        # Check if we need web research for certifications
        user_prompt_lower = userInput.prompt.lower()
        certification_keywords = ["ul", "ce", "tüv", "vde", "iec", "iso", "zertifiziert", "certified", "certification"]
        has_certification = any(keyword in user_prompt_lower for keyword in certification_keywords)
        if has_certification and not needsWebResearch:
            logger.warning("Certification detected but needsWebResearch is false - forcing to true")
            needsWebResearch = True

        # Limit query count to maximum 5 for performance
        max_queries_allowed = 5
        if needsDatabaseQuery and len(sql_queries) > max_queries_allowed:
            logger.info(f"Limiting queries from {len(sql_queries)} to {max_queries_allowed} for performance")
            sql_queries = sql_queries[:max_queries_allowed]

        logger.info(f"Analysis: DB={needsDatabaseQuery}, Web={needsWebResearch}, SQL queries={len(sql_queries)}")

        # Build initial enriched web research query if needed (used in the query log below)
        enriched_web_query = None
        if needsWebResearch:
            enriched_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages)

        # Build list of queries to stream back
        queries = []

        if needsDatabaseQuery and sql_queries:
            for i, sql_query_info in enumerate(sql_queries, 1):
                queries.append({
                    "type": "database",
                    "query": sql_query_info.get("query", ""),
                    "purpose": sql_query_info.get("purpose", f"Query {i}"),
                    "table": sql_query_info.get("table", "Unknown"),
                    "reasoning": reasoning
                })

        if needsWebResearch:
            queries.append({
                "type": "web",
                "query": enriched_web_query or userInput.prompt,
                "reasoning": reasoning
            })

        # Format queries as log text
        log_lines = []
        if queries:
            db_queries = [q for q in queries if q["type"] == "database"]
            log_lines.append(f"Generiert: {len(db_queries)} Datenbankabfrage(n) und {len(queries) - len(db_queries)} Web-Recherche(n)\n\n")
            for i, q in enumerate(queries, 1):
                if q["type"] == "database":
                    log_lines.append(f"{i}. Datenbankabfrage ({q.get('table', 'Unknown')}):\n")
                    log_lines.append(f"   Zweck: {q.get('purpose', 'Nicht angegeben')}\n")
                    log_lines.append(f"```sql\n{q['query']}\n```\n")
                elif q["type"] == "web":
                    log_lines.append(f"{i}. Web-Recherche:\n")
                    log_lines.append(f"   Suchbegriff: {q['query']}\n")
                    if q.get("reasoning"):
                        log_lines.append(f"   Begründung: {q['reasoning']}\n")
                log_lines.append("\n")
        else:
            log_lines.append("Keine Abfragen erforderlich.")

        log_text = "".join(log_lines)

        # Stream queries as a log
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, log_text)

        # Check if workflow was stopped before executing queries
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, aborting query execution")
            return

        # Step 2: Execute queries
        queryResults = {}
        webResearchResults = ""

        # Start web research early in parallel with DB queries if needed
        web_research_task = None
        if needsWebResearch:
            # Build the search query from the prompt and conversation history (DB results are not available yet)
            basic_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages, None)
            logger.info(f"Starting web research in parallel with DB queries using basic query: '{basic_web_query}'")
            await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Suche im Internet nach Informationen...")

            async def perform_web_research():
                """Perform web research and return results."""
                try:
                    researchResult = await services.web.performWebResearch(
                        prompt=basic_web_query,
                        urls=[],
                        country=None,
                        language=userInput.userLanguage or "de",
                        researchDepth="general",
                        operationId=None
                    )
                    return json.dumps(researchResult, ensure_ascii=False, indent=2) if isinstance(researchResult, dict) else str(researchResult)
                except Exception as e:
                    logger.error(f"Web research failed: {e}", exc_info=True)
                    return f"Web research error: {str(e)}"

            web_research_task = asyncio.create_task(perform_web_research())

        # Execute database queries in parallel
        if needsDatabaseQuery and sql_queries:
            logger.info(f"Executing {len(sql_queries)} database queries in parallel...")
            await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Führe {len(sql_queries)} Datenbankabfrage(n) parallel aus...")

            try:
                queryResults = await _execute_queries_parallel(sql_queries)

                # Log results summary
                successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
                failed_queries = [k for k in queryResults.keys() if k.endswith("_error")]

                if successful_queries:
                    total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)
                    logger.info(f"Successfully executed {len(successful_queries)} query/queries, total {total_rows} rows")
                    await _emit_log_and_event(
                        interfaceDbChat,
                        workflowId,
                        event_manager,
                        f"Abgeschlossen: {len(successful_queries)} Abfrage(n) erfolgreich, {total_rows} Ergebnis{'se' if total_rows != 1 else ''} gefunden"
                    )

                if failed_queries:
                    logger.warning(f"{len(failed_queries)} query/queries failed")
                    await _emit_log_and_event(
                        interfaceDbChat,
                        workflowId,
                        event_manager,
                        f"Warnung: {len(failed_queries)} Abfrage(n) fehlgeschlagen",
                        log_type="warning"
                    )

                # Check if we got empty results and need to retry with alternative strategies
                # Robust calculation: check all successful queries for empty data
                total_rows = 0
                queries_with_results = 0
                queries_with_empty_results = 0

                if successful_queries:
                    for query_key in successful_queries:
                        data_key = f"{query_key}_data"
                        if data_key in queryResults:
                            row_count = len(queryResults[data_key])
                            total_rows += row_count
                            if row_count > 0:
                                queries_with_results += 1
                            else:
                                queries_with_empty_results += 1
                        else:
                            # Query succeeded but no data key - treat as empty
                            queries_with_empty_results += 1
                            logger.debug(f"Query {query_key} succeeded but has no _data key")
                else:
                    # No successful queries at all
                    logger.debug("No successful queries found")

                # Also check if we have any query results at all
                has_any_results = total_rows > 0

                # Debug logging
                logger.info(f"Query results analysis: total_rows={total_rows}, successful_queries={len(successful_queries)}, "
                            f"queries_with_results={queries_with_results}, queries_with_empty_results={queries_with_empty_results}, "
                            f"failed_queries={len(failed_queries)}")

                # Trigger retry if: no results AND we have database queries AND we executed at least one query
                # Also trigger if all successful queries returned empty results
                should_retry = (
                    not has_any_results and
                    needsDatabaseQuery and
                    len(sql_queries) > 0 and
                    (len(successful_queries) > 0 or len(failed_queries) == 0)  # Either we have successful queries or no failures (queries executed but empty)
                )

                # Iterative retry loop: try up to 2 times with different strategies
                max_empty_retry_attempts = 2
                empty_retry_attempt = 0
                original_sql_queries_count = len(sql_queries)
                previous_retry_rows = 0

                while should_retry and empty_retry_attempt < max_empty_retry_attempts:
                    empty_retry_attempt += 1
                    logger.info(f"No results found (attempt {empty_retry_attempt}/{max_empty_retry_attempts}), retrying with alternative query strategies...")
                    await _emit_log_and_event(
                        interfaceDbChat,
                        workflowId,
                        event_manager,
                        f"Keine Ergebnisse gefunden ({len(successful_queries)} erfolgreiche Abfrage(n), {total_rows} Zeilen). Versuch {empty_retry_attempt}/{max_empty_retry_attempts}: Versuche alternative Abfrage-Strategien...",
                        log_type="info"
                    )

                    # Retry analysis with empty results context - create NEW analysis with alternative strategies
                    from modules.features.chatbot.chatbotConstants import get_empty_results_retry_instructions

                    # Build retry prompt with progressively different strategies
                    empty_count = len(sql_queries)
                    empty_results_instructions = get_empty_results_retry_instructions(empty_count)

                    retry_context = f"{context}\n\n"
                    if empty_retry_attempt == 1:
                        retry_context += "⚠️⚠️⚠️ WICHTIG - ALTERNATIVE STRATEGIEN ERFORDERLICH ⚠️⚠️⚠️\n"
                        retry_context += "Strategie: Breitere Suche, weniger Filter\n"
                    elif empty_retry_attempt == 2:
                        retry_context += "⚠️⚠️⚠️ KRITISCH - IMMER NOCH KEINE ERGEBNISSE ⚠️⚠️⚠️\n"
                        retry_context += "Strategie: Entferne spezifische Filter komplett, verwende nur Hauptkriterien\n"
                    else:
                        retry_context += "⚠️⚠️⚠️ LETZTER VERSUCH - MINIMALE FILTER ⚠️⚠️⚠️\n"
                        retry_context += "Strategie: Nur Hauptbegriffe, keine spezifischen Filter\n"

                    retry_context += f"Die bisherigen {len(sql_queries)} Abfragen haben 0 Zeilen zurückgegeben.\n"
                    retry_context += f"{empty_results_instructions}\n"
                    retry_context += f"Dies ist bereits Versuch {empty_retry_attempt} von {max_empty_retry_attempts}!\n"
                    retry_context += "Erstelle JETZT MAXIMAL 5 alternative SQL-Queries mit komplett anderen Strategien (für Performance):\n"

                    if empty_retry_attempt == 1:
                        retry_context += "- Breitere Suche ohne zu spezifische Filter\n"
                        retry_context += "- Suche ohne Zertifizierungsfilter (falls Zertifizierung nicht in DB)\n"
                        retry_context += "- Suche nur nach Hauptkriterien (z.B. nur Netzgerät + 10A, ohne einphasig)\n"
                        retry_context += "- Suche nach alternativen Begriffen (Netzteil statt Netzgerät, etc.)\n"
                        retry_context += "- COUNT-Queries für Statistik\n"
                        retry_context += "- Fallback-Queries mit minimalen Filtern\n"
                    elif empty_retry_attempt == 2:
                        retry_context += "- ENTFERNE alle Zertifizierungsfilter komplett\n"
                        retry_context += "- ENTFERNE Phasen-Filter (einphasig/dreiphasig)\n"
                        retry_context += "- Suche NUR nach: Netzgerät/Netzteil + Ampere-Angaben\n"
                        retry_context += "- Verwende breitere Ampere-Patterns (5A, 6A, 8A, 10A, 12A, 15A, 20A, etc.)\n"
                        retry_context += "- Suche auch in Keywords-Feld\n"
                    else:
                        retry_context += "- MINIMALE Filter: Nur 'Netzgerät' ODER 'Netzteil' ODER 'Power Supply'\n"
                        retry_context += "- KEINE spezifischen Filter auf Ampere, Phasen oder Zertifizierung\n"
                        retry_context += "- COUNT-Query: Wie viele Netzgeräte gibt es insgesamt?\n"
                        retry_context += "- Suche nach ALLEN verfügbaren Netzgeräten\n"

                    # Retry analysis is always part of an ongoing chat, so use is_resumed=True
                    retry_analysis_prompt = get_initial_analysis_prompt(userInput.prompt, retry_context, is_resumed=True)

                    # AI call for retry analysis
                    retry_analysis_result = await method_ai.process({
                        "aiPrompt": retry_analysis_prompt,
                        "documentList": None,
                        "resultType": "json",
                        "simpleMode": True
                    })

                    # Extract retry analysis
                    retry_analysis_content = None
                    if retry_analysis_result.success and retry_analysis_result.documents:
                        retry_analysis_content = retry_analysis_result.documents[0].documentData
                        if isinstance(retry_analysis_content, bytes):
                            retry_analysis_content = retry_analysis_content.decode('utf-8')

                    if retry_analysis_content:
                        retry_analysis = _extractJsonFromResponse(retry_analysis_content)
                        if retry_analysis and retry_analysis.get("needsDatabaseQuery", False):
                            retry_sql_queries = retry_analysis.get("sqlQueries", [])
                            # Limit to maximum 5 queries for performance
                            if len(retry_sql_queries) > 5:
                                logger.info(f"Limiting retry queries from {len(retry_sql_queries)} to 5 for performance")
                                retry_sql_queries = retry_sql_queries[:5]
                            if retry_sql_queries:
                                logger.info(f"Executing {len(retry_sql_queries)} retry queries (attempt {empty_retry_attempt}) with alternative strategies...")
                                await _emit_log_and_event(
                                    interfaceDbChat,
                                    workflowId,
                                    event_manager,
                                    f"Führe {len(retry_sql_queries)} alternative Abfrage(n) mit anderen Strategien aus (Versuch {empty_retry_attempt})...",
                                    log_type="info"
                                )

                                # Execute retry queries
                                try:
                                    retry_results = await _execute_queries_parallel(retry_sql_queries)

                                    # Merge retry results into main results (renumber to continue sequence)
                                    base_query_num = len(sql_queries)
                                    for key, value in retry_results.items():
                                        if key.startswith("query_"):
                                            # Extract query number from retry result
                                            try:
                                                query_num = int(key.split("_")[1])
                                                new_query_num = base_query_num + query_num
                                                new_key = f"query_{new_query_num}"

                                                if not key.endswith("_data") and not key.endswith("_error"):
                                                    queryResults[new_key] = value
                                                    if f"{key}_data" in retry_results:
                                                        queryResults[f"{new_key}_data"] = retry_results[f"{key}_data"]
                                                elif key.endswith("_error"):
                                                    queryResults[f"{new_key}_error"] = value
                                            except (ValueError, IndexError):
                                                # Fallback if parsing fails
                                                new_key = f"query_{base_query_num + 1}"
                                                if not key.endswith("_data") and not key.endswith("_error"):
                                                    queryResults[new_key] = value

                                    # Recalculate results after retry
                                    retry_successful = [k for k in retry_results.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
                                    retry_rows = sum(len(retry_results.get(f"{k}_data", [])) for k in retry_successful) if retry_successful else 0

                                    # Update successful_queries list to include retry results
                                    successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
                                    total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)

                                    logger.info(f"Retry attempt {empty_retry_attempt}: Found {retry_rows} rows from {len(retry_successful)} queries. Total: {total_rows} rows from {len(successful_queries)} queries")

                                    if retry_rows > 0:
                                        # Success! Found results
                                        await _emit_log_and_event(
                                            interfaceDbChat,
                                            workflowId,
                                            event_manager,
                                            f"Alternative Abfragen erfolgreich: {len(retry_successful)} Abfrage(n) mit {retry_rows} Ergebnis{'sen' if retry_rows != 1 else ''} gefunden",
                                            log_type="info"
                                        )
                                        should_retry = False  # Stop retry loop, we found results
                                        break
                                    elif retry_rows > previous_retry_rows:
                                        # Made some progress (found more rows than before) - continue
                                        previous_retry_rows = retry_rows
                                        await _emit_log_and_event(
                                            interfaceDbChat,
                                            workflowId,
                                            event_manager,
                                            f"Versuch {empty_retry_attempt}: Fortschritt erzielt ({retry_rows} Zeilen gefunden). Versuche weitere Strategie...",
                                            log_type="info"
                                        )
                                    else:
                                        # No progress made - stop retrying
                                        await _emit_log_and_event(
                                            interfaceDbChat,
                                            workflowId,
                                            event_manager,
                                            f"Versuch {empty_retry_attempt}: Keine Ergebnisse gefunden. Beende Retry-Versuche.",
                                            log_type="warning"
                                        )
                                        should_retry = False  # Stop retry loop, no progress
                                        break
                                except Exception as retry_error:
                                    logger.error(f"Error executing retry queries (attempt {empty_retry_attempt}): {retry_error}", exc_info=True)
                                    # Continue to next attempt even on error

                    # Check if we should continue retrying (already handled in break conditions above)
                    if empty_retry_attempt >= max_empty_retry_attempts:
                        logger.warning(f"Reached maximum empty retry attempts ({max_empty_retry_attempts}), stopping retry loop")
                        await _emit_log_and_event(
                            interfaceDbChat,
                            workflowId,
                            event_manager,
                            f"⚠️ Maximale Anzahl Retry-Versuche ({max_empty_retry_attempts}) erreicht. Keine Ergebnisse gefunden.",
                            log_type="warning"
                        )
                        should_retry = False
            except Exception as e:
                logger.error(f"Error executing parallel queries: {e}")
                queryResults["error"] = f"Error executing queries: {str(e)}"
                await _emit_log_and_event(
                    interfaceDbChat,
                    workflowId,
                    event_manager,
                    "Fehler bei parallelen Datenbankabfragen",
                    log_type="error"
                )

        # Wait for web research to complete (if it was started in parallel)
        if web_research_task:
            try:
                webResearchResults = await web_research_task
                if webResearchResults and not webResearchResults.startswith("Web research error"):
                    logger.info("Web research completed successfully")
                    await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche abgeschlossen")
                else:
                    logger.warning("Web research completed with errors")
                    await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")
            except Exception as e:
                logger.error(f"Error waiting for web research: {e}", exc_info=True)
                webResearchResults = f"Web research error: {str(e)}"
                await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")

        # Check if workflow was stopped before generating final answer
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, aborting final answer generation")
            return

        # Step 3: Generate final answer using AI
        logger.info("Generating final answer with AI...")
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Formuliere finale Antwort...")

        # Build prompt for final answer
        system_prompt = get_final_answer_system_prompt()

        # Build answer context with query results using efficient list-based building
        answer_context_parts = [f"User question: {userInput.prompt}{context}\n"]

        # Add database results - organize by query with metadata
        db_results_parts = []
        if queryResults:
            successful_results = []
            error_results = []

            # Extract query metadata from sql_queries if available
            query_metadata = {}
            if sql_queries:
                for i, q_info in enumerate(sql_queries, 1):
                    query_metadata[f"query_{i}"] = {
                        "purpose": q_info.get("purpose", f"Query {i}"),
                        "table": q_info.get("table", "Unknown")
                    }

            # Organize results by query number
            query_numbers = set()
            for key in queryResults.keys():
                if key.startswith("query_") and not key.endswith("_data"):
                    # Extract query number (e.g., "query_1" -> 1)
                    try:
                        num = int(key.split("_")[1])
                        query_numbers.add(num)
                    except (ValueError, IndexError):
                        pass

            # Build results with metadata
            for query_num in sorted(query_numbers):
                query_key = f"query_{query_num}"
                error_key = f"{query_key}_error"

                if error_key in queryResults:
                    error_msg = queryResults[error_key]
                    metadata = query_metadata.get(query_key, {})
                    purpose = metadata.get("purpose", f"Query {query_num}")
                    table = metadata.get("table", "Unknown")
                    error_results.append(f"Abfrage {query_num} ({table} - {purpose}): {error_msg}")
                elif query_key in queryResults:
                    result_text = queryResults[query_key]
                    metadata = query_metadata.get(query_key, {})
                    purpose = metadata.get("purpose", f"Query {query_num}")
                    table = metadata.get("table", "Unknown")
                    successful_results.append(f"=== Abfrage {query_num}: {purpose} (Tabelle: {table}) ===\n{result_text}")

            # Handle general error if present
            if "error" in queryResults:
                error_results.append(f"Allgemeiner Fehler: {queryResults['error']}")

            # Build db_results_part efficiently
            if successful_results:
                db_results_parts.append("\n\nDATENBANK-ERGEBNISSE:\n")
                db_results_parts.append("\n\n".join(successful_results))
                answer_context_parts.append("DATENBANK-ERGEBNISSE:\n")
                answer_context_parts.append("\n\n".join(successful_results))
                answer_context_parts.append("\n")

            if error_results:
                db_results_parts.append("\n\nDATENBANK-FEHLER:\n")
                db_results_parts.append("\n".join(error_results))
                answer_context_parts.append("DATENBANK-FEHLER:\n")
                answer_context_parts.append("\n".join(error_results))
                answer_context_parts.append("\n")

        db_results_part = "".join(db_results_parts)

        # Add web research results
        web_results_part = ""
        # Check if web research results exist and are valid (not empty and not an error)
        if webResearchResults and webResearchResults.strip() and not webResearchResults.startswith("Web research error"):
            web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}"
            answer_context_parts.append(f"INTERNET-RECHERCHE:\n{webResearchResults}\n")

        # Join answer context efficiently
        answerContext = "".join(answer_context_parts)

        # Check if we have any actual data
        successful_query_keys = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
        has_query_results = bool(successful_query_keys)
        error_query_keys = [k for k in queryResults.keys() if k.endswith("_error")]
        has_only_errors = bool(error_query_keys and not successful_query_keys)

        # Count total number of articles found across all queries
        total_articles_found = 0
        if successful_query_keys:
            for query_key in successful_query_keys:
                data_key = f"{query_key}_data"
                if data_key in queryResults:
                    article_count = len(queryResults[data_key])
                    total_articles_found += article_count
                    logger.info(f"Query {query_key} returned {article_count} articles")

        logger.info(f"Total articles found across all queries: {total_articles_found}")

        # Add explicit article count information to prompt (using efficient list building)
        if total_articles_found > 0:
            article_count_parts = [
                "\n\n⚠️⚠️⚠️ WICHTIG - ARTIKELANZAHL ⚠️⚠️⚠️\n",
                f"In den DATENBANK-ERGEBNISSEN oben wurden INSGESAMT {total_articles_found} Artikel gefunden.\n",
                f"DU MUSST ALLE {total_articles_found} Artikel in deiner Antwort zeigen!\n"
            ]
            if total_articles_found <= 20:
                article_count_parts.append(f"Zeige ALLE {total_articles_found} Artikel in einer Tabelle.\n")
            else:
                article_count_parts.append(f"Zeige die ersten 20 Artikel in einer Tabelle + Hinweis auf weitere {total_articles_found - 20} Artikel.\n")
            article_count_parts.extend([
                f"❌ VERBOTEN: Nur einen Artikel zu zeigen, wenn {total_articles_found} gefunden wurden!\n",
                f"✓ OBLIGATORISCH: Zeige ALLE {total_articles_found} Artikel!\n"
            ])
            article_count_info = "".join(article_count_parts)

            if db_results_part:
                db_results_part = article_count_info + db_results_part
            else:
                db_results_part = article_count_info

        # Add warning messages if needed (using efficient list building)
        warning_parts = []
        if not has_query_results and needsDatabaseQuery:
            warning_parts.append("\n\nWICHTIG: Es wurden KEINE Datenbank-Ergebnisse gefunden. Die Datenbankabfrage wurde nicht ausgeführt oder hat keine Ergebnisse zurückgegeben.")

        if has_only_errors:
            warning_parts.extend([
                "\n\n⚠️⚠️⚠️ KRITISCH - ALLE QUERIES FEHLGESCHLAGEN ⚠️⚠️⚠️\n",
                "ALLE Datenbankabfragen sind fehlgeschlagen. Es gibt KEINE gültigen Daten aus der Datenbank.\n",
                "DU DARFST KEINE DATEN ERFINDEN! Schreibe stattdessen: 'Es wurden keine Artikel gefunden' oder 'Die Datenbankabfrage ist fehlgeschlagen'."
            ])

        if warning_parts:
            db_results_part = db_results_part + "".join(warning_parts) if db_results_part else "".join(warning_parts)

        # Determine if we have database results, web results, or both
        has_db_results = bool(db_results_part and db_results_part.strip())
        has_web_results = bool(web_results_part and web_results_part.strip())

        # Use the function from constants file to build the prompt
        answer_prompt = get_final_answer_prompt_with_results(
            userInput.prompt,
            context,
            db_results_part,
            web_results_part,
            is_resumed,
            has_db_results,
            has_web_results
        )

        answerRequest = AiCallRequest(
            prompt=answer_prompt,
            context=answerContext if (queryResults or webResearchResults) else None,
            options=AiCallOptions(
                resultFormat="txt",
                operationType=OperationTypeEnum.DATA_ANALYSE,
                processingMode=ProcessingModeEnum.DETAILED
            )
        )

        answerResponse = await services.ai.callAi(answerRequest)

        # Check for errors in AI response
        if answerResponse.errorCount > 0:
            logger.error(f"AI call failed with errorCount={answerResponse.errorCount}: {answerResponse.content}")
            finalAnswer = "Entschuldigung, ich konnte Ihre Anfrage derzeit nicht verarbeiten. Bitte versuchen Sie es später erneut."
        else:
            finalAnswer = answerResponse.content
            logger.info("Final answer generated")

        # Check if workflow was stopped during AI call - if so, don't store the message
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped during final answer generation, not storing message")
            return

        # Reload workflow to get current message count
        workflow = interfaceDbChat.getWorkflow(workflowId)

        # Double-check workflow wasn't stopped while we were reloading
        if workflow and workflow.status == "stopped":
            logger.info(f"Workflow {workflowId} was stopped, not storing final message")
            return

        # Create assistant message with final answer
        message_id = f"msg_{uuid.uuid4()}"
        assistantMessageData = {
            "id": message_id,
            "workflowId": workflowId,
            "parentMessageId": userMessageId,
            "message": finalAnswer,
            "role": "assistant",
            "status": "last",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
            "success": answerResponse.errorCount == 0 if answerResponse else True,
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
        logger.info(f"Stored assistant message with final answer: {assistantMessage.id}")

        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            context_id=workflowId,
            event_type="chatdata",
            data={
                "type": "message",
                "createdAt": message_timestamp,
                "item": assistantMessage.dict()
            },
            event_category="chat"
        )

        # Update workflow status to completed (only if not stopped)
        if not await _check_workflow_stopped(interfaceDbChat, workflowId):
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "completed",
                "lastActivity": getUtcTimestamp()
            })
        else:
            logger.info(f"Workflow {workflowId} was stopped, not updating status to completed")

        logger.info(f"Chatbot processing completed for workflow {workflowId}, generated {len(queries)} queries and final answer")

        # Emit completion event only if workflow wasn't stopped
        if not await _check_workflow_stopped(interfaceDbChat, workflowId):
            await event_manager.emit_event(
                context_id=workflowId,
                event_type="complete",
                data={"workflowId": workflowId},
                event_category="workflow",
                message="Chatbot-Verarbeitung abgeschlossen",
                step="complete"
            )

        # Schedule cleanup with longer delay to allow stream to stay open
        await event_manager.cleanup(workflowId, delay=300.0)  # 5 minutes delay

    except Exception as e:
        logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True)

        # Check if workflow was stopped - if so, don't store error message
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
            logger.info(f"Workflow {workflowId} was stopped, not storing error message")
            return

        # Store error message
        try:
            # Reload workflow to get current message count
            workflow = interfaceDbChat.getWorkflow(workflowId)

            # Double-check workflow wasn't stopped while we were reloading
            if workflow and workflow.status == "stopped":
                logger.info(f"Workflow {workflowId} was stopped, not storing error message")
                return

            errorMessageData = {
                "id": f"msg_{uuid.uuid4()}",
                "workflowId": workflowId,
                "parentMessageId": userMessageId,
                "message": f"Sorry, I encountered an error: {str(e)}",
                "role": "assistant",
                "status": "last",
                "sequenceNr": len(workflow.messages) + 1 if workflow else 1,
                "publishedAt": getUtcTimestamp(),
                "success": False,
                "roundNumber": workflow.currentRound if workflow else 1,
                "taskNumber": 0,
                "actionNumber": 0
            }
            errorMessage = interfaceDbChat.createMessage(errorMessageData)

            # Emit message event for streaming (exact chatData format)
            message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp())
            await event_manager.emit_event(
                context_id=workflowId,
                event_type="chatdata",
                data={
                    "type": "message",
                    "createdAt": message_timestamp,
                    "item": errorMessage.dict()
                },
                event_category="chat"
            )

            # Update workflow status to error (only if not stopped)
            if not await _check_workflow_stopped(interfaceDbChat, workflowId):
                interfaceDbChat.updateWorkflow(workflowId, {
                    "status": "error",
                    "lastActivity": getUtcTimestamp()
                })
            else:
                logger.info(f"Workflow {workflowId} was stopped, not updating status to error")

            # Schedule cleanup
            await event_manager.cleanup(workflowId)
        except Exception as storeError:
            logger.error(f"Error storing error message: {storeError}")