gateway/modules/features/chatbot/mainChatbot.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Simple chatbot feature - direct AI center implementation.
Bypasses complex workflow engine for fast, simple chatbot responses.
"""

import logging
import json
import uuid
import asyncio
import re
import datetime
import base64
from typing import Optional, Dict, Any, List

from modules.datamodels.datamodelChat import ChatWorkflow, UserInputRequest, ChatDocument, WorkflowModeEnum
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.services import getInterface as getServices
from modules.connectors.connectorPreprocessor import PreprocessorConnector
from modules.features.chatbot.eventManager import get_event_manager
from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference
from modules.workflows.methods.methodAi.methodAi import MethodAi

logger = logging.getLogger(__name__)


def _extractJsonFromResponse(content: str) -> Optional[dict]:
    """Extract JSON from AI response, handling markdown code blocks."""
    # Try direct JSON parse first
    try:
        return json.loads(content.strip())
    except json.JSONDecodeError:
        pass

    # Try to extract JSON from markdown code blocks
    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            pass

    # Try to find JSON object in the text
    json_match = re.search(r'\{.*\}', content, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(0))
        except json.JSONDecodeError:
            pass

    return None


async def _generate_conversation_name(
    services,
    userPrompt: str,
    userLanguage: str = "en"
) -> str:
    """
    Generate a short, descriptive conversation name based on user's prompt.

    Args:
        services: Services instance with AI access
        userPrompt: The user's input prompt
        userLanguage: User's preferred language (for prompt localization)

    Returns:
        Short conversation name (max 60 characters)
    """
    try:
        truncated_prompt = userPrompt[:200] if len(userPrompt) > 200 else userPrompt

        name_prompt = f"""Create a professional conversation title in THE SAME LANGUAGE as the user's question.

Question: "{truncated_prompt}"

Rules:
- Title MUST be in the same language as the question (German→German, French→French, English→English)
- Max 60 characters, no punctuation (?, !, .)
- Professional and concise
- Respond ONLY with the title, nothing else"""

        await services.ai.ensureAiObjectsInitialized()

        nameRequest = AiCallRequest(
            prompt=name_prompt,
            options=AiCallOptions(
                resultFormat="txt",
                operationType=OperationTypeEnum.DATA_GENERATE,
                processingMode=ProcessingModeEnum.DETAILED,
                temperature=0.7
            )
        )

        nameResponse = await services.ai.callAi(nameRequest)
        generated_name = nameResponse.content.strip()

        # Extract first line and clean up
        generated_name = generated_name.split('\n')[0].strip()
        generated_name = re.sub(r'^(Title|Titel|Titre|Name|Name:):\s*', '', generated_name, flags=re.IGNORECASE)
        generated_name = re.sub(r'^["\']|["\']$', '', generated_name)
        generated_name = re.sub(r'[?!.]+$', '', generated_name)  # Remove trailing punctuation

        # Apply title case
        if generated_name:
            words = generated_name.split()
            capitalized_words = []
            for word in words:
                if word.isupper() and len(word) > 1:
                    capitalized_words.append(word)  # Keep acronyms
                else:
                    capitalized_words.append(word.capitalize())
            generated_name = " ".join(capitalized_words).strip()

        # Validate and truncate if needed
        if not generated_name or len(generated_name) < 3:
            if userLanguage == "de":
                generated_name = "Chatbot Konversation"
            elif userLanguage == "fr":
                generated_name = "Conversation Chatbot"
            else:
                generated_name = "Chatbot Conversation"

        if len(generated_name) > 60:
            truncated = generated_name[:57]
            last_space = truncated.rfind(' ')
            generated_name = truncated[:last_space] + "..." if last_space > 30 else truncated + "..."

        logger.info(f"Generated conversation name: '{generated_name}'")
        return generated_name

    except Exception as e:
        logger.error(f"Error generating conversation name: {e}", exc_info=True)
        if userLanguage == "de":
            return "Chatbot Konversation"
        elif userLanguage == "fr":
            return "Conversation Chatbot"
        else:
            return "Chatbot Conversation"


async def chatProcess(
    currentUser: User,
    userInput: UserInputRequest,
    workflowId: Optional[str] = None
) -> ChatWorkflow:
    """
    Simple chatbot processing - direct AI center implementation.

    Flow:
    1. Create or load workflow
    2. Store user message
    3. AI analyzes: determine if DB query/web research needed
    4. Execute database query if needed
    5. Execute web research if needed
    6. Generate final answer
    7. Store assistant message
    8. Return workflow

    Args:
        currentUser: Current user
        userInput: User input request
        workflowId: Optional workflow ID to continue existing conversation

    Returns:
        ChatWorkflow instance
    """
    try:
        # Get services
        services = getServices(currentUser, None)
        interfaceDbChat = services.interfaceDbChat

        # Get event manager and create queue if needed
        event_manager = get_event_manager()

        # Create or load workflow
        if workflowId:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")

            # Resume workflow: increment round number
            new_round = workflow.currentRound + 1
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "running",
                "currentRound": new_round,
                "lastActivity": getUtcTimestamp()
            })
            workflow = interfaceDbChat.getWorkflow(workflowId)
            logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}")

            # Create event queue if it doesn't exist (for streaming)
            if not event_manager.has_queue(workflowId):
                event_manager.create_queue(workflowId)
        else:
            # Generate conversation name based on user's prompt
            conversation_name = await _generate_conversation_name(
                services,
                userInput.prompt,
                userInput.userLanguage
            )

            # Create new workflow
            workflowData = {
                "id": str(uuid.uuid4()),
                "mandateId": currentUser.mandateId,
                "status": "running",
                "name": conversation_name,
                "currentRound": 1,
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0,
                "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value,  # Use Chatbot mode for chatbot conversations
                "startedAt": getUtcTimestamp(),
                "lastActivity": getUtcTimestamp()
            }
            workflow = interfaceDbChat.createWorkflow(workflowData)
            logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}")

            # Create event queue for new workflow (for streaming)
            event_manager.create_queue(workflow.id)

        # Reload workflow to get current message count
        workflow = interfaceDbChat.getWorkflow(workflow.id)

        # Process uploaded files and create ChatDocuments
        user_documents = []
        if userInput.listFileId and len(userInput.listFileId) > 0:
            logger.info(f"Processing {len(userInput.listFileId)} uploaded file(s) for user message")
            for fileId in userInput.listFileId:
                try:
                    # Get file info from chat service
                    fileInfo = services.chat.getFileInfo(fileId)
                    if not fileInfo:
                        logger.warning(f"No file info found for file ID {fileId}")
                        continue

                    originalFileName = fileInfo.get("fileName", "unknown")
                    originalMimeType = fileInfo.get("mimeType", "application/octet-stream")
                    fileSizeToUse = fileInfo.get("size", 0)

                    # Create ChatDocument for the file
                    document = ChatDocument(
                        id=str(uuid.uuid4()),
                        messageId="",  # Will be set when message is created
                        fileId=fileId,
                        fileName=originalFileName,
                        fileSize=fileSizeToUse,
                        mimeType=originalMimeType,
                        roundNumber=workflow.currentRound,
                        taskNumber=0,
                        actionNumber=0
                    )
                    user_documents.append(document)
                    logger.info(f"Created ChatDocument for file {fileId} -> {originalFileName}")
                except Exception as e:
                    logger.error(f"Error processing file ID {fileId}: {e}", exc_info=True)

        # Store user message
        userMessageData = {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": workflow.id,
            "message": userInput.prompt,
            "role": "user",
            "status": "first" if workflowId is None else "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        # Add documents to message if any
        if user_documents:
            # Update messageId in all documents
            for doc in user_documents:
                doc.messageId = userMessageData["id"]
            userMessageData["documents"] = [doc.dict() for doc in user_documents]
            userMessageData["documentsLabel"] = "Uploaded Files"
            logger.info(f"Attaching {len(user_documents)} document(s) to user message")

        userMessage = interfaceDbChat.createMessage(userMessageData)
        logger.info(f"Stored user message: {userMessage.id} with {len(user_documents)} document(s)")

        # Emit message event for streaming (exact chatData format)
        event_manager = get_event_manager()
        message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            workflow.id,
            "chatdata",
            "New message",
            "message",
            {
                "type": "message",
                "createdAt": message_timestamp,
                "item": userMessage.dict()
            }
        )

        # Update workflow status
        interfaceDbChat.updateWorkflow(workflow.id, {
            "status": "running",
            "lastActivity": getUtcTimestamp()
        })

        # Process in background (async)
        asyncio.create_task(_processChatbotMessage(
            services,
            workflow.id,
            userInput,
            userMessage.id
        ))

        # Reload workflow to include new message
        workflow = interfaceDbChat.getWorkflow(workflow.id)
        return workflow

    except Exception as e:
        logger.error(f"Error in chatProcess: {str(e)}", exc_info=True)
        raise


async def _check_workflow_status(interfaceDbChat, workflowId: str, event_manager) -> bool:
    """
    Check if workflow is stopped. If stopped, emit stopped event and return True.

    Returns:
        True if workflow is stopped, False otherwise
    """
    workflow = interfaceDbChat.getWorkflow(workflowId)
    if workflow and workflow.status == "stopped":
        await event_manager.emit_event(
            workflowId,
            "stopped",
            "Workflow stopped",
            "stopped"
        )
        logger.info(f"Workflow {workflowId} was stopped, exiting processing")
        return True
    return False


async def _convert_file_ids_to_document_references(
    services,
    file_ids: List[str]
) -> DocumentReferenceList:
    """
    Convert file IDs to DocumentReferenceList for use with ai.process.

    Args:
        services: Services instance
        file_ids: List of file IDs to convert

    Returns:
        DocumentReferenceList with docItem references
    """
    references = []

    # Get workflow to search for ChatDocuments
    workflow = services.workflow
    if not workflow:
        logger.error("Cannot convert file IDs to document references: workflow not set in services")
        return DocumentReferenceList(references=[])

    for file_id in file_ids:
        try:
            # Get file info to verify it exists
            file_info = services.chat.getFileInfo(file_id)
            if not file_info:
                logger.warning(f"File {file_id} not found, skipping")
                continue

            # Find ChatDocument that has this fileId
            document_id = None
            if workflow.messages:
                for message in workflow.messages:
                    if hasattr(message, 'documents') and message.documents:
                        for doc in message.documents:
                            if getattr(doc, 'fileId', None) == file_id:
                                document_id = getattr(doc, 'id', None)
                                break
                    if document_id:
                        break

            # Search database if not found in messages
            if not document_id:
                try:
                    from modules.datamodels.datamodelChat import ChatDocument
                    from modules.shared.databaseUtils import getRecordsetWithRBAC
                    documents = getRecordsetWithRBAC(
                        services.interfaceDbChat.db,
                        ChatDocument,
                        services.currentUser,
                        recordFilter={"fileId": file_id}
                    )
                    if documents:
                        workflow_message_ids = {msg.id for msg in workflow.messages} if workflow.messages else set()
                        for doc in documents:
                            if doc.get("messageId") in workflow_message_ids:
                                document_id = doc.get("id")
                                break
                except Exception:
                    pass  # Fallback to fileId

            # Use ChatDocument ID if found, otherwise use fileId as fallback
            ref = DocumentItemReference(documentId=document_id if document_id else file_id)
            references.append(ref)
        except Exception as e:
            logger.error(f"Error converting fileId {file_id}: {e}", exc_info=True)

    logger.info(f"Converted {len(references)} file IDs to document references")
    return DocumentReferenceList(references=references)


def _format_query_results_as_lookup(query_data: Dict[str, List[Dict]]) -> str:
    """
    Format database query results as JSON lookup table for Excel matching.
    Converts query result data into structured JSON format: {Artikelnummer: {columns...}}

    Args:
        query_data: Dict with query_key -> list of row dicts (from connector with return_json=True)

    Returns:
        JSON string formatted as lookup table
    """
    lookup_table = {}

    for query_key, rows in query_data.items():
        if query_key == "error" or not rows:
            logger.warning(f"Skipping query key '{query_key}' - no rows or error")
            continue

        logger.info(f"Processing {len(rows)} rows from query '{query_key}'")

        for row in rows:
            if not isinstance(row, dict):
                logger.warning(f"Skipping non-dict row: {type(row)}")
                continue

            # Find Artikelnummer field (case-insensitive)
            artikelnummer = None
            for key in row.keys():
                if key.lower() in ['artikelnummer', 'artikel_nummer', 'art_nr', 'part_number']:
                    artikelnummer = str(row[key])
                    break

            if artikelnummer:
                lookup_table[artikelnummer] = row
            else:
                logger.warning(f"No Artikelnummer found in row with keys: {list(row.keys())}")

    logger.info(f"Generated lookup table with {len(lookup_table)} entries")
    if lookup_table:
        sample_keys = list(lookup_table.keys())[:3]
        logger.info(f"Sample Artikelnummern: {sample_keys}")
        if sample_keys:
            sample_entry = lookup_table[sample_keys[0]]
            logger.info(f"Sample entry keys: {list(sample_entry.keys())}")

    return json.dumps(lookup_table, ensure_ascii=False, indent=2)


async def _create_chat_document_from_action_document(
    services,
    action_document,
    message_id: str,
    workflow_id: str,
    round_number: int
) -> ChatDocument:
    """
    Create a ChatDocument from an ActionDocument by storing the file data.

    Args:
        services: Services instance
        action_document: ActionDocument from ai.process result
        message_id: ID of the message to attach to
        workflow_id: Workflow ID
        round_number: Round number

    Returns:
        ChatDocument instance
    """
    try:
        # Get file data (could be bytes or string)
        document_data = action_document.documentData

        # Convert to bytes if needed
        if isinstance(document_data, str):
            # Check if it's base64 encoded
            try:
                # Try to decode as base64 first
                file_bytes = base64.b64decode(document_data)
            except Exception:
                # Not base64, encode as UTF-8
                file_bytes = document_data.encode('utf-8')
        elif isinstance(document_data, bytes):
            file_bytes = document_data
        else:
            # Try to convert to bytes
            try:
                file_bytes = bytes(document_data)
            except Exception:
                # Last resort: convert to string then encode
                file_bytes = str(document_data).encode('utf-8')

        # Get MIME type (default to Excel)
        mime_type = action_document.mimeType or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        # Get file name
        file_name = action_document.documentName or "data_export.xlsx"
        # Ensure it has .xlsx extension
        if not file_name.lower().endswith('.xlsx'):
            # Remove any existing extension and add .xlsx
            file_name = file_name.rsplit('.', 1)[0] + '.xlsx'

        # Store file using component interface
        file_item = services.interfaceDbComponent.createFile(
            name=file_name,
            mimeType=mime_type,
            content=file_bytes
        )

        # Store file data
        success = services.interfaceDbComponent.createFileData(file_item.id, file_bytes)
        if not success:
            logger.warning(f"Failed to store file data for {file_item.id}, but continuing...")

        # Create ChatDocument
        chat_document = ChatDocument(
            id=str(uuid.uuid4()),
            messageId=message_id,
            fileId=file_item.id,
            fileName=file_name,
            fileSize=len(file_bytes),
            mimeType=mime_type,
            roundNumber=round_number,
            taskNumber=0,
            actionNumber=0
        )

        logger.info(f"Created ChatDocument {chat_document.id} from ActionDocument {file_name} (size: {len(file_bytes)} bytes)")
        return chat_document

    except Exception as e:
        logger.error(f"Error creating ChatDocument from ActionDocument: {e}", exc_info=True)
        raise


async def _processChatbotMessage(
    services,
    workflowId: str,
    userInput: UserInputRequest,
    userMessageId: str
):
    """
    Process chatbot message in background.
    Simplified 4-step flow: Analysis → Query → Excel → Answer
    """
    event_manager = get_event_manager()

    try:
        interfaceDbChat = services.interfaceDbChat

        # Reload workflow to get current messages
        workflow = interfaceDbChat.getWorkflow(workflowId)
        if not workflow:
            logger.error(f"Workflow {workflowId} not found during processing")
            await event_manager.emit_event(workflowId, "error", f"Workflow {workflowId} nicht gefunden", "error")
            return

        if await _check_workflow_status(interfaceDbChat, workflowId, event_manager):
            return

        # Build conversation context from history
        context = ""
        if workflow.messages:
            recent_messages = workflow.messages[-5:]
            context = "\n\nPrevious conversation:\n"
            for msg in recent_messages:
                if msg.role == "user":
                    context += f"User: {msg.message}\n"
                elif msg.role == "assistant":
                    context += f"Assistant: {msg.message}\n"

        await services.ai.ensureAiObjectsInitialized()
        current_date = datetime.datetime.now().strftime("%d.%m.%Y")

        if await _check_workflow_status(interfaceDbChat, workflowId, event_manager):
            return

        # Step 1: Unified Analysis
        logger.info("Step 1: Analyzing user input and attached files...")
        await event_manager.emit_event(workflowId, "status", "Analysiere Benutzeranfrage und angehängte Dateien...", "analysis")

        # Prepare document references if files are attached
        has_uploaded_files = bool(userInput.listFileId and len(userInput.listFileId) > 0)
        document_references = None

        if has_uploaded_files:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if workflow:
                services.workflow = workflow
            document_references = await _convert_file_ids_to_document_references(services, userInput.listFileId)
            logger.info(f"Prepared {len(document_references.references)} document references for analysis")

        # Build analysis prompt
        file_context = "\n\nIMPORTANT - ATTACHED FILES:\nIf files are attached, read them completely. For Excel files, extract ALL article identifiers (Artikelnummer) from the file and include them directly in the SQL query using WHERE Artikelnummer IN ('ART1', 'ART2', ...). DO NOT reference ExcelFile table - it doesn't exist!\n" if has_uploaded_files else ""

        analysisPrompt = f"""Heute ist der {current_date}.

Du bist ein Chatbot der Althaus AG mit Zugriff auf eine SQL-Datenbank.

DATENBANK-SCHEMA:
- Artikel: I_ID, Artikelnummer, Artikelbezeichnung, Lieferant, etc.
- Einkaufspreis: m_Artikel, EP_CHF
- Lagerplatz_Artikel: R_ARTIKEL, S_IST_BESTAND, etc.
- Beziehungen: Artikel.I_ID = Einkaufspreis.m_Artikel = Lagerplatz_Artikel.R_ARTIKEL

SQL-HINWEISE:
- IMMER doppelte Anführungszeichen für Spaltennamen: "Artikelnummer", "S_IST_BESTAND"
- S_IST_BESTAND kann "Unbekannt" sein (TEXT), prüfe mit WHERE l."S_IST_BESTAND" != 'Unbekannt'
- Verwende Tabellenaliase: a für Artikel, e für Einkaufspreis, l für Lagerplatz_Artikel
- WICHTIG: Wenn Excel-Dateien angehängt sind, extrahiere Artikelnummern aus der Datei und verwende sie direkt im SQL: WHERE a."Artikelnummer" IN ('ART1', 'ART2', 'ART3')
- VERBOTEN: Verwende NICHT "SELECT ... FROM ExcelFile" - diese Tabelle existiert nicht!

{file_context}User question: {userInput.prompt}{context}

Return ONLY valid JSON:
{{
  "needsDatabaseQuery": boolean,
  "needsWebResearch": boolean,
  "needsExcelFile": boolean,
  "excelAction": "create" | "update" | null,
  "excelFileName": string | null,
  "sqlQuery": string (ready-to-execute SQL with double quotes for column names. If Excel file attached, extract Artikelnummern and use WHERE a."Artikelnummer" IN ('ART1', 'ART2', ...)),
  "requestedColumns": array of strings (columns to add for Excel updates, e.g. ["Lieferant", "Lagerbestand"]),
  "reasoning": string
}}"""

        # Single AI call for analysis
        method_ai = MethodAi(services)
        analysis_result = await method_ai.process({
            "aiPrompt": analysisPrompt,
            "documentList": document_references if has_uploaded_files else None,
            "resultType": "json",
            "simpleMode": True
        })

        # Extract content from ActionResult (in simple mode, content is in first document's documentData)
        analysis_content = None
        if analysis_result.success and analysis_result.documents:
            analysis_content = analysis_result.documents[0].documentData
            if isinstance(analysis_content, bytes):
                analysis_content = analysis_content.decode('utf-8')

        if not analysis_content:
            logger.warning("Analysis failed, using fallback")
            analysis = {}
        else:
            analysis = _extractJsonFromResponse(analysis_content)

        # Extract analysis results
        needsDatabaseQuery = analysis.get("needsDatabaseQuery", False) if analysis else False
        needsWebResearch = analysis.get("needsWebResearch", False) if analysis else False
        needsExcelFile = analysis.get("needsExcelFile", False) if analysis else False
        excelAction = analysis.get("excelAction")
        excelFileName = analysis.get("excelFileName")
        sql_query = analysis.get("sqlQuery", "")
        requested_columns = analysis.get("requestedColumns", [])

        # Ensure database query if Excel update is needed
        if needsExcelFile and excelAction == "update" and not needsDatabaseQuery:
            needsDatabaseQuery = True

        logger.info(f"Analysis: DB={needsDatabaseQuery}, Excel={needsExcelFile}, SQL={'present' if sql_query else 'missing'}")

        if await _check_workflow_status(interfaceDbChat, workflowId, event_manager):
            return

        # Step 2: Execute SQL Query
        queryResults = {}
        queryData = {}
        excel_documents = []

        if needsDatabaseQuery and sql_query:
            logger.info("Step 2: Executing SQL query...")
            await event_manager.emit_event(workflowId, "status", "Führe Datenbankabfrage aus...", "query_execution")

            try:
                connector = PreprocessorConnector()
                result_dict = await connector.executeQuery(sql_query, return_json=True)
                await connector.close()

                if result_dict and not result_dict.get("text", "").startswith(("Error:", "Query failed:")):
                    queryResults["main"] = result_dict.get("text", "")
                    queryData["main"] = result_dict.get("data", [])
                    logger.info(f"Query executed successfully, returned {len(queryData.get('main', []))} rows")
                else:
                    error_text = result_dict.get("text", "Query failed") if result_dict else "Query failed: No response"
                    queryResults["error"] = error_text
                    logger.error(f"Query failed: {error_text}")

                    # If query failed and we need Excel update, try to extract article numbers from Excel and retry
                    if needsExcelFile and excelAction == "update" and has_uploaded_files and "ExcelFile" in error_text:
                        logger.warning("Query failed due to ExcelFile table reference. Attempting to extract article numbers from Excel file and retry...")
                        # The AI should have extracted article numbers, but if query failed, we can't proceed
                        # Log the issue for debugging
                        logger.error("Cannot proceed with Excel update: SQL query references non-existent ExcelFile table. AI should extract article numbers directly.")
            except Exception as e:
                logger.error(f"Error executing query: {e}")
                queryResults["error"] = f"Error executing query: {str(e)}"

        # Step 2.5: Web Research (simplified)
        webResearchResults = ""
        if needsWebResearch:
            logger.info("Performing web research...")
            try:
                researchResult = await services.web.performWebResearch(
                    prompt=userInput.prompt,
                    urls=[],
                    country=None,
                    language=userInput.userLanguage or "de",
                    researchDepth="general",
                    operationId=None
                )
                webResearchResults = json.dumps(researchResult, ensure_ascii=False, indent=2) if isinstance(researchResult, dict) else str(researchResult)
            except Exception as e:
                logger.error(f"Web research failed: {e}")
                webResearchResults = f"Web research error: {str(e)}"

        if await _check_workflow_status(interfaceDbChat, workflowId, event_manager):
            return

        # Build answer context
        answerContext = f"User question: {userInput.prompt}{context}\n\n"
        if queryResults:
            for key, result in queryResults.items():
                if key != "error":
                    answerContext += f"Database results:\n{result}\n\n"
        if webResearchResults:
            answerContext += f"Web research:\n{webResearchResults}\n\n"

        # Step 3: Process Excel File
        excel_error = None
        if needsExcelFile:
            logger.info(f"Step 3: Processing Excel file (action: {excelAction})...")
            await event_manager.emit_event(
                workflowId,
                "status",
                f"Erstelle Excel-Datei..." if excelAction == "create" else "Aktualisiere Excel-Datei...",
                "excel_processing"
            )

            try:
                # Check if we have query data for updates
                if excelAction == "update":
                    if not queryData or "main" not in queryData or not queryData["main"]:
                        error_msg = f"No query data available for Excel update. queryData keys: {list(queryData.keys()) if queryData else 'None'}"
                        if queryResults.get("error"):
                            error_msg += f" Query error: {queryResults['error']}"
                        logger.error(error_msg)
                        excel_error = f"Die Datenbankabfrage ist fehlgeschlagen: {queryResults.get('error', 'Unbekannter Fehler')}. Die Excel-Datei konnte nicht aktualisiert werden."
                        raise ValueError(excel_error)

                workflow = interfaceDbChat.getWorkflow(workflowId)
                if workflow:
                    services.workflow = workflow

                method_ai = MethodAi(services)

                # Prepare document references for updates
                if excelAction == "update" and userInput.listFileId:
                    if not document_references or not document_references.references:
                        document_references = await _convert_file_ids_to_document_references(services, userInput.listFileId)
                else:
                    from modules.datamodels.datamodelDocref import DocumentReferenceList
                    document_references = DocumentReferenceList(references=[])

                # Build Excel prompt
                if excelAction == "update":
                    # Format query data as JSON lookup table
                    lookup_table_json = _format_query_results_as_lookup(queryData)
                    lookup_table_dict = json.loads(lookup_table_json)
                    logger.info(f"Generated lookup table with {len(lookup_table_dict)} entries")

                    # Log sample entries for debugging
                    if lookup_table_dict:
                        sample_keys = list(lookup_table_dict.keys())[:3]
                        logger.info(f"Sample Artikelnummern in lookup table: {sample_keys}")
                        if sample_keys:
                            sample_entry = lookup_table_dict[sample_keys[0]]
                            logger.info(f"Sample entry for '{sample_keys[0]}': {list(sample_entry.keys())}")
                            logger.info(f"Sample entry values: {sample_entry}")

                    # Determine columns to add
                    column_names = requested_columns if requested_columns else []
                    if not column_names:
                        prompt_lower = userInput.prompt.lower()
                        if "einkaufspreis" in prompt_lower or "purchase price" in prompt_lower or "kaufpreis" in prompt_lower:
                            column_names = ["Einkaufspreis"]
                        elif "lieferant" in prompt_lower or "supplier" in prompt_lower:
                            column_names = ["Lieferant"]
                        elif "lagerbestand" in prompt_lower or "stock" in prompt_lower:
                            column_names = ["Lagerbestand"]
                        else:
                            # Default: try to infer from available columns in lookup table
                            if lookup_table_dict:
                                sample_entry = next(iter(lookup_table_dict.values()))
                                available_columns = list(sample_entry.keys())
                                logger.info(f"Available columns in lookup table: {available_columns}")
                                # Prefer common columns
                                if "Einkaufspreis" in available_columns:
                                    column_names = ["Einkaufspreis"]
                                elif "Lieferant" in available_columns:
                                    column_names = ["Lieferant"]
                                else:
                                    column_names = available_columns[:1] if available_columns else ["Lieferant"]
                            else:
                                column_names = ["Lieferant"]

                    logger.info(f"Adding columns to Excel: {column_names}")
                    column_name_str = ", ".join(column_names)

                    # Build a more explicit prompt with clear instructions and the lookup table
                    # Include the lookup table data directly in the prompt so it's always available
                    excel_prompt = f"""You are updating an Excel file. CRITICAL TASK:

Add new column(s) "{column_name_str}" to the attached Excel file by matching Artikelnummer values.

STEP-BY-STEP PROCESS (MUST FOLLOW EXACTLY):
1. Read the attached Excel file completely - all sheets, all rows, all columns.
2. Find the column containing "Artikelnummer" (may be named "Artikelnummer", "Artikel Nummer", "Art-Nr", "Artikelnummer", etc.)
3. For EACH ROW in the Excel file:
   a. Read the Artikelnummer value from that row (e.g., "LED001", "12345", etc.)
   b. Look up that EXACT Artikelnummer in the DATABASE LOOKUP TABLE below (case-sensitive match)
   c. If the Artikelnummer is found in the lookup table:
      - Extract the value(s) for column(s) "{column_name_str}" from the matching entry
      - Write the value(s) to the new column(s) in that row
   d. If the Artikelnummer is NOT found in the lookup table:
      - Leave the new column(s) empty for that row
4. Preserve ALL existing data, formatting, formulas, and structure exactly as-is.
5. Return ONLY the complete modified Excel file (.xlsx format). Do NOT include any text or explanations.

=== DATABASE LOOKUP TABLE (JSON) ===
Use this table to match Artikelnummer and extract values. Format: {{"Artikelnummer": {{"column": "value", ...}}}}

{lookup_table_json}

=== EXAMPLE MATCHING ===
- Excel row has Artikelnummer "LED001" in column "Artikelnummer"
- Search for "LED001" in the lookup table above
- Find entry: {{"LED001": {{"Lieferant": "Phoenix Contact", "Einkaufspreis": 12.50, ...}}}}
- Extract "{column_name_str}" value from that entry
- Write the extracted value to the new "{column_name_str}" column in that Excel row

CRITICAL REQUIREMENTS:
- Match EXACTLY by Artikelnummer (case-sensitive, exact string match)
- Preserve ALL existing columns, rows, formatting, and formulas
- Only add the new column(s), do NOT modify existing data
- Return ONLY the Excel file binary data, no explanations"""
                else:
                    excel_prompt = f"""Create Excel file (.xlsx) from database results:

{answerContext if queryResults else "No database results available."}

User request: {userInput.prompt}

Create well-structured spreadsheet with appropriate headers and formatting."""

                # Use simpleMode=True for Excel updates to ensure lookup table is directly accessible
                # simpleMode=False goes through complex document generation pipeline which may lose context
                excel_result = await method_ai.process({
                    "aiPrompt": excel_prompt,
                    "documentList": document_references if excelAction == "update" else None,
                    "resultType": "xlsx",
                    "simpleMode": True  # Use simple mode for direct Excel updates with lookup table
                })

                if excel_result.success and excel_result.documents:
                    for action_doc in excel_result.documents:
                        try:
                            chat_doc = await _create_chat_document_from_action_document(
                                services, action_doc, "", workflowId, workflow.currentRound
                            )
                            if chat_doc:
                                excel_documents.append(chat_doc)
                        except Exception as e:
                            logger.error(f"Error creating ChatDocument: {e}", exc_info=True)
                    logger.info(f"Excel processing complete: {len(excel_documents)} documents")
                else:
                    excel_error = f"Excel-Verarbeitung fehlgeschlagen: {excel_result.error if excel_result else 'Unbekannter Fehler'}"
                    logger.warning(excel_error)
            except Exception as e:
                excel_error = str(e) if not excel_error else excel_error
                logger.error(f"Error processing Excel: {e}", exc_info=True)

        if await _check_workflow_status(interfaceDbChat, workflowId, event_manager):
            return

        # Step 4: Generate Final Answer
        logger.info("Step 4: Generating final answer...")
        await event_manager.emit_event(workflowId, "status", "Formuliere finale Antwort...", "answer_generation")

        excel_context = ""
        if needsExcelFile:
            if excel_documents:
                file_names = [doc.fileName for doc in excel_documents]
                action_text = "erstellt" if excelAction == "create" else "aktualisiert"
                file_name_str = file_names[0] if len(file_names) == 1 else f"{len(file_names)} Dateien"
                excel_context = f"\n\nWICHTIG - EXCEL-DATEI {action_text.upper()}:\nDie Excel-Datei '{file_name_str}' wurde erfolgreich {action_text} und ist als Anhang verfügbar. Erwähne dies am Ende deiner Antwort."
            elif excel_error:
                excel_context = f"\n\nFEHLER - EXCEL-VERARBEITUNG:\n{excel_error}\nErkläre dem Nutzer, warum die Excel-Datei nicht aktualisiert werden konnte."

        # Build answer context parts (avoid backslashes in f-string expressions)
        db_results_part = f"\nDATENBANK-ERGEBNISSE:\n{answerContext}" if queryResults and not queryResults.get("error") else ""
        if queryResults.get("error"):
            db_results_part = f"\nDATENBANK-FEHLER:\n{queryResults['error']}"
        web_results_part = f"\nINTERNET-RECHERCHE:\n{webResearchResults}" if webResearchResults else ""

        answerPrompt = f"""Heute ist der {current_date}.

Du bist ein Chatbot der Althaus AG. Antworte auf Deutsch (kein ß, immer ss).

Antworte auf: {userInput.prompt}{context}
{db_results_part}{web_results_part}{excel_context}

WICHTIG:
- Klare, strukturierte Antwort
- Zahlen mit Tausender-Trennzeichen (7'411)
- Markdown-Tabellen für Daten (max 20 Zeilen)
- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)
- Wenn Excel-Datei erstellt wurde, bestätige dies explizit am Ende
- Wenn Excel-Verarbeitung fehlgeschlagen ist, erkläre dem Nutzer den Fehler klar"""

        answerRequest = AiCallRequest(
            prompt=answerPrompt,
            context=answerContext if (queryResults or webResearchResults) else None,
            options=AiCallOptions(
                resultFormat="txt",
                operationType=OperationTypeEnum.DATA_ANALYSE,
                processingMode=ProcessingModeEnum.BASIC
            )
        )

        answerResponse = await services.ai.callAi(answerRequest)
        finalAnswer = answerResponse.content

        logger.info("Final answer generated")

        if await _check_workflow_status(interfaceDbChat, workflowId, event_manager):
            return

        # Step 5: Store assistant message
        # Reload workflow to get updated message count
        workflow = interfaceDbChat.getWorkflow(workflowId)

        # Prepare message data with Excel documents if any
        message_id = f"msg_{uuid.uuid4()}"
        assistantMessageData = {
            "id": message_id,
            "workflowId": workflowId,
            "parentMessageId": userMessageId,
            "message": finalAnswer,
            "role": "assistant",
            "status": "last",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
            "success": True,
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        # Add Excel documents if any were created
        if excel_documents:
            # Update messageId in all Excel documents
            for excel_doc in excel_documents:
                excel_doc.messageId = message_id

            # Convert ChatDocuments to dict format
            try:
                documents_dict = [doc.model_dump() for doc in excel_documents]
            except Exception:
                documents_dict = [doc.dict() for doc in excel_documents]

            assistantMessageData["documents"] = documents_dict
            assistantMessageData["documentsLabel"] = "Excel Files"

        assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
        logger.info(f"Stored assistant message: {assistantMessage.id}")

        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            workflowId,
            "chatdata",
            "New message",
            "message",
            {
                "type": "message",
                "createdAt": message_timestamp,
                "item": assistantMessage.dict()
            }
        )

        # Update workflow status to completed
        interfaceDbChat.updateWorkflow(workflowId, {
            "status": "completed",
            "lastActivity": getUtcTimestamp()
        })

        logger.info(f"Chatbot processing completed for workflow {workflowId}")

        # Emit completion event
        await event_manager.emit_event(
            workflowId,
            "complete",
            "Chatbot-Verarbeitung abgeschlossen",
            "complete",
            {"workflowId": workflowId}
        )

        # Schedule cleanup
        await event_manager.cleanup(workflowId)

    except Exception as e:
        logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True)

        # Store error message
        try:
            # Reload workflow to get current message count
            workflow = interfaceDbChat.getWorkflow(workflowId)

            errorMessageData = {
                "id": f"msg_{uuid.uuid4()}",
                "workflowId": workflowId,
                "parentMessageId": userMessageId,
                "message": f"Sorry, I encountered an error: {str(e)}",
                "role": "assistant",
                "status": "last",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": getUtcTimestamp(),
                "success": False,
                "roundNumber": workflow.currentRound if workflow else 1,
                "taskNumber": 0,
                "actionNumber": 0
            }
            errorMessage = interfaceDbChat.createMessage(errorMessageData)

            # Emit message event for streaming (exact chatData format)
            message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp())
            await event_manager.emit_event(
                workflowId,
                "chatdata",
                "New message",
                "message",
                {
                    "type": "message",
                    "createdAt": message_timestamp,
                    "item": errorMessage.dict()
                }
            )

            # Update workflow status to error
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "error",
                "lastActivity": getUtcTimestamp()
            })

            # Schedule cleanup
            await event_manager.cleanup(workflowId)
        except Exception as storeError:
            logger.error(f"Error storing error message: {storeError}")