# Copyright (c) 2025 Patrick Motsch # All rights reserved. """ Simple chatbot feature - basic implementation. User input is processed by AI to create list of needed queries. Those queries get streamed back. """ import logging import json import uuid import asyncio import re from typing import Optional, Dict, Any, List from modules.datamodels.datamodelChat import ChatWorkflow, UserInputRequest, WorkflowModeEnum, ChatLog from modules.datamodels.datamodelUam import User from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp from modules.services import getInterface as getServices from modules.features.chatbot.eventManager import get_event_manager from modules.workflows.methods.methodAi.methodAi import MethodAi from modules.connectors.connectorPreprocessor import PreprocessorConnector from modules.features.chatbot.chatbotConstants import ( get_initial_analysis_prompt, generate_conversation_name, get_final_answer_system_prompt ) logger = logging.getLogger(__name__) def _extractJsonFromResponse(content: str) -> Optional[dict]: """Extract JSON from AI response, handling markdown code blocks.""" # Try direct JSON parse first try: return json.loads(content.strip()) except json.JSONDecodeError: pass # Try to extract JSON from markdown code blocks json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL) if json_match: try: return json.loads(json_match.group(1)) except json.JSONDecodeError: pass # Try to find JSON object in the text json_match = re.search(r'\{.*\}', content, re.DOTALL) if json_match: try: return json.loads(json_match.group(0)) except json.JSONDecodeError: pass return None async def chatProcess( currentUser: User, userInput: UserInputRequest, workflowId: Optional[str] = None ) -> ChatWorkflow: """ Simple chatbot processing - analyze user input and generate queries. Flow: 1. Create or load workflow 2. Store user message 3. AI analyzes user input to create list of needed queries 4. Stream queries back Args: currentUser: Current user userInput: User input request workflowId: Optional workflow ID to continue existing conversation Returns: ChatWorkflow instance """ try: # Get services services = getServices(currentUser, None) interfaceDbChat = services.interfaceDbChat # Get event manager and create queue if needed event_manager = get_event_manager() # Create or load workflow if workflowId: workflow = interfaceDbChat.getWorkflow(workflowId) if not workflow: raise ValueError(f"Workflow {workflowId} not found") # Resume workflow: increment round number new_round = workflow.currentRound + 1 interfaceDbChat.updateWorkflow(workflowId, { "status": "running", "currentRound": new_round, "lastActivity": getUtcTimestamp() }) workflow = interfaceDbChat.getWorkflow(workflowId) logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}") # Create event queue if it doesn't exist (for streaming) if not event_manager.has_queue(workflowId): event_manager.create_queue(workflowId) else: # Generate conversation name based on user's prompt conversation_name = await generate_conversation_name( services, userInput.prompt, userInput.userLanguage ) # Create new workflow workflowData = { "id": str(uuid.uuid4()), "mandateId": currentUser.mandateId, "status": "running", "name": conversation_name, "currentRound": 1, "currentTask": 0, "currentAction": 0, "totalTasks": 0, "totalActions": 0, "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value, "startedAt": getUtcTimestamp(), "lastActivity": getUtcTimestamp() } workflow = interfaceDbChat.createWorkflow(workflowData) logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}") # Create event queue for new workflow (for streaming) event_manager.create_queue(workflow.id) # Reload workflow to get current message count workflow = interfaceDbChat.getWorkflow(workflow.id) # Store user message userMessageData = { "id": f"msg_{uuid.uuid4()}", "workflowId": workflow.id, "message": userInput.prompt, "role": "user", "status": "first" if workflowId is None else "step", "sequenceNr": len(workflow.messages) + 1, "publishedAt": getUtcTimestamp(), "roundNumber": workflow.currentRound, "taskNumber": 0, "actionNumber": 0 } userMessage = interfaceDbChat.createMessage(userMessageData) logger.info(f"Stored user message: {userMessage.id}") # Emit message event for streaming (exact chatData format) message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp()) await event_manager.emit_event( workflow.id, "chatdata", "New message", "message", { "type": "message", "createdAt": message_timestamp, "item": userMessage.dict() } ) # Update workflow status interfaceDbChat.updateWorkflow(workflow.id, { "status": "running", "lastActivity": getUtcTimestamp() }) # Process in background (async) asyncio.create_task(_processChatbotMessage( services, workflow.id, userInput, userMessage.id )) # Reload workflow to include new message workflow = interfaceDbChat.getWorkflow(workflow.id) return workflow except Exception as e: logger.error(f"Error in chatProcess: {str(e)}", exc_info=True) raise async def _execute_queries_parallel(queries: List[Dict[str, Any]]) -> Dict[str, Any]: """ Execute multiple SQL queries in parallel. Args: queries: List of query dictionaries, each containing: - "query": SQL query string - "purpose": Description of what the query retrieves - "table": Primary table name Returns: Dictionary mapping query indices to results: - "query_1", "query_2", etc.: Success result text - "query_1_data", "query_2_data", etc.: Raw data arrays - "query_1_error", "query_2_error", etc.: Error messages if query failed """ async def execute_single_query(idx: int, query_info: Dict[str, Any]): """Execute a single query and return result.""" connector = PreprocessorConnector() try: query_text = query_info.get("query", "") result = await connector.executeQuery(query_text, return_json=True) await connector.close() return idx, result, None except Exception as e: await connector.close() return idx, None, str(e) # Execute all queries in parallel tasks = [execute_single_query(i, q) for i, q in enumerate(queries)] results = await asyncio.gather(*tasks, return_exceptions=True) # Process results into dictionary query_results = {} for result in results: if isinstance(result, Exception): # Handle exceptions from gather logger.error(f"Exception in parallel query execution: {result}") continue idx, result_data, error = result if error: query_results[f"query_{idx+1}_error"] = error logger.error(f"Query {idx+1} failed: {error}") else: if result_data and not result_data.get("text", "").startswith(("Error:", "Query failed:")): query_results[f"query_{idx+1}"] = result_data.get("text", "") query_results[f"query_{idx+1}_data"] = result_data.get("data", []) row_count = len(result_data.get('data', [])) logger.info(f"Query {idx+1} executed successfully, returned {row_count} rows") else: error_text = result_data.get("text", "Query failed") if result_data else "Query failed: No response" query_results[f"query_{idx+1}_error"] = error_text logger.error(f"Query {idx+1} failed: {error_text}") return query_results async def _emit_log_and_event( interfaceDbChat, workflowId: str, event_manager, message: str, log_type: str = "info", status: str = "running", round_number: Optional[int] = None ) -> None: """ Store log in database. The route's periodic chat data fetch will handle emitting it. This avoids duplicate log emissions. Args: interfaceDbChat: Database interface workflowId: Workflow ID event_manager: Event manager (unused, kept for compatibility) message: Log message log_type: Log type (info, warning, error) status: Status string round_number: Optional round number (will be fetched from workflow if not provided) """ try: # Get round number from workflow if not provided if round_number is None: workflow = interfaceDbChat.getWorkflow(workflowId) if workflow: round_number = workflow.currentRound log_timestamp = getUtcTimestamp() log_data = { "id": f"log_{uuid.uuid4()}", "workflowId": workflowId, "message": message, "type": log_type, "timestamp": log_timestamp, "status": status, "roundNumber": round_number } # Only store in database - route's periodic fetch will emit it interfaceDbChat.createLog(log_data) except Exception as e: logger.error(f"Error storing log: {e}") async def _check_workflow_stopped(interfaceDbChat, workflowId: str) -> bool: """ Check if workflow was stopped. Args: interfaceDbChat: Database interface workflowId: Workflow ID Returns: True if workflow is stopped, False otherwise """ try: workflow = interfaceDbChat.getWorkflow(workflowId) return workflow and workflow.status == "stopped" except Exception as e: logger.warning(f"Error checking workflow status: {e}") return False async def _processChatbotMessage( services, workflowId: str, userInput: UserInputRequest, userMessageId: str ): """ Process chatbot message in background. Analyzes user input and generates list of queries, then streams them back. """ event_manager = get_event_manager() try: interfaceDbChat = services.interfaceDbChat # Reload workflow to get current messages workflow = interfaceDbChat.getWorkflow(workflowId) if not workflow: logger.error(f"Workflow {workflowId} not found during processing") await event_manager.emit_event(workflowId, "error", f"Workflow {workflowId} nicht gefunden", "error") return # Check if workflow was stopped before starting if await _check_workflow_stopped(interfaceDbChat, workflowId): logger.info(f"Workflow {workflowId} was stopped, aborting processing") return # Build conversation context from history context = "" if workflow.messages: recent_messages = workflow.messages[-5:] context = "\n\nPrevious conversation:\n" for msg in recent_messages: if msg.role == "user": context += f"User: {msg.message}\n" elif msg.role == "assistant": context += f"Assistant: {msg.message}\n" await services.ai.ensureAiObjectsInitialized() # Step 1: Analyze user input to generate queries logger.info("Analyzing user input to generate queries...") await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...") analysisPrompt = get_initial_analysis_prompt(userInput.prompt, context) # AI call for analysis method_ai = MethodAi(services) analysis_result = await method_ai.process({ "aiPrompt": analysisPrompt, "documentList": None, "resultType": "json", "simpleMode": True }) # Extract content from ActionResult analysis_content = None if analysis_result.success and analysis_result.documents: analysis_content = analysis_result.documents[0].documentData if isinstance(analysis_content, bytes): analysis_content = analysis_content.decode('utf-8') if not analysis_content: logger.warning("Analysis failed, using fallback") analysis = {} else: analysis = _extractJsonFromResponse(analysis_content) # Extract analysis results needsDatabaseQuery = analysis.get("needsDatabaseQuery", False) if analysis else False needsWebResearch = analysis.get("needsWebResearch", False) if analysis else False sql_queries = analysis.get("sqlQueries", []) # Support legacy single query format for backward compatibility if not sql_queries and analysis.get("sqlQuery"): sql_queries = [{ "query": analysis.get("sqlQuery", ""), "purpose": "Database query", "table": "Unknown" }] reasoning = analysis.get("reasoning", "") logger.info(f"Analysis: DB={needsDatabaseQuery}, Web={needsWebResearch}, SQL queries={len(sql_queries)}") # Build list of queries to stream back queries = [] if needsDatabaseQuery and sql_queries: for i, sql_query_info in enumerate(sql_queries, 1): queries.append({ "type": "database", "query": sql_query_info.get("query", ""), "purpose": sql_query_info.get("purpose", f"Query {i}"), "table": sql_query_info.get("table", "Unknown"), "reasoning": reasoning }) if needsWebResearch: queries.append({ "type": "web", "query": userInput.prompt, "reasoning": reasoning }) # Format queries as log text log_lines = [] if queries: db_queries = [q for q in queries if q["type"] == "database"] log_lines.append(f"Generiert: {len(db_queries)} Datenbankabfrage(n) und {len(queries) - len(db_queries)} Web-Recherche(n)\n\n") for i, q in enumerate(queries, 1): if q["type"] == "database": log_lines.append(f"{i}. Datenbankabfrage ({q.get('table', 'Unknown')}):\n") log_lines.append(f" Zweck: {q.get('purpose', 'Nicht angegeben')}\n") log_lines.append(f"```sql\n{q['query']}\n```\n") elif q["type"] == "web": log_lines.append(f"{i}. Web-Recherche:\n") log_lines.append(f" Suchbegriff: {q['query']}\n") if q.get("reasoning"): log_lines.append(f" Begründung: {q['reasoning']}\n") log_lines.append("\n") else: log_lines.append("Keine Abfragen erforderlich.") log_text = "".join(log_lines) # Stream queries as a log await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, log_text) # Check if workflow was stopped before executing queries if await _check_workflow_stopped(interfaceDbChat, workflowId): logger.info(f"Workflow {workflowId} was stopped, aborting query execution") return # Step 2: Execute queries queryResults = {} webResearchResults = "" # Execute database queries in parallel if needsDatabaseQuery and sql_queries: logger.info(f"Executing {len(sql_queries)} database queries in parallel...") await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Führe {len(sql_queries)} Datenbankabfrage(n) parallel aus...") try: queryResults = await _execute_queries_parallel(sql_queries) # Log results summary successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")] failed_queries = [k for k in queryResults.keys() if k.endswith("_error")] if successful_queries: total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries) logger.info(f"Successfully executed {len(successful_queries)} query/queries, total {total_rows} rows") await _emit_log_and_event( interfaceDbChat, workflowId, event_manager, f"Abgeschlossen: {len(successful_queries)} Abfrage(n) erfolgreich, {total_rows} Ergebnis{'e' if total_rows != 1 else ''} gefunden" ) if failed_queries: logger.warning(f"{len(failed_queries)} query/queries failed") await _emit_log_and_event( interfaceDbChat, workflowId, event_manager, f"Warnung: {len(failed_queries)} Abfrage(n) fehlgeschlagen", log_type="warning" ) except Exception as e: logger.error(f"Error executing parallel queries: {e}") queryResults["error"] = f"Error executing queries: {str(e)}" await _emit_log_and_event( interfaceDbChat, workflowId, event_manager, "Fehler bei parallelen Datenbankabfragen", log_type="error" ) # Execute web research if needsWebResearch: logger.info("Performing web research...") await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Suche im Internet nach Informationen...") try: researchResult = await services.web.performWebResearch( prompt=userInput.prompt, urls=[], country=None, language=userInput.userLanguage or "de", researchDepth="general", operationId=None ) webResearchResults = json.dumps(researchResult, ensure_ascii=False, indent=2) if isinstance(researchResult, dict) else str(researchResult) await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche abgeschlossen") except Exception as e: logger.error(f"Web research failed: {e}") webResearchResults = f"Web research error: {str(e)}" await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning") # Check if workflow was stopped before generating final answer if await _check_workflow_stopped(interfaceDbChat, workflowId): logger.info(f"Workflow {workflowId} was stopped, aborting final answer generation") return # Step 3: Generate final answer using AI logger.info("Generating final answer with AI...") await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Formuliere finale Antwort...") # Build prompt for final answer system_prompt = get_final_answer_system_prompt() # Build answer context with query results answerContext = f"User question: {userInput.prompt}{context}\n\n" # Add database results - organize by query with metadata db_results_part = "" if queryResults: successful_results = [] error_results = [] # Extract query metadata from sql_queries if available query_metadata = {} if sql_queries: for i, q_info in enumerate(sql_queries, 1): query_metadata[f"query_{i}"] = { "purpose": q_info.get("purpose", f"Query {i}"), "table": q_info.get("table", "Unknown") } # Organize results by query number query_numbers = set() for key in queryResults.keys(): if key.startswith("query_") and not key.endswith("_data"): # Extract query number (e.g., "query_1" -> 1) try: num = int(key.split("_")[1]) query_numbers.add(num) except (ValueError, IndexError): pass # Build results with metadata for query_num in sorted(query_numbers): query_key = f"query_{query_num}" error_key = f"{query_key}_error" if error_key in queryResults: error_msg = queryResults[error_key] metadata = query_metadata.get(query_key, {}) purpose = metadata.get("purpose", f"Query {query_num}") table = metadata.get("table", "Unknown") error_results.append(f"Abfrage {query_num} ({table} - {purpose}): {error_msg}") elif query_key in queryResults: result_text = queryResults[query_key] metadata = query_metadata.get(query_key, {}) purpose = metadata.get("purpose", f"Query {query_num}") table = metadata.get("table", "Unknown") successful_results.append(f"=== Abfrage {query_num}: {purpose} (Tabelle: {table}) ===\n{result_text}") # Handle general error if present if "error" in queryResults: error_results.append(f"Allgemeiner Fehler: {queryResults['error']}") if successful_results: db_results_part = "\n\nDATENBANK-ERGEBNISSE:\n" + "\n\n".join(successful_results) answerContext += "DATENBANK-ERGEBNISSE:\n" + "\n\n".join(successful_results) + "\n\n" if error_results: db_results_part += "\n\nDATENBANK-FEHLER:\n" + "\n".join(error_results) answerContext += "DATENBANK-FEHLER:\n" + "\n".join(error_results) + "\n\n" # Add web research results web_results_part = "" if webResearchResults: web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}" answerContext += f"INTERNET-RECHERCHE:\n{webResearchResults}\n\n" # Check if we have any actual data successful_query_keys = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")] has_query_results = bool(successful_query_keys) error_query_keys = [k for k in queryResults.keys() if k.endswith("_error")] has_only_errors = bool(error_query_keys and not successful_query_keys) if not has_query_results and needsDatabaseQuery: db_results_part = "\n\nWICHTIG: Es wurden KEINE Datenbank-Ergebnisse gefunden. Die Datenbankabfrage wurde nicht ausgeführt oder hat keine Ergebnisse zurückgegeben." if has_only_errors: db_results_part += "\n\n⚠️⚠️⚠️ KRITISCH - ALLE QUERIES FEHLGESCHLAGEN ⚠️⚠️⚠️\n" + \ "ALLE Datenbankabfragen sind fehlgeschlagen. Es gibt KEINE gültigen Daten aus der Datenbank.\n" + \ "DU DARFST KEINE DATEN ERFINDEN! Schreibe stattdessen: 'Es wurden keine Artikel gefunden' oder 'Die Datenbankabfrage ist fehlgeschlagen'." answer_prompt = f"""{system_prompt} Antworte auf die folgende Frage des Nutzers: {userInput.prompt}{context} {db_results_part}{web_results_part} KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar". WICHTIG - MEHRERE ABFRAGEN: Die oben angegebenen DATENBANK-ERGEBNISSE können aus mehreren separaten Abfragen stammen. Jede Abfrage ist mit "=== Abfrage X ===" markiert und enthält Informationen zu einem spezifischen Aspekt (z.B. Artikel-Informationen, Lagerbestände, etc.). - Kombiniere die Informationen aus ALLEN erfolgreichen Abfragen zu einer umfassenden Antwort - Beispiel: Wenn Abfrage 1 Artikel-Informationen liefert und Abfrage 2 Lagerbestände liefert, kombiniere beide in deiner Antwort - Verwende ALLE verfügbaren Informationen aus den verschiedenen Abfragen ⚠️⚠️⚠️ ABSOLUT VERBOTEN - KEINE DATEN ERFINDEN ⚠️⚠️⚠️ Wenn KEINE Datenbank-Ergebnisse vorhanden sind, dann: - ❌ ERFINDE KEINE Artikelnummern, Artikelbezeichnungen, Preise oder Lagerbestände! - ❌ ERFINDE KEINE Beispielartikel! - ✓ Schreibe stattdessen: "Es wurden keine Artikel in der Datenbank gefunden." oder "Die Datenbankabfrage ist fehlgeschlagen." WICHTIG: Deine Antwort soll NUR die finale Antwort enthalten - KEINE Planungsschritte, KEINE SQL-Queries, KEINE Zwischenschritte! Beginne DIREKT mit "Aus der Datenbank habe ich..." (wenn Daten vorhanden) oder "Es wurden keine Artikel gefunden" (wenn keine Daten vorhanden).""" answerRequest = AiCallRequest( prompt=answer_prompt, context=answerContext if (queryResults or webResearchResults) else None, options=AiCallOptions( resultFormat="txt", operationType=OperationTypeEnum.DATA_ANALYSE, processingMode=ProcessingModeEnum.DETAILED ) ) answerResponse = await services.ai.callAi(answerRequest) finalAnswer = answerResponse.content logger.info("Final answer generated") # Check if workflow was stopped during AI call - if so, don't store the message if await _check_workflow_stopped(interfaceDbChat, workflowId): logger.info(f"Workflow {workflowId} was stopped during final answer generation, not storing message") return # Reload workflow to get current message count workflow = interfaceDbChat.getWorkflow(workflowId) # Double-check workflow wasn't stopped while we were reloading if workflow and workflow.status == "stopped": logger.info(f"Workflow {workflowId} was stopped, not storing final message") return # Create assistant message with final answer assistantMessageData = { "id": f"msg_{uuid.uuid4()}", "workflowId": workflowId, "parentMessageId": userMessageId, "message": finalAnswer, "role": "assistant", "status": "last", "sequenceNr": len(workflow.messages) + 1, "publishedAt": getUtcTimestamp(), "success": True, "roundNumber": workflow.currentRound, "taskNumber": 0, "actionNumber": 0 } assistantMessage = interfaceDbChat.createMessage(assistantMessageData) logger.info(f"Stored assistant message with final answer: {assistantMessage.id}") # Emit message event for streaming (exact chatData format) message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp()) await event_manager.emit_event( workflowId, "chatdata", "New message", "message", { "type": "message", "createdAt": message_timestamp, "item": assistantMessage.dict() } ) # Update workflow status to completed (only if not stopped) if not await _check_workflow_stopped(interfaceDbChat, workflowId): interfaceDbChat.updateWorkflow(workflowId, { "status": "completed", "lastActivity": getUtcTimestamp() }) else: logger.info(f"Workflow {workflowId} was stopped, not updating status to completed") logger.info(f"Chatbot processing completed for workflow {workflowId}, generated {len(queries)} queries and final answer") # Emit completion event only if workflow wasn't stopped if not await _check_workflow_stopped(interfaceDbChat, workflowId): await event_manager.emit_event( workflowId, "complete", "Chatbot-Verarbeitung abgeschlossen", "complete", {"workflowId": workflowId} ) # Schedule cleanup await event_manager.cleanup(workflowId) except Exception as e: logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True) # Check if workflow was stopped - if so, don't store error message if await _check_workflow_stopped(interfaceDbChat, workflowId): logger.info(f"Workflow {workflowId} was stopped, not storing error message") return # Store error message try: # Reload workflow to get current message count workflow = interfaceDbChat.getWorkflow(workflowId) # Double-check workflow wasn't stopped while we were reloading if workflow and workflow.status == "stopped": logger.info(f"Workflow {workflowId} was stopped, not storing error message") return errorMessageData = { "id": f"msg_{uuid.uuid4()}", "workflowId": workflowId, "parentMessageId": userMessageId, "message": f"Sorry, I encountered an error: {str(e)}", "role": "assistant", "status": "last", "sequenceNr": len(workflow.messages) + 1, "publishedAt": getUtcTimestamp(), "success": False, "roundNumber": workflow.currentRound if workflow else 1, "taskNumber": 0, "actionNumber": 0 } errorMessage = interfaceDbChat.createMessage(errorMessageData) # Emit message event for streaming (exact chatData format) message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp()) await event_manager.emit_event( workflowId, "chatdata", "New message", "message", { "type": "message", "createdAt": message_timestamp, "item": errorMessage.dict() } ) # Update workflow status to error (only if not stopped) if not await _check_workflow_stopped(interfaceDbChat, workflowId): interfaceDbChat.updateWorkflow(workflowId, { "status": "error", "lastActivity": getUtcTimestamp() }) else: logger.info(f"Workflow {workflowId} was stopped, not updating status to error") # Schedule cleanup await event_manager.cleanup(workflowId) except Exception as storeError: logger.error(f"Error storing error message: {storeError}")