gateway/modules/features/chatbot/mainChatbot.py
2026-01-30 11:24:24 +01:00

2722 lines
129 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Simple chatbot feature - basic implementation.
User input is processed by AI to create list of needed queries.
Those queries get streamed back.
This module also handles feature initialization and RBAC catalog registration.
"""
import logging
from typing import Dict, List, Any
# Feature metadata for RBAC catalog
FEATURE_CODE = "chatbot"
FEATURE_LABEL = {"en": "Chatbot", "de": "Chatbot", "fr": "Chatbot"}
FEATURE_ICON = "mdi-robot"
# UI Objects for RBAC catalog
# Each entry describes one UI area that can be shown/hidden per role.
UI_OBJECTS = [
    {
        "objectKey": "ui.feature.chatbot.conversations",
        "label": {"en": "Conversations", "de": "Konversationen", "fr": "Conversations"},
        "meta": {"area": "conversations"}
    },
    {
        "objectKey": "ui.feature.chatbot.settings",
        "label": {"en": "Settings", "de": "Einstellungen", "fr": "Paramètres"},
        "meta": {"area": "settings"}
    },
]
# Resource Objects for RBAC catalog
# Each entry maps to an HTTP endpoint that can be allowed/denied per role.
RESOURCE_OBJECTS = [
    {
        "objectKey": "resource.feature.chatbot.start",
        "label": {"en": "Start Chatbot", "de": "Chatbot starten", "fr": "Démarrer chatbot"},
        "meta": {"endpoint": "/api/chatbot/{instanceId}/start/stream", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.chatbot.stop",
        "label": {"en": "Stop Chatbot", "de": "Chatbot stoppen", "fr": "Arrêter chatbot"},
        "meta": {"endpoint": "/api/chatbot/{instanceId}/stop/{workflowId}", "method": "POST"}
    },
]
# DATA Objects for RBAC catalog (tables/entities)
# Used for AccessRules on data-level permissions
DATA_OBJECTS = [
    {
        "objectKey": "data.feature.chatbot.ChatWorkflow",
        "label": {"en": "Chat Workflow", "de": "Chat-Workflow", "fr": "Workflow de chat"},
        "meta": {"table": "ChatWorkflow", "fields": ["id", "name", "status", "mandateId", "featureInstanceId"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatMessage",
        "label": {"en": "Chat Message", "de": "Chat-Nachricht", "fr": "Message de chat"},
        "meta": {"table": "ChatMessage", "fields": ["id", "workflowId", "message", "role", "publishedAt"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatLog",
        "label": {"en": "Chat Log", "de": "Chat-Log", "fr": "Journal de chat"},
        "meta": {"table": "ChatLog", "fields": ["id", "workflowId", "message", "type", "timestamp"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatDocument",
        "label": {"en": "Chat Document", "de": "Chat-Dokument", "fr": "Document de chat"},
        "meta": {"table": "ChatDocument", "fields": ["id", "messageId", "fileId", "fileName", "fileSize", "mimeType"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatStat",
        "label": {"en": "Chat Statistics", "de": "Chat-Statistiken", "fr": "Statistiques de chat"},
        "meta": {"table": "ChatStat", "fields": ["id", "workflowId", "processingTime", "bytesSent", "bytesReceived", "errorCount"]}
    },
    {
        "objectKey": "data.feature.chatbot.*",
        "label": {"en": "All Chatbot Data", "de": "Alle Chatbot-Daten", "fr": "Toutes les données chatbot"},
        "meta": {"wildcard": True, "description": "Wildcard for all chatbot data tables"}
    },
]
# Template roles for this feature
# These are synced into the database as global template roles (mandateId=None)
# by _syncTemplateRolesToDb().
TEMPLATE_ROLES = [
    {
        "roleLabel": "chatbot-admin",
        "description": {
            "en": "Chatbot Administrator - Full access to chatbot settings and all conversations",
            "de": "Chatbot-Administrator - Vollzugriff auf Chatbot-Einstellungen und alle Konversationen",
            "fr": "Administrateur chatbot - Accès complet aux paramètres et conversations"
        },
        "accessRules": [
            # Full UI access
            {"context": "UI", "item": None, "view": True},
            # Full DATA access
            # NOTE(review): "a" presumably means access to all records — confirm against AccessRule semantics
            {"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"},
            # Resource access
            {"context": "RESOURCE", "item": "resource.feature.chatbot.start", "view": True},
        ]
    },
    {
        "roleLabel": "chatbot-user",
        "description": {
            "en": "Chatbot User - Use chatbot and view own conversations",
            "de": "Chatbot-Benutzer - Chatbot nutzen und eigene Konversationen einsehen",
            "fr": "Utilisateur chatbot - Utiliser le chatbot et consulter ses conversations"
        },
        "accessRules": [
            # UI access to conversations - fully qualified object keys
            {"context": "UI", "item": "ui.feature.chatbot.conversations", "view": True},
            # Own DATA access (my level)
            # NOTE(review): "m" presumably restricts access to the user's own ("my") records — confirm
            {"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"},
            # Resource access
            {"context": "RESOURCE", "item": "resource.feature.chatbot.start", "view": True},
        ]
    },
]
def getFeatureDefinition():
    """Return the feature definition dict (code, label, icon) for registration."""
    definition = {
        "code": FEATURE_CODE,
        "label": FEATURE_LABEL,
        "icon": FEATURE_ICON
    }
    return definition
def getUiObjects():
    """Expose the UI object catalog entries used for RBAC registration."""
    return UI_OBJECTS
def getResourceObjects():
    """Expose the resource (endpoint) catalog entries used for RBAC registration."""
    return RESOURCE_OBJECTS
def getTemplateRoles():
    """Expose the template role definitions declared by this feature."""
    return TEMPLATE_ROLES
def getDataObjects():
    """Expose the DATA (table/entity) catalog entries used for RBAC registration."""
    return DATA_OBJECTS
def registerFeature(catalogService) -> bool:
    """
    Register this feature's RBAC objects in the catalog.

    Registers UI, resource and DATA objects (in that order), then syncs the
    template roles and their AccessRules into the database.

    Args:
        catalogService: The RBAC catalog service instance

    Returns:
        True if registration was successful, False on any error.
    """
    try:
        # Table-driven registration: (catalog entries, service method name).
        # The method is looked up lazily so failure ordering matches a
        # straight sequence of three loops.
        for objectList, methodName in (
            (UI_OBJECTS, "registerUiObject"),
            (RESOURCE_OBJECTS, "registerResourceObject"),
            (DATA_OBJECTS, "registerDataObject"),
        ):
            register = getattr(catalogService, methodName)
            for entry in objectList:
                register(
                    featureCode=FEATURE_CODE,
                    objectKey=entry["objectKey"],
                    label=entry["label"],
                    meta=entry.get("meta")
                )
        # Template roles (and their AccessRules) are persisted in the database
        _syncTemplateRolesToDb()
        logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI, {len(RESOURCE_OBJECTS)} resource, {len(DATA_OBJECTS)} data objects")
        return True
    except Exception as e:
        logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
        return False
def _syncTemplateRolesToDb() -> int:
    """
    Sync template roles and their AccessRules to the database.

    Creates global template roles (mandateId=None) if they don't exist, and
    ensures each role (new or pre-existing) carries the AccessRules defined
    in TEMPLATE_ROLES. Finally backfills AccessRules onto instance roles.

    Returns:
        Number of roles created (0 on error or when all roles already exist)
    """
    try:
        # Local imports avoid import cycles at module load time
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext
        rootInterface = getRootInterface()
        db = rootInterface.db
        # Get existing template roles for this feature (global => mandateId is None)
        existingRoles = db.getRecordset(
            Role,
            recordFilter={"featureCode": FEATURE_CODE, "mandateId": None}
        )
        # Map roleLabel -> role ID for quick existence checks
        existingRoleLabels = {r.get("roleLabel"): r.get("id") for r in existingRoles}
        createdCount = 0
        for roleTemplate in TEMPLATE_ROLES:
            roleLabel = roleTemplate["roleLabel"]
            if roleLabel in existingRoleLabels:
                roleId = existingRoleLabels[roleLabel]
                logger.debug(f"Template role '{roleLabel}' already exists with ID {roleId}")
                # Ensure AccessRules exist for this role
                _ensureAccessRulesForRole(db, roleId, roleTemplate.get("accessRules", []))
            else:
                # Create new template role
                newRole = Role(
                    roleLabel=roleLabel,
                    description=roleTemplate.get("description", {}),
                    featureCode=FEATURE_CODE,
                    mandateId=None,  # Global template
                    featureInstanceId=None,
                    isSystemRole=False
                )
                createdRole = db.recordCreate(Role, newRole.model_dump())
                roleId = createdRole.get("id")
                # Create AccessRules for this role
                _ensureAccessRulesForRole(db, roleId, roleTemplate.get("accessRules", []))
                logger.info(f"Created template role '{roleLabel}' with ID {roleId}")
                createdCount += 1
        if createdCount > 0:
            logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles")
        # Repair instance-specific roles that are missing AccessRules
        # NOTE(review): existingRoleLabels only contains template roles that
        # existed BEFORE this run; instance roles matching a template created
        # in this run are not repaired until the next sync — confirm intended.
        _repairInstanceRolesAccessRules(db, existingRoleLabels)
        return createdCount
    except Exception as e:
        logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
        return 0
def _repairInstanceRolesAccessRules(db, templateRoleLabels: Dict[str, str]) -> int:
    """
    Backfill AccessRules onto instance-specific roles that have none.

    Instance roles created before AccessRules were defined on their template
    would otherwise stay permission-less; this copies the template's rules.

    Args:
        db: Database connector
        templateRoleLabels: Mapping of roleLabel to template role ID

    Returns:
        Number of instance roles that received copied rules
    """
    from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext
    fixedRoles = 0
    # Instance-specific roles are the ones bound to a mandate (mandateId set)
    featureRoles = db.getRecordset(Role, recordFilter={"featureCode": FEATURE_CODE})
    for role in featureRoles:
        if role.get("mandateId") is None:
            continue  # global template role, not an instance role
        roleLabel = role.get("roleLabel")
        instanceRoleId = role.get("id")
        templateRoleId = templateRoleLabels.get(roleLabel)
        if not templateRoleId:
            continue  # no matching template role to copy from
        if db.getRecordset(AccessRule, recordFilter={"roleId": instanceRoleId}):
            continue  # instance role already has rules, nothing to repair
        templateRules = db.getRecordset(AccessRule, recordFilter={"roleId": templateRoleId})
        if not templateRules:
            continue  # template has no rules to copy
        for sourceRule in templateRules:
            clonedRule = AccessRule(
                roleId=instanceRoleId,
                context=sourceRule.get("context"),
                item=sourceRule.get("item"),
                view=sourceRule.get("view", False),
                read=sourceRule.get("read"),
                create=sourceRule.get("create"),
                update=sourceRule.get("update"),
                delete=sourceRule.get("delete"),
            )
            db.recordCreate(AccessRule, clonedRule.model_dump())
        logger.info(f"Repaired instance role '{roleLabel}' (ID: {instanceRoleId}): copied {len(templateRules)} AccessRules from template")
        fixedRoles += 1
    if fixedRoles > 0:
        logger.info(f"Feature '{FEATURE_CODE}': Repaired {fixedRoles} instance roles with missing AccessRules")
    return fixedRoles
def _ensureAccessRulesForRole(db, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
    """
    Ensure AccessRules exist for a role based on templates.

    Rules are deduplicated on their (context, item) pair: templates whose
    pair already exists on the role are skipped, so repeated calls — and
    duplicate templates within one call — never create duplicate rules.

    Args:
        db: Database connector
        roleId: Role ID
        ruleTemplates: List of rule templates

    Returns:
        Number of rules created
    """
    from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext
    # Map context strings to enum values; unknown strings pass through unchanged
    contextMap = {
        "UI": AccessRuleContext.UI,
        "DATA": AccessRuleContext.DATA,
        "RESOURCE": AccessRuleContext.RESOURCE,
    }
    # Get existing rules for this role
    existingRules = db.getRecordset(AccessRule, recordFilter={"roleId": roleId})
    # Signatures of rules already present, used for duplicate detection
    existingSignatures = {(rule.get("context"), rule.get("item")) for rule in existingRules}
    createdCount = 0
    for template in ruleTemplates:
        context = template.get("context", "UI")
        item = template.get("item")
        sig = (context, item)
        if sig in existingSignatures:
            continue
        newRule = AccessRule(
            roleId=roleId,
            context=contextMap.get(context, context),
            item=item,
            view=template.get("view", False),
            read=template.get("read"),
            create=template.get("create"),
            update=template.get("update"),
            delete=template.get("delete"),
        )
        db.recordCreate(AccessRule, newRule.model_dump())
        # Fix: record the new signature so duplicate (context, item) pairs
        # within the SAME ruleTemplates list no longer create duplicate rules
        existingSignatures.add(sig)
        createdCount += 1
    if createdCount > 0:
        logger.debug(f"Created {createdCount} AccessRules for role {roleId}")
    return createdCount
import json
import uuid
import asyncio
import re
from typing import Optional, Dict, Any, List
from modules.features.chatbot.datamodelFeatureChatbot import ChatWorkflow, UserInputRequest, WorkflowModeEnum, ChatLog, ChatDocument
from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum
from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.services import getInterface as getServices
from modules.features.chatbot import interfaceFeatureChatbot
from modules.features.chatbot.eventManager import get_event_manager
from modules.features.chatbot.chatbotUtils import (
generate_conversation_name,
)
from modules.features.chatbot.chatbotConfig import get_chatbot_config, ChatbotConfig
from modules.features.chatbot.langgraphChatbot import LangGraphChatbot
from langchain_core.messages import HumanMessage
import base64
# Module-level logger. Defined after the RBAC helpers above, but those only
# resolve the name at call time, so the forward reference is safe.
logger = logging.getLogger(__name__)
def _extractJsonFromResponse(content: str) -> Optional[dict]:
"""Extract JSON from AI response, handling markdown code blocks."""
# Try direct JSON parse first
try:
return json.loads(content.strip())
except json.JSONDecodeError:
pass
# Try to extract JSON from markdown code blocks
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL)
if json_match:
try:
return json.loads(json_match.group(1))
except json.JSONDecodeError:
pass
# Try to find JSON object in the text
json_match = re.search(r'\{.*\}', content, re.DOTALL)
if json_match:
try:
return json.loads(json_match.group(0))
except json.JSONDecodeError:
pass
return None
async def chatProcess(
    currentUser: User,
    mandateId: str,
    userInput: UserInputRequest,
    workflowId: Optional[str] = None,
    featureInstanceId: Optional[str] = None
) -> ChatWorkflow:
    """
    Simple chatbot processing - analyze user input and generate queries.

    Flow:
    1. Create or load workflow
    2. Store user message
    3. AI analyzes user input to create list of needed queries
    4. Stream queries back

    The AI analysis itself runs in a fire-and-forget background task
    (_processChatbotMessage); this coroutine returns as soon as the user
    message is stored and streamed.

    Args:
        currentUser: Current user
        mandateId: Mandate context (from RequestContext / X-Mandate-Id header)
        userInput: User input request
        workflowId: Optional workflow ID to continue existing conversation
        featureInstanceId: Optional feature instance ID for instance-level isolation

    Returns:
        ChatWorkflow instance (reloaded, so it includes the stored user message)

    Raises:
        ValueError: If the instance lacks customSystemPrompt, the workflow
            does not exist or belongs to a different instance, or the
            workflow cannot be reloaded after creation.
    """
    try:
        # Load chatbot configuration for this instance
        chatbot_config = get_chatbot_config(featureInstanceId)
        logger.info(f"Loaded chatbot config for instance {featureInstanceId}: connector={chatbot_config.connector_type}, maxQueries={chatbot_config.max_queries}")
        # Validate that required system prompt is configured
        if not chatbot_config.custom_system_prompt:
            error_msg = f"Chatbot instance {featureInstanceId} is missing required customSystemPrompt configuration"
            logger.error(error_msg)
            raise ValueError(error_msg)
        # Get services normally (for other services like chat, ai, etc.)
        services = getServices(currentUser, None, mandateId=mandateId)
        # Replace interfaceDbChat with chatbot-specific interface that supports featureInstanceId
        # This ensures instance-level data isolation
        interfaceDbChat = interfaceFeatureChatbot.getInterface(
            currentUser,
            mandateId=mandateId,
            featureInstanceId=featureInstanceId
        )
        # Update services to use the chatbot-specific interface
        services.interfaceDbChat = interfaceDbChat
        # Get event manager and create queue if needed
        event_manager = get_event_manager()
        # Create or load workflow
        if workflowId:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")
            # Verify workflow belongs to this instance if instanceId is provided
            if featureInstanceId and workflow.featureInstanceId != featureInstanceId:
                raise ValueError(f"Workflow {workflowId} does not belong to instance '{featureInstanceId}'")
            # Resume workflow: increment round number
            new_round = workflow.currentRound + 1
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "running",
                "currentRound": new_round,
                "lastActivity": getUtcTimestamp()
            })
            # Re-fetch so the in-memory object reflects the update above
            workflow = interfaceDbChat.getWorkflow(workflowId)
            logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}")
            # Create event queue if it doesn't exist (for streaming)
            if not event_manager.has_queue(workflowId):
                event_manager.create_queue(workflowId)
        else:
            # Generate conversation name based on user's prompt
            conversation_name = await generate_conversation_name(
                services,
                userInput.prompt,
                userInput.userLanguage
            )
            # Create new workflow
            workflowData = {
                "id": str(uuid.uuid4()),
                "mandateId": mandateId,
                "featureInstanceId": featureInstanceId,
                "status": "running",
                "name": conversation_name,
                "currentRound": 1,
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0,
                "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value,
                "startedAt": getUtcTimestamp(),
                "lastActivity": getUtcTimestamp()
            }
            workflow = interfaceDbChat.createWorkflow(workflowData)
            logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}")
            # Create event queue for new workflow (for streaming)
            event_manager.create_queue(workflow.id)
        # Reload workflow to get current message count
        workflow_id = workflow.id
        workflow = interfaceDbChat.getWorkflow(workflow_id)
        if not workflow:
            raise ValueError(f"Failed to reload workflow {workflow_id}")
        # Process uploaded files and create ChatDocuments
        # NOTE(review): user_documents are built but never passed to
        # createMessage or stored in this function (messageId stays "") —
        # confirm they are persisted downstream.
        user_documents = []
        if userInput.listFileId and len(userInput.listFileId) > 0:
            logger.info(f"Processing {len(userInput.listFileId)} uploaded file(s) for user message")
            for fileId in userInput.listFileId:
                try:
                    # Get file info from chat service
                    fileInfo = services.chat.getFileInfo(fileId)
                    if not fileInfo:
                        logger.warning(f"No file info found for file ID {fileId}")
                        continue
                    originalFileName = fileInfo.get("fileName", "unknown")
                    originalMimeType = fileInfo.get("mimeType", "application/octet-stream")
                    fileSizeToUse = fileInfo.get("size", 0)
                    # Create ChatDocument for the file
                    document = ChatDocument(
                        id=str(uuid.uuid4()),
                        messageId="",  # Will be set when message is created
                        fileId=fileId,
                        fileName=originalFileName,
                        fileSize=fileSizeToUse,
                        mimeType=originalMimeType,
                        roundNumber=workflow.currentRound,
                        taskNumber=0,
                        actionNumber=0
                    )
                    user_documents.append(document)
                    logger.info(f"Created ChatDocument for file {fileId} -> {originalFileName}")
                except Exception as e:
                    logger.error(f"Error processing file ID {fileId}: {e}", exc_info=True)
        # Store user message
        # Get message count safely (workflow.messages might be None or empty)
        message_count = len(workflow.messages) if workflow.messages else 0
        userMessageData = {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": workflow.id,
            "message": userInput.prompt,
            "role": "user",
            "status": "first" if workflowId is None else "step",
            "sequenceNr": message_count + 1,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }
        userMessage = interfaceDbChat.createMessage(userMessageData)
        logger.info(f"Stored user message: {userMessage.id} with {len(user_documents)} document(s)")
        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            context_id=workflow.id,
            event_type="chatdata",
            data={
                "type": "message",
                "createdAt": message_timestamp,
                "item": userMessage.dict()
            },
            event_category="chat"
        )
        # Update workflow status
        interfaceDbChat.updateWorkflow(workflow.id, {
            "status": "running",
            "lastActivity": getUtcTimestamp()
        })
        # Process in background (async); the task is intentionally not awaited
        asyncio.create_task(_processChatbotMessage(
            services,
            workflow.id,
            userInput,
            userMessage.id,
            chatbot_config
        ))
        # Reload workflow to include new message
        workflow = interfaceDbChat.getWorkflow(workflow.id)
        return workflow
    except Exception as e:
        logger.error(f"Error in chatProcess: {str(e)}", exc_info=True)
        raise
async def _execute_queries_parallel(queries: List[Dict[str, Any]], chatbot_config: ChatbotConfig) -> Dict[str, Any]:
    """
    Run several SQL queries concurrently against one shared connector.

    Args:
        queries: Query dictionaries, each with "query" (SQL string),
            "purpose" (description of what it retrieves) and "table"
            (primary table name)
        chatbot_config: ChatbotConfig instance used to build the connector

    Returns:
        Flat dict keyed per 1-based query index:
        - "query_N": result text on success
        - "query_N_data": raw data rows on success
        - "query_N_error": error message when the query failed
    """
    # One connector instance is shared by all queries in this batch
    connector = chatbot_config.get_connector_instance()

    async def _run_one(position: int, spec: Dict[str, Any]):
        """Run one query; never raises — returns (position, result, error)."""
        try:
            sql = spec.get("query", "")
            outcome = await connector.executeQuery(sql, return_json=True)
            return position, outcome, None
        except Exception as exc:
            return position, None, str(exc)

    try:
        gathered = await asyncio.gather(
            *(_run_one(position, spec) for position, spec in enumerate(queries)),
            return_exceptions=True
        )
    finally:
        # Close the shared connector once, after every query has finished
        await connector.close()

    collected: Dict[str, Any] = {}
    for entry in gathered:
        if isinstance(entry, Exception):
            # Defensive: _run_one swallows exceptions, so this is unexpected
            logger.error(f"Exception in parallel query execution: {entry}")
            continue
        position, outcome, error = entry
        key = f"query_{position + 1}"
        if error:
            collected[f"{key}_error"] = error
            logger.error(f"Query {position + 1} failed: {error}")
            continue
        result_text = outcome.get("text", "") if outcome else ""
        if outcome and not result_text.startswith(("Error:", "Query failed:")):
            collected[key] = result_text
            collected[f"{key}_data"] = outcome.get("data", [])
            row_count = len(outcome.get('data', []))
            logger.info(f"Query {position + 1} executed successfully, returned {row_count} rows")
        else:
            failure_text = outcome.get("text", "Query failed") if outcome else "Query failed: No response"
            collected[f"{key}_error"] = failure_text
            logger.error(f"Query {position + 1} failed: {failure_text}")
    return collected
async def _emit_log_and_event(
    interfaceDbChat,
    workflowId: str,
    event_manager,
    message: str,
    log_type: str = "info",
    status: str = "running",
    round_number: Optional[int] = None
) -> None:
    """
    Persist a workflow log entry and stream it to connected clients.

    All failures are caught and logged — this helper never raises.

    Args:
        interfaceDbChat: Database interface
        workflowId: Workflow ID
        event_manager: Event manager used for streaming (may be None)
        message: Log message text
        log_type: Log type (info, warning, error)
        status: Status string stored with the log
        round_number: Round number; looked up from the workflow when omitted
    """
    try:
        if round_number is None:
            # Fall back to the workflow's current round
            currentWorkflow = interfaceDbChat.getWorkflow(workflowId)
            if currentWorkflow:
                round_number = currentWorkflow.currentRound
        createdAt = getUtcTimestamp()
        logPayload = {
            "id": f"log_{uuid.uuid4()}",
            "workflowId": workflowId,
            "message": message,
            "type": log_type,
            "timestamp": createdAt,
            "status": status,
            "roundNumber": round_number
        }
        # Persist first; only stream when persistence succeeded
        storedLog = interfaceDbChat.createLog(logPayload)
        if storedLog and event_manager:
            try:
                # Normalize to a plain dict regardless of Pydantic version
                if hasattr(storedLog, "model_dump"):
                    logItem = storedLog.model_dump()
                elif hasattr(storedLog, "dict"):
                    logItem = storedLog.dict()
                else:
                    logItem = logPayload
                await event_manager.emit_event(
                    context_id=workflowId,
                    event_type="chatdata",
                    data={
                        "type": "log",
                        "createdAt": createdAt,
                        "item": logItem
                    },
                    event_category="chat",
                    message="New log",
                    step="log"
                )
            except Exception as emit_error:
                # Streaming is best-effort; the log is already persisted
                logger.warning(f"Error emitting log event: {emit_error}")
    except Exception as e:
        logger.error(f"Error storing log: {e}", exc_info=True)
async def _check_workflow_stopped(interfaceDbChat, workflowId: str) -> bool:
"""
Check if workflow was stopped.
Args:
interfaceDbChat: Database interface
workflowId: Workflow ID
Returns:
True if workflow is stopped, False otherwise
"""
try:
workflow = interfaceDbChat.getWorkflow(workflowId)
return workflow and workflow.status == "stopped"
except Exception as e:
logger.warning(f"Error checking workflow status: {e}")
return False
def _build_final_answer_prompt_with_results(
    system_prompt: str,
    user_prompt: str,
    context: str,
    db_results_part: str,
    web_results_part: str,
    is_resumed: bool = False,
    has_db_results: bool = False,
    has_web_results: bool = False
) -> str:
    """
    Build the complete prompt for generating the final answer with database and web results.

    Uses the provided system_prompt from configuration instead of hardcoded prompts.
    The instruction text below is intentionally German (user-facing model
    instructions) — do NOT translate it; it is runtime data, not comments.

    Selects one of four instruction variants depending on which data sources
    produced results (web only / db only / neither / both), and one of two
    framings depending on is_resumed (resumed chats omit the system prompt,
    which is assumed to already be in the conversation context).

    Args:
        system_prompt: System prompt from chatbot configuration
        user_prompt: User's original prompt
        context: Conversation context
        db_results_part: Formatted database results section
        web_results_part: Formatted web research results section
        is_resumed: If True, exclude system prompt (already in context from previous messages)
        has_db_results: Whether database results are available
        has_web_results: Whether web research results are available

    Returns:
        Complete formatted prompt string
    """
    if is_resumed:
        # System prompt already in context, don't repeat it
        # Emphasize that the current question is primary
        if context:
            context_section = f"""
⚠️⚠️⚠️ KONTEXT (NUR FÜR REFERENZ - IGNORIEREN WENN NICHT BENÖTIGT) ⚠️⚠️⚠️
{context}
⚠️⚠️⚠️ ENDE KONTEXT ⚠️⚠️⚠️
"""
        else:
            context_section = ""
        # Build instructions based on what data sources are available
        if has_web_results and not has_db_results:
            # Only web research - emphasize web research
            instructions = f"""⚠️⚠️⚠️ WICHTIG - NUR INTERNET-RECHERCHE VERFÜGBAR ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
{db_results_part}{web_results_part}
KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte.
⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben
- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert
- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind
- ❌ ABSOLUT VERBOTEN: Daten erfinden
WICHTIG:
- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- Klare, strukturierte Antwort mit Quellenangaben
- Präsentiere die gefundenen Informationen ausführlich"""
        elif has_db_results and not has_web_results:
            # Only database - use existing database-focused instructions
            instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
{db_results_part}{web_results_part}
KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen
- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen
- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben
- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle)
- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden
- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen
- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen
WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!)
- Klare, strukturierte Antwort
- Markdown-Tabellen (max 20 Zeilen)
- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)"""
        elif not has_db_results and not has_web_results:
            # No results from either source - but database query was executed
            instructions = f"""⚠️⚠️⚠️ KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
{db_results_part}{web_results_part}
⚠️⚠️⚠️ ABSOLUT KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️
Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber keine Ergebnisse zurückgegeben.
DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!
VERBOTEN - NIEMALS SAGEN:
- "Ich habe keinen Zugriff auf die Datenbank"
- "Ich kann nicht auf die Datenbank zugreifen"
- "Es tut mir leid, aber ich habe keinen Zugriff"
- "Ich habe keinen Zugriff auf Echtzeit-Datenbanken"
- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!
RICHTIG - SAGE STATTDESSEN:
- "Es wurden keine Artikel gefunden"
- "Keine passenden Artikel in der Datenbank gefunden"
- "Die Datenbanksuche ergab keine Treffer"
- "Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden"
WICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!
Beginne deine Antwort mit: "Ich habe in der Datenbank gesucht, aber..." oder "Es wurden keine Artikel gefunden..." oder ähnlich."""
        else:
            # Both database and web research
            instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
{db_results_part}{web_results_part}
KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE
- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen
- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle)
WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- Dann "Aus meiner Web-Recherche..." für Internet-Informationen
- Klare, strukturierte Antwort mit Quellenangaben"""
        return f"""⚠️⚠️⚠️ AKTUELLE FRAGE (PRIMÄR - DIESE MUSS BEANTWORTET WERDEN) ⚠️⚠️⚠️
Antworte auf die folgende Frage des Nutzers: {user_prompt}
{context_section}{instructions}"""
    else:
        # New chat: include system prompt
        # Build instructions based on what data sources are available
        if has_web_results and not has_db_results:
            # Only web research - emphasize web research
            return f"""{system_prompt}
Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}
{db_results_part}{web_results_part}
KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte.
⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben
- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert
- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind
- ❌ ABSOLUT VERBOTEN: Daten erfinden
WICHTIG:
- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- Klare, strukturierte Antwort mit Quellenangaben
- Präsentiere die gefundenen Informationen ausführlich"""
        elif has_db_results and not has_web_results:
            # Only database - use existing database-focused instructions
            return f"""{system_prompt}
Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}
{db_results_part}{web_results_part}
KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen
- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen
- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben
- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle)
- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden
- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen
- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen
WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!)
- Klare, strukturierte Antwort
- Markdown-Tabellen (max 20 Zeilen)
- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)"""
        elif not has_db_results and not has_web_results:
            # No results from either source - but database query was executed
            return f"""{system_prompt}
Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}
{db_results_part}{web_results_part}
⚠️⚠️⚠️ KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️
Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber keine Ergebnisse zurückgegeben.
DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!
VERBOTEN - NIEMALS SAGEN:
- "Ich habe keinen Zugriff auf die Datenbank"
- "Ich kann nicht auf die Datenbank zugreifen"
- "Es tut mir leid, aber ich habe keinen Zugriff"
- "Ich habe keinen Zugriff auf Echtzeit-Datenbanken"
- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!
RICHTIG - SAGE STATTDESSEN:
- "Es wurden keine Artikel gefunden"
- "Keine passenden Artikel in der Datenbank gefunden"
- "Die Datenbanksuche ergab keine Treffer"
- "Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden"
WICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!
Beginne deine Antwort mit: "Ich habe in der Datenbank gesucht, aber..." oder "Es wurden keine Artikel gefunden..." oder ähnlich."""
        else:
            # Both database and web research
            return f"""{system_prompt}
Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}
{db_results_part}{web_results_part}
KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE
- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen
- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle)
WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- Dann "Aus meiner Web-Recherche..." für Internet-Informationen
- Klare, strukturierte Antwort mit Quellenangaben"""
def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults: Optional[Dict[str, Any]] = None) -> str:
    """
    Build enriched web research query by extracting product context from conversation history and current prompt.

    Extracts product information from:
    1. Current user prompt (article numbers, product mentions)
    2. Database query results (if available)
    3. Previous assistant messages (conversation history)

    Args:
        userPrompt: Current user prompt
        workflowMessages: List of workflow messages (conversation history);
            items are expected to expose ``role`` and ``message`` attributes
        queryResults: Optional database query results to extract product info from

    Returns:
        Enriched search query string (falls back to the original prompt when
        no usable product context was found)
    """
    # Normalize user prompt for detection
    prompt_lower = userPrompt.lower().strip()

    def _has_term(term: str) -> bool:
        """Check whether a keyword occurs in the prompt.

        Short terms (<= 3 chars, e.g. "ja", "ul", "en", "ce") are matched as
        whole words only: a plain substring test fires on fragments of
        ordinary words ("en" occurs in nearly every German sentence), which
        previously made the intent flags almost always true. Longer entries
        are intentional stems ("such", "recherchier") and keep substring
        semantics so inflected forms still match.
        """
        if len(term) <= 3:
            return re.search(r"\b" + re.escape(term) + r"\b", prompt_lower) is not None
        return term in prompt_lower

    # Patterns that indicate a search request
    search_patterns = [
        "ja", "yes", "oui", "si",
        "such", "suche", "search", "recherche", "recherchier",
        "internet", "web", "online",
        "datenblatt", "datasheet", "fiche technique",
        "mehr informationen", "more information", "plus d'information",
        "weitere informationen", "further information", "additional information"
    ]
    # Certification patterns that require web research
    certification_patterns = [
        "ul", "ce", "tüv", "vde", "iec", "en", "iso",
        "zertifiziert", "certified", "certification", "zertifizierung",
        "geprüft", "approved", "compliance"
    ]
    # Check if current prompt contains search-related keywords
    has_search_intent = any(_has_term(pattern) for pattern in search_patterns)
    # Check if prompt contains certification-related keywords
    has_certification_intent = any(_has_term(pattern) for pattern in certification_patterns)

    # Extract product information - try multiple sources
    article_number = None
    article_description = None
    supplier = None
    # Pattern for article numbers like "6AV2 181-8XP00-0AX0" or "6AV2181-8XP00-0AX0"
    article_patterns = [
        r'\b[A-Z0-9]{2,}\s+[0-9]{3,}-[A-Z0-9-]+\b',  # With space: "6AV2 181-8XP00-0AX0"
        r'\b[A-Z0-9]{4,}[\s-][A-Z0-9-]{6,}\b',       # General pattern
        r'\b[A-Z]{2,}[0-9]+\s+[0-9]+-[A-Z0-9-]+\b',  # Specific Siemens pattern
    ]

    # 1. First, try to extract from current user prompt
    for pattern in article_patterns:
        matches = re.findall(pattern, userPrompt)
        if matches:
            article_number = matches[0]
            logger.info(f"Extracted article number from user prompt: {article_number}")
            break

    # 2. Try to extract from database query results if available.
    # Always check queryResults to enrich with product description and supplier,
    # even if article_number was already found.
    if queryResults:
        # Look for article numbers in query result text (if not already found)
        if not article_number:
            for key in queryResults.keys():
                if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                    result_text = queryResults.get(key, "")
                    if isinstance(result_text, str):
                        for pattern in article_patterns:
                            matches = re.findall(pattern, result_text)
                            if matches:
                                article_number = matches[0]
                                logger.info(f"Extracted article number from query results: {article_number}")
                                break
                    if article_number:
                        break
        # Always check data arrays for product description and supplier
        # (even if article_number already found)
        for key in queryResults.keys():
            if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                data_key = f"{key}_data"
                if data_key in queryResults:
                    data_array = queryResults[data_key]
                    if isinstance(data_array, list) and len(data_array) > 0:
                        # Only the first row of each result set is inspected.
                        first_row = data_array[0]
                        if isinstance(first_row, dict):
                            # Check common article number fields (if not already found)
                            if not article_number:
                                for field in ["Artikelnummer", "Artikelkürzel", "article_number", "articleNumber"]:
                                    if field in first_row and first_row[field]:
                                        article_number = str(first_row[field])
                                        logger.info(f"Extracted article number from query data: {article_number}")
                                        break
                            # Article description can enrich the query even when
                            # the article number is already known.
                            if not article_description:
                                for field in ["Artikelbezeichnung", "Bezeichnung", "article_description", "description"]:
                                    if field in first_row and first_row[field]:
                                        article_description = str(first_row[field])
                                        logger.info(f"Extracted article description from query data: {article_description}")
                                        break
                            # Same for the supplier.
                            if not supplier:
                                for field in ["Lieferant", "Supplier", "supplier"]:
                                    if field in first_row and first_row[field]:
                                        supplier = str(first_row[field])
                                        logger.info(f"Extracted supplier from query data: {supplier}")
                                        break
            # If we found all needed info, we can stop
            if article_number and article_description and supplier:
                break

    # Check if current prompt is an explicit search request that should NOT use context.
    # If the user explicitly asks to search for something, that wish wins over
    # any product context from previous messages.
    explicit_search_patterns = [
        r"recherchier\s+(?:im\s+internet\s+)?nach\s+(.+)",
        r"suche\s+(?:im\s+internet\s+)?nach\s+(.+)",
        r"search\s+(?:the\s+internet\s+)?for\s+(.+)",
        r"find\s+(?:information\s+)?(?:about\s+)?(.+)",
        r"recherche\s+(?:sur\s+internet\s+)?(.+)"
    ]
    explicit_search_term = None
    for pattern in explicit_search_patterns:
        match = re.search(pattern, userPrompt, re.IGNORECASE)
        if match:
            explicit_search_term = match.group(1).strip()
            logger.info(f"Found explicit search term in prompt: '{explicit_search_term}'")
            break

    # 3. Extract from previous assistant messages (conversation history),
    # ONLY if there's no explicit search term (to avoid using old context for
    # new searches). Only the last 10 messages are scanned, newest first.
    if not explicit_search_term and (not article_number or not article_description):
        for msg in reversed(workflowMessages[-10:]):
            if msg.role == "assistant":
                message_text = msg.message
                # Extract article number if not found yet
                if not article_number:
                    for pattern in article_patterns:
                        matches = re.findall(pattern, message_text)
                        if matches:
                            article_number = matches[0]
                            break
                # Extract article description if not found yet
                if not article_description:
                    description_patterns = [
                        r'Es handelt sich um\s+([^\.]+)',
                        r'It is a\s+([^\.]+)',
                        r'C\'est\s+([^\.]+)',
                        r'Bezeichnung:\s*([^\n]+)',
                        r'Description:\s*([^\n]+)',
                        r'Artikelbezeichnung:\s*([^\n]+)'
                    ]
                    for pattern in description_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            article_description = match.group(1).strip()
                            break
                # Extract supplier if not found yet
                if not supplier:
                    supplier_patterns = [
                        r'von\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'from\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'Lieferant:\s*([^\n]+)',
                        r'Supplier:\s*([^\n]+)'
                    ]
                    for pattern in supplier_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            supplier = match.group(1).strip()
                            break
            # Stop if we found everything
            if article_number and article_description and supplier:
                break

    # Build enriched search query
    query_parts = []
    # If we have an explicit search term, use it as the primary query
    if explicit_search_term:
        query_parts.append(explicit_search_term)
        logger.info(f"Using explicit search term as primary query: '{explicit_search_term}'")
    # If we have search intent but no product info, try to use the user prompt intelligently
    elif has_search_intent and not article_number and not article_description:
        # Remove common search phrases and keep the product-related parts
        cleaned_prompt = userPrompt
        for phrase in ["recherchier", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information", "im internet", "the internet", "sur internet"]:
            cleaned_prompt = re.sub(phrase, "", cleaned_prompt, flags=re.IGNORECASE)
        cleaned_prompt = cleaned_prompt.strip()
        # Use cleaned prompt if it has meaningful content
        if cleaned_prompt and len(cleaned_prompt) > 2:
            query_parts.append(cleaned_prompt)
    # Add article description if found (but NOT if we have an explicit search term)
    if article_description and not explicit_search_term:
        query_parts.append(article_description)
    # Add article number if found (but NOT if we have an explicit search term)
    if article_number and not explicit_search_term:
        query_parts.append(article_number)
    # Add supplier if found (but NOT if we have an explicit search term)
    if supplier and not explicit_search_term:
        query_parts.append(supplier)

    # Extract certification information from prompt if present
    certification_terms = []
    if has_certification_intent:
        # Extract specific certification mentions
        cert_keywords = {
            "ul": "UL certification",
            "ce": "CE certification",
            "tüv": "TÜV certification",
            "vde": "VDE certification",
            "iec": "IEC certification",
            "iso": "ISO certification"
        }
        for cert_key, cert_term in cert_keywords.items():
            # Word-boundary match: "ul"/"ce"/... must not match inside words.
            if _has_term(cert_key):
                certification_terms.append(cert_term)
        # If no specific certification found but certification intent detected, add generic term
        if not certification_terms:
            certification_terms.append("certification")
    # Add certification terms to query if found
    if certification_terms:
        query_parts.extend(certification_terms)

    # Add "Datenblatt" if the user requested it or if we have product info -
    # but NOT if we have an explicit search term (user wants something specific).
    if not explicit_search_term:
        if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower:
            query_parts.append("Datenblatt")
        elif query_parts and (article_number or article_description):
            # If we have product info but no explicit request for datasheet, add it anyway
            query_parts.append("Datenblatt")

    # If we found product information or built a meaningful query, use it
    if query_parts:
        enriched_query = " ".join(query_parts)
        logger.info(f"Built enriched search query: '{enriched_query}' from context (original: '{userPrompt}')")
        return enriched_query
    else:
        # Fall back to original prompt
        logger.info(f"No product context found, using original prompt: '{userPrompt}'")
        return userPrompt
async def _convert_file_ids_to_document_references(
    services,
    file_ids: List[str]
) -> DocumentReferenceList:
    """
    Convert file IDs to DocumentReferenceList for use with ai.process.

    For each file ID, tries to resolve the matching ChatDocument ID - first
    from the in-memory workflow messages, then from the database via RBAC -
    and falls back to the raw file ID when no ChatDocument is found.

    Args:
        services: Services instance
        file_ids: List of file IDs to convert

    Returns:
        DocumentReferenceList with docItem references
    """
    workflow = services.workflow
    if not workflow:
        logger.error("Cannot convert file IDs to document references: workflow not set in services")
        return DocumentReferenceList(references=[])

    def _lookup_in_messages(fid):
        """Scan the workflow's message documents for a ChatDocument with this fileId."""
        for message in (workflow.messages or []):
            docs = getattr(message, 'documents', None)
            if not docs:
                continue
            for doc in docs:
                if getattr(doc, 'fileId', None) == fid:
                    candidate = getattr(doc, 'id', None)
                    if candidate:
                        return candidate
                    # Matched doc without an id: skip to the next message.
                    break
        return None

    def _lookup_in_db(fid):
        """Best-effort RBAC database lookup; returns None on any failure."""
        try:
            from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
            records = getRecordsetWithRBAC(
                services.interfaceDbChat.db,
                ChatDocument,
                services.user,
                recordFilter={"fileId": fid},
                mandateId=services.mandateId
            )
            if records:
                known_message_ids = {msg.id for msg in workflow.messages} if workflow.messages else set()
                # Only accept documents attached to this workflow's messages.
                for record in records:
                    if record.get("messageId") in known_message_ids:
                        return record.get("id")
        except Exception:
            pass  # Fallback to fileId
        return None

    references = []
    for file_id in file_ids:
        try:
            # Verify the file exists before referencing it.
            if not services.chat.getFileInfo(file_id):
                logger.warning(f"File {file_id} not found, skipping")
                continue
            document_id = _lookup_in_messages(file_id) or _lookup_in_db(file_id)
            # Use ChatDocument ID if found, otherwise use fileId as fallback
            references.append(DocumentItemReference(documentId=document_id or file_id))
        except Exception as e:
            logger.error(f"Error converting fileId {file_id}: {e}", exc_info=True)

    logger.info(f"Converted {len(references)} file IDs to document references")
    return DocumentReferenceList(references=references)
def _format_query_results_as_lookup(query_data: Dict[str, List[Dict]]) -> str:
    """
    Format database query results as JSON lookup table for Excel matching.

    Builds a ``{Artikelnummer: {columns...}}`` mapping from the raw connector
    rows and returns it as pretty-printed JSON.

    Args:
        query_data: Dict with query_key -> list of row dicts (from connector with return_json=True)

    Returns:
        JSON string formatted as lookup table
    """
    id_field_names = ('artikelnummer', 'artikel_nummer', 'art_nr', 'part_number')
    lookup_table = {}

    for query_key, rows in query_data.items():
        # Error entries and empty result sets contribute nothing.
        if query_key == "error" or not rows:
            logger.warning(f"Skipping query key '{query_key}' - no rows or error")
            continue
        logger.info(f"Processing {len(rows)} rows from query '{query_key}'")

        for row in rows:
            if not isinstance(row, dict):
                logger.warning(f"Skipping non-dict row: {type(row)}")
                continue
            # Locate the article-number column (case-insensitive, first match wins).
            matched_key = next((k for k in row.keys() if k.lower() in id_field_names), None)
            artikelnummer = str(row[matched_key]) if matched_key is not None else None
            if artikelnummer:
                lookup_table[artikelnummer] = row
            else:
                logger.warning(f"No Artikelnummer found in row with keys: {list(row.keys())}")

    logger.info(f"Generated lookup table with {len(lookup_table)} entries")
    if lookup_table:
        # Log a small sample so the table shape is visible in the logs.
        sample_keys = list(lookup_table.keys())[:3]
        logger.info(f"Sample Artikelnummern: {sample_keys}")
        if sample_keys:
            sample_entry = lookup_table[sample_keys[0]]
            logger.info(f"Sample entry keys: {list(sample_entry.keys())}")
    return json.dumps(lookup_table, ensure_ascii=False, indent=2)
async def _create_chat_document_from_action_document(
    services,
    action_document,
    message_id: str,
    workflow_id: str,
    round_number: int
) -> ChatDocument:
    """
    Create a ChatDocument from an ActionDocument by storing the file data.

    Normalizes the document payload to raw bytes, persists it through the
    component interface, and returns the resulting ChatDocument record.

    Args:
        services: Services instance
        action_document: ActionDocument from ai.process result
        message_id: ID of the message to attach to
        workflow_id: Workflow ID
        round_number: Round number

    Returns:
        ChatDocument instance

    Raises:
        Exception: Re-raised (after logging) when normalization or storage fails.
    """
    try:
        payload = action_document.documentData
        # Normalize the payload to bytes regardless of its incoming type.
        if isinstance(payload, bytes):
            file_bytes = payload
        elif isinstance(payload, str):
            # Strings may carry base64-encoded file content; fall back to
            # plain UTF-8 encoding when decoding fails.
            try:
                file_bytes = base64.b64decode(payload)
            except Exception:
                file_bytes = payload.encode('utf-8')
        else:
            try:
                file_bytes = bytes(payload)
            except Exception:
                # Last resort: stringify, then encode.
                file_bytes = str(payload).encode('utf-8')

        # Default to the Excel MIME type when none was provided.
        mime_type = action_document.mimeType or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        file_name = action_document.documentName or "data_export.xlsx"
        # Force an .xlsx extension, replacing any existing one.
        if not file_name.lower().endswith('.xlsx'):
            file_name = file_name.rsplit('.', 1)[0] + '.xlsx'

        # Persist file metadata and content through the component interface.
        file_item = services.interfaceDbComponent.createFile(
            name=file_name,
            mimeType=mime_type,
            content=file_bytes
        )
        if not services.interfaceDbComponent.createFileData(file_item.id, file_bytes):
            logger.warning(f"Failed to store file data for {file_item.id}, but continuing...")

        chat_document = ChatDocument(
            id=str(uuid.uuid4()),
            messageId=message_id,
            fileId=file_item.id,
            fileName=file_name,
            fileSize=len(file_bytes),
            mimeType=mime_type,
            roundNumber=round_number,
            taskNumber=0,
            actionNumber=0
        )
        logger.info(f"Created ChatDocument {chat_document.id} from ActionDocument {file_name} (size: {len(file_bytes)} bytes)")
        return chat_document
    except Exception as e:
        logger.error(f"Error creating ChatDocument from ActionDocument: {e}", exc_info=True)
        raise
async def _processChatbotMessage(
services,
workflowId: str,
userInput: UserInputRequest,
userMessageId: str,
chatbot_config: ChatbotConfig
):
"""
Process chatbot message using LangGraph workflow.
Uses LangGraph to handle the conversation flow with tools (SQL, Tavily, streaming).
"""
event_manager = get_event_manager()
try:
interfaceDbChat = services.interfaceDbChat
# Reload workflow to get current messages
workflow = interfaceDbChat.getWorkflow(workflowId)
if not workflow:
logger.error(f"Workflow {workflowId} not found during processing")
await event_manager.emit_event(
context_id=workflowId,
event_type="error",
data={"error": f"Workflow {workflowId} nicht gefunden"},
event_category="workflow",
message=f"Workflow {workflowId} nicht gefunden",
step="error"
)
return
# Check if workflow was stopped before starting
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped, aborting processing")
return
await services.ai.ensureAiObjectsInitialized()
# Get connector instance
connector = chatbot_config.get_connector_instance()
# Get system prompt
system_prompt = chatbot_config.custom_system_prompt
if not system_prompt:
raise ValueError(f"System prompt not configured for chatbot instance")
# Create LangGraph chatbot instance
logger.info(f"Creating LangGraph chatbot for workflow {workflowId}")
chatbot = await LangGraphChatbot.create(
services=services,
system_prompt=system_prompt,
connector_instance=connector,
enable_web_research=chatbot_config.enable_web_research,
context_window_size=8000
)
# Process message using LangGraph streaming
logger.info(f"Processing message with LangGraph for workflow {workflowId}")
final_answer = None
chat_history = []
async for event in chatbot.stream_events(message=userInput.prompt, chat_id=workflowId):
# Check if workflow was stopped
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped during processing")
return
event_type = event.get("type")
if event_type == "status":
# Emit status update
label = event.get("label", "")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, label, log_type="info")
elif event_type == "final":
# Final response received
response_data = event.get("response", {})
chat_history = response_data.get("chat_history", [])
# Extract final answer from chat history (last assistant message)
for msg in reversed(chat_history):
if msg.get("role") == "assistant":
final_answer = msg.get("content", "")
break
elif event_type == "error":
# Error occurred
error_msg = event.get("message", "Unknown error")
logger.error(f"LangGraph error: {error_msg}")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: {error_msg}", log_type="error")
final_answer = f"Entschuldigung, ein Fehler ist aufgetreten: {error_msg}"
# Close connector
try:
await connector.close()
except Exception as e:
logger.warning(f"Error closing connector: {e}")
# Check if workflow was stopped before storing answer
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped, not storing final message")
return
# Store final answer if we have one
if final_answer:
workflow = interfaceDbChat.getWorkflow(workflowId)
message_id = f"msg_{uuid.uuid4()}"
assistantMessageData = {
"id": message_id,
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": final_answer,
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": True,
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
logger.info(f"Stored assistant message: {assistantMessage.id}")
# Emit message event for streaming
message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
await event_manager.emit_event(
context_id=workflowId,
event_type="chatdata",
data={
"type": "message",
"createdAt": message_timestamp,
"item": assistantMessage.dict()
},
event_category="chat"
)
# Update workflow status to completed
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
interfaceDbChat.updateWorkflow(workflowId, {
"status": "completed",
"lastActivity": getUtcTimestamp()
})
await event_manager.emit_event(
context_id=workflowId,
event_type="complete",
data={"workflowId": workflowId},
event_category="workflow",
message="Chatbot-Verarbeitung abgeschlossen",
step="complete"
)
# Schedule cleanup
await event_manager.cleanup(workflowId, delay=300.0)
logger.info(f"LangGraph processing completed for workflow {workflowId}")
except Exception as e:
logger.info("Analyzing user input to generate queries...")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...")
# Use custom prompt from configuration (already validated at start of chatProcess)
analysisPrompt = chatbot_config.custom_analysis_prompt.replace("{userPrompt}", userInput.prompt).replace("{context}", context or "")
# CRITICAL: Add explicit JSON format requirement to ensure AI returns JSON
json_format_instruction = """
⚠️⚠️⚠️ ABSOLUT KRITISCH - JSON-FORMAT ERFORDERLICH ⚠️⚠️⚠️
DU MUSST DEINE ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT GEBEN!
ANTWORTE NICHT MIT NORMALEM TEXT ODER EINER CHAT-ANTWORT!
DEINE ANTWORT MUSS EIN GÜLTIGES JSON-OBJEKT SEIN!
Erforderliches JSON-Format:
{
"needsDatabaseQuery": true/false,
"needsWebResearch": true/false,
"sqlQueries": [
{
"query": "SQL-Abfrage hier",
"purpose": "Zweck der Abfrage",
"table": "Haupttabelle"
}
],
"reasoning": "Begründung für die Abfragen"
}
⚠️⚠️⚠️ KRITISCH - WANN DATENBANKABFRAGE ERFORDERLICH ⚠️⚠️⚠️
SETZE "needsDatabaseQuery": true, WENN:
- Der Nutzer nach Artikeln, Produkten, Preisen, Lagerbeständen, Lieferanten fragt
- Der Nutzer nach Informationen aus der Datenbank fragt (auch allgemeine Fragen!)
- Der Nutzer eine Frage stellt, die mit Datenbank-Daten beantwortet werden kann
- Du dir nicht sicher bist - dann setze "needsDatabaseQuery": true und führe eine allgemeine Abfrage durch!
VERBOTEN:
- "needsDatabaseQuery": false setzen, nur weil die Frage allgemein klingt
- "needsDatabaseQuery": false setzen, ohne zu prüfen, ob Datenbank-Daten helfen könnten
- Chat-Antworten geben statt Datenbankabfragen durchzuführen
WICHTIG:
- Antworte NUR mit dem JSON-Objekt, KEIN zusätzlicher Text davor oder danach!
- KEINE Erklärungen, KEINE Begrüßungen, KEINE Chat-Antworten!
- NUR das JSON-Objekt!
- Bei Unsicherheit: IMMER "needsDatabaseQuery": true setzen!
"""
analysisPrompt = analysisPrompt + json_format_instruction
logger.info("Using custom analysis prompt from instance config with JSON format requirement")
# AI call for analysis
method_ai = MethodAi(services)
analysis_result = await method_ai.process({
"aiPrompt": analysisPrompt,
"documentList": None,
"resultType": "json",
"simpleMode": True
})
# Check if workflow was stopped during analysis
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped during analysis, aborting processing")
return
# Retry logic for failed analysis (max 3 attempts)
max_analysis_retries = 3
analysis_retry_count = 0
analysis = None
analysis_content = None
while analysis_retry_count < max_analysis_retries:
# Extract content from ActionResult
analysis_content = None
if analysis_result.success and analysis_result.documents:
analysis_content = analysis_result.documents[0].documentData
if isinstance(analysis_content, bytes):
analysis_content = analysis_content.decode('utf-8')
# Validate analysis was successful
if not analysis_content:
analysis_retry_count += 1
if analysis_retry_count < max_analysis_retries:
logger.warning(f"Analysis failed (attempt {analysis_retry_count}/{max_analysis_retries}): No content returned from AI, retrying...")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Analyse fehlgeschlagen, Versuch {analysis_retry_count}/{max_analysis_retries}...", log_type="warning")
# Retry analysis
analysis_result = await method_ai.process({
"aiPrompt": analysisPrompt,
"documentList": None,
"resultType": "json",
"simpleMode": True
})
continue
else:
error_msg = "Die Analyse Ihrer Anfrage ist nach mehreren Versuchen fehlgeschlagen. Bitte versuchen Sie es später erneut oder formulieren Sie Ihre Frage anders."
logger.error(f"Analysis failed after {max_analysis_retries} attempts: No content returned from AI")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Analyse nach {max_analysis_retries} Versuchen fehlgeschlagen", log_type="error")
# Store error message as assistant response
workflow = interfaceDbChat.getWorkflow(workflowId)
message_id = f"msg_{uuid.uuid4()}"
assistantMessageData = {
"id": message_id,
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": error_msg,
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": False,
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
logger.info(f"Stored error message due to failed analysis after {max_analysis_retries} attempts: {assistantMessage.id}")
return
analysis = _extractJsonFromResponse(analysis_content)
if analysis is None:
analysis_retry_count += 1
if analysis_retry_count < max_analysis_retries:
logger.warning(f"Failed to extract JSON from analysis response (attempt {analysis_retry_count}/{max_analysis_retries}), retrying...")
logger.debug(f"Analysis content: {analysis_content[:500] if analysis_content else 'None'}")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"JSON-Extraktion fehlgeschlagen, Versuch {analysis_retry_count}/{max_analysis_retries}...", log_type="warning")
# Retry analysis
analysis_result = await method_ai.process({
"aiPrompt": analysisPrompt,
"documentList": None,
"resultType": "json",
"simpleMode": True
})
continue
else:
error_msg = "Die Analyse Ihrer Anfrage konnte nach mehreren Versuchen nicht verarbeitet werden. Bitte versuchen Sie es später erneut oder formulieren Sie Ihre Frage anders."
logger.error(f"Failed to extract JSON from analysis response after {max_analysis_retries} attempts. Content: {analysis_content[:500] if analysis_content else 'None'}")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: JSON-Extraktion nach {max_analysis_retries} Versuchen fehlgeschlagen", log_type="error")
# Store error message as assistant response
workflow = interfaceDbChat.getWorkflow(workflowId)
message_id = f"msg_{uuid.uuid4()}"
assistantMessageData = {
"id": message_id,
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": error_msg,
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": False,
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
logger.info(f"Stored error message due to failed JSON extraction after {max_analysis_retries} attempts: {assistantMessage.id}")
return
# Successfully extracted analysis, break retry loop
break
# Extract analysis results
needsDatabaseQuery = analysis.get("needsDatabaseQuery", False) if analysis else False
needsWebResearch = analysis.get("needsWebResearch", False) if analysis else False
sql_queries = analysis.get("sqlQueries", []) if analysis else []
# Support legacy single query format for backward compatibility
if not sql_queries and analysis and analysis.get("sqlQuery"):
sql_queries = [{
"query": analysis.get("sqlQuery", ""),
"purpose": "Database query",
"table": "Unknown"
}]
reasoning = analysis.get("reasoning", "") if analysis else ""
# CRITICAL: If connectors are configured, ALWAYS use database if user asks about products/articles/inventory
# Override AI decision if it says "no database query" but connectors are available
if chatbot_config.connector_types and len(chatbot_config.connector_types) > 0:
user_prompt_lower = userInput.prompt.lower()
# Keywords that indicate database query is needed
db_keywords = [
"artikel", "produkt", "ware", "lager", "bestand", "preis", "lieferant",
"led", "lampe", "motor", "kabel", "schraube", "sensor", "netzteil",
"wie viele", "zeig mir", "suche", "finde", "gibt es", "haben wir",
"article", "product", "inventory", "stock", "price", "supplier",
"how many", "show me", "search", "find", "do we have"
]
has_db_intent = any(keyword in user_prompt_lower for keyword in db_keywords)
# If user asks about database-related topics but AI said no query needed, force it
if has_db_intent and not needsDatabaseQuery:
logger.warning(f"User asked about database-related topic but AI returned needsDatabaseQuery=false. Forcing needsDatabaseQuery=true because connectors are configured.")
needsDatabaseQuery = True
# Generate a default query if none were provided
if not sql_queries:
# Extract main search term from user prompt
search_terms = []
for keyword in db_keywords:
if keyword in user_prompt_lower:
# Try to extract the actual product/article name
words = user_prompt_lower.split()
keyword_idx = words.index(keyword) if keyword in words else -1
if keyword_idx >= 0 and keyword_idx < len(words) - 1:
# Take next word as potential product name
next_word = words[keyword_idx + 1]
if len(next_word) > 2: # Ignore short words like "die", "der", etc.
search_terms.append(next_word)
# Create a general search query
if search_terms:
search_term = search_terms[0]
else:
# Use the whole prompt as search term (limited)
search_term = userInput.prompt[:50] # Limit length
sql_queries = [{
"query": f'SELECT a."Artikelnummer", a."Artikelbezeichnung", a."Lieferant", a."Artikelkürzel" FROM Artikel a WHERE a."Artikelbezeichnung" LIKE \'%{search_term}%\' OR a."Artikelnummer" LIKE \'%{search_term}%\' OR a."Artikelkürzel" LIKE \'%{search_term}%\' LIMIT 20',
"purpose": f"Suche nach Artikeln die '{search_term}' enthalten",
"table": "Artikel"
}]
logger.info(f"Generated default database query for search term: {search_term}")
# Check if we need web research for certifications (only if enabled in config)
if chatbot_config.enable_web_research:
user_prompt_lower = userInput.prompt.lower()
certification_keywords = ["ul", "ce", "tüv", "vde", "iec", "iso", "zertifiziert", "certified", "certification"]
has_certification = any(keyword in user_prompt_lower for keyword in certification_keywords)
if has_certification and not needsWebResearch:
logger.warning("Certification detected but needsWebResearch is false - forcing to true")
needsWebResearch = True
else:
# Web research disabled in config
if needsWebResearch:
logger.info("Web research disabled in instance config, skipping")
needsWebResearch = False
# Limit query count based on configuration
max_queries_allowed = chatbot_config.max_queries
if needsDatabaseQuery and len(sql_queries) > max_queries_allowed:
logger.info(f"Limiting queries from {len(sql_queries)} to {max_queries_allowed} for performance")
sql_queries = sql_queries[:max_queries_allowed]
logger.info(f"Analysis: DB={needsDatabaseQuery}, Web={needsWebResearch}, SQL queries={len(sql_queries)}")
# Build initial enriched web research query if needed (for logging, will be rebuilt after DB queries)
# Only if web research is enabled in config
enriched_web_query = None
if needsWebResearch and chatbot_config.enable_web_research:
enriched_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages)
# Build list of queries to stream back
queries = []
if needsDatabaseQuery and sql_queries:
for i, sql_query_info in enumerate(sql_queries, 1):
queries.append({
"type": "database",
"query": sql_query_info.get("query", ""),
"purpose": sql_query_info.get("purpose", f"Query {i}"),
"table": sql_query_info.get("table", "Unknown"),
"reasoning": reasoning
})
if needsWebResearch and chatbot_config.enable_web_research:
queries.append({
"type": "web",
"query": enriched_web_query or userInput.prompt,
"reasoning": reasoning
})
# Format queries as log text
log_lines = []
if queries:
db_queries = [q for q in queries if q["type"] == "database"]
log_lines.append(f"Generiert: {len(db_queries)} Datenbankabfrage(n) und {len(queries) - len(db_queries)} Web-Recherche(n)\n\n")
for i, q in enumerate(queries, 1):
if q["type"] == "database":
log_lines.append(f"{i}. Datenbankabfrage ({q.get('table', 'Unknown')}):\n")
log_lines.append(f" Zweck: {q.get('purpose', 'Nicht angegeben')}\n")
log_lines.append(f"```sql\n{q['query']}\n```\n")
elif q["type"] == "web":
log_lines.append(f"{i}. Web-Recherche:\n")
log_lines.append(f" Suchbegriff: {q['query']}\n")
if q.get("reasoning"):
log_lines.append(f" Begründung: {q['reasoning']}\n")
log_lines.append("\n")
else:
log_lines.append("Keine Abfragen erforderlich.")
log_text = "".join(log_lines)
# Stream queries as a log
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, log_text)
# Check if workflow was stopped before executing queries
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped, aborting query execution")
return
# Step 2: Execute queries
queryResults = {}
webResearchResults = ""
# Start web research early in parallel with DB queries if needed (only if enabled)
web_research_task = None
if needsWebResearch and chatbot_config.enable_web_research:
# Start with basic query (will enrich later with DB results if available)
basic_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages, None)
logger.info(f"Starting web research in parallel with DB queries using basic query: '{basic_web_query}'")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Suche im Internet nach Informationen...")
async def perform_web_research():
    """Run the web research service and serialize its outcome as text.

    Returns:
        str: Pretty-printed JSON when the service yields a dict, otherwise
        the plain string form of the result. On any failure a
        "Web research error: ..." string is returned instead of raising,
        so the surrounding task gather logic never sees an exception.
    """
    try:
        outcome = await services.web.performWebResearch(
            prompt=basic_web_query,
            urls=[],
            country=None,
            language=userInput.userLanguage or "de",
            researchDepth="general",
            operationId=None
        )
        # Serialize inside the try so JSON encoding errors are also
        # converted into the error-string sentinel.
        if isinstance(outcome, dict):
            return json.dumps(outcome, ensure_ascii=False, indent=2)
        return str(outcome)
    except Exception as e:
        logger.error(f"Web research failed: {e}", exc_info=True)
        return f"Web research error: {str(e)}"
web_research_task = asyncio.create_task(perform_web_research())
# Check if connector is working before executing queries
if needsDatabaseQuery and sql_queries:
logger.info(f"Checking database connector before executing {len(sql_queries)} queries...")
try:
# Test connector with a simple query
test_connector = chatbot_config.get_connector_instance()
try:
# Try a simple test query to verify connector works
test_result = await test_connector.executeQuery("SELECT 1", return_json=True)
await test_connector.close()
if not test_result or test_result.get("text", "").startswith(("Error:", "Query failed:")):
raise Exception("Connector test query failed")
logger.info("Database connector test successful")
except Exception as connector_error:
await test_connector.close()
error_msg = f"Die Datenbankverbindung funktioniert derzeit nicht. Bitte versuchen Sie es später erneut. Fehler: {str(connector_error)}"
logger.error(f"Database connector test failed: {connector_error}", exc_info=True)
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Datenbankverbindung fehlgeschlagen", log_type="error")
# Store error message as assistant response
workflow = interfaceDbChat.getWorkflow(workflowId)
message_id = f"msg_{uuid.uuid4()}"
assistantMessageData = {
"id": message_id,
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": error_msg,
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": False,
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
logger.info(f"Stored error message due to connector failure: {assistantMessage.id}")
return
except Exception as e:
error_msg = f"Die Datenbankverbindung konnte nicht hergestellt werden. Bitte versuchen Sie es später erneut. Fehler: {str(e)}"
logger.error(f"Failed to initialize database connector: {e}", exc_info=True)
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Datenbankverbindung konnte nicht hergestellt werden", log_type="error")
# Store error message as assistant response
workflow = interfaceDbChat.getWorkflow(workflowId)
message_id = f"msg_{uuid.uuid4()}"
assistantMessageData = {
"id": message_id,
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": error_msg,
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": False,
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
logger.info(f"Stored error message due to connector initialization failure: {assistantMessage.id}")
return
# Execute database queries in parallel
if needsDatabaseQuery and sql_queries:
logger.info(f"Executing {len(sql_queries)} database queries in parallel...")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Führe {len(sql_queries)} Datenbankabfrage(n) parallel aus...")
try:
queryResults = await _execute_queries_parallel(sql_queries, chatbot_config)
# Log results summary
successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
failed_queries = [k for k in queryResults.keys() if k.endswith("_error")]
if successful_queries:
total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)
logger.info(f"Successfully executed {len(successful_queries)} query/queries, total {total_rows} rows")
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Abgeschlossen: {len(successful_queries)} Abfrage(n) erfolgreich, {total_rows} Ergebnis{'e' if total_rows != 1 else ''} gefunden"
)
if failed_queries:
logger.warning(f"{len(failed_queries)} query/queries failed")
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Warnung: {len(failed_queries)} Abfrage(n) fehlgeschlagen",
log_type="warning"
)
# Check if we got empty results and need to retry with alternative strategies
# Robust calculation: check all successful queries for empty data
total_rows = 0
queries_with_results = 0
queries_with_empty_results = 0
if successful_queries:
for query_key in successful_queries:
data_key = f"{query_key}_data"
if data_key in queryResults:
row_count = len(queryResults[data_key])
total_rows += row_count
if row_count > 0:
queries_with_results += 1
else:
queries_with_empty_results += 1
else:
# Query succeeded but no data key - treat as empty
queries_with_empty_results += 1
logger.debug(f"Query {query_key} succeeded but has no _data key")
else:
# No successful queries at all
logger.debug("No successful queries found")
# Also check if we have any query results at all
has_any_results = total_rows > 0
# Debug logging
logger.info(f"Query results analysis: total_rows={total_rows}, successful_queries={len(successful_queries)}, "
f"queries_with_results={queries_with_results}, queries_with_empty_results={queries_with_empty_results}, "
f"failed_queries={len(failed_queries)}")
# Trigger retry if: no results AND we have database queries AND we executed at least one query
# Also trigger if all successful queries returned empty results
# Only retry if enabled in config
should_retry = (
chatbot_config.enable_retry_on_empty and
not has_any_results and
needsDatabaseQuery and
len(sql_queries) > 0 and
(len(successful_queries) > 0 or len(failed_queries) == 0) # Either we have successful queries or no failures (queries executed but empty)
)
# Iterative retry loop: try up to configured max attempts with different strategies
max_empty_retry_attempts = chatbot_config.max_retry_attempts if chatbot_config.enable_retry_on_empty else 0
empty_retry_attempt = 0
original_sql_queries_count = len(sql_queries)
previous_retry_rows = 0
while should_retry and empty_retry_attempt < max_empty_retry_attempts:
empty_retry_attempt += 1
logger.info(f"No results found (attempt {empty_retry_attempt}/{max_empty_retry_attempts}), retrying with alternative query strategies...")
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Keine Ergebnisse gefunden ({len(successful_queries)} erfolgreiche Abfrage(n), {total_rows} Zeilen). Versuch {empty_retry_attempt}/{max_empty_retry_attempts}: Versuche alternative Abfrage-Strategien...",
log_type="info"
)
# Retry analysis with empty results context - create NEW analysis with alternative strategies
# Build retry prompt with progressively different strategies
empty_count = len(sql_queries)
empty_results_instructions = get_empty_results_retry_instructions(empty_count)
retry_context = f"{context}\n\n"
if empty_retry_attempt == 1:
retry_context += "⚠️⚠️⚠️ WICHTIG - ALTERNATIVE STRATEGIEN ERFORDERLICH ⚠️⚠️⚠️\n"
retry_context += "Strategie: Breitere Suche, weniger Filter\n"
elif empty_retry_attempt == 2:
retry_context += "⚠️⚠️⚠️ KRITISCH - IMMER NOCH KEINE ERGEBNISSE ⚠️⚠️⚠️\n"
retry_context += "Strategie: Entferne spezifische Filter komplett, verwende nur Hauptkriterien\n"
else:
retry_context += "⚠️⚠️⚠️ LETZTER VERSUCH - MINIMALE FILTER ⚠️⚠️⚠️\n"
retry_context += "Strategie: Nur Hauptbegriffe, keine spezifischen Filter\n"
retry_context += f"Die bisherigen {len(sql_queries)} Abfragen haben 0 Zeilen zurückgegeben.\n"
retry_context += f"{empty_results_instructions}\n"
retry_context += f"Dies ist bereits Versuch {empty_retry_attempt} von {max_empty_retry_attempts}!\n"
retry_context += "Erstelle JETZT MAXIMAL 5 alternative SQL-Queries mit komplett anderen Strategien (für Performance):\n"
if empty_retry_attempt == 1:
retry_context += "- Breitere Suche ohne zu spezifische Filter\n"
retry_context += "- Suche ohne Zertifizierungsfilter (falls Zertifizierung nicht in DB)\n"
retry_context += "- Suche nur nach Hauptkriterien (z.B. nur Netzgerät + 10A, ohne einphasig)\n"
retry_context += "- Suche nach alternativen Begriffen (Netzteil statt Netzgerät, etc.)\n"
retry_context += "- COUNT-Queries für Statistik\n"
retry_context += "- Fallback-Queries mit minimalen Filtern\n"
elif empty_retry_attempt == 2:
retry_context += "- ENTFERNE alle Zertifizierungsfilter komplett\n"
retry_context += "- ENTFERNE Phasen-Filter (einphasig/dreiphasig)\n"
retry_context += "- Suche NUR nach: Netzgerät/Netzteil + Ampere-Angaben\n"
retry_context += "- Verwende breitere Ampere-Patterns (5A, 6A, 8A, 10A, 12A, 15A, 20A, etc.)\n"
retry_context += "- Suche auch in Keywords-Feld\n"
else:
retry_context += "- MINIMALE Filter: Nur 'Netzgerät' ODER 'Netzteil' ODER 'Power Supply'\n"
retry_context += "- KEINE spezifischen Filter auf Ampere, Phasen oder Zertifizierung\n"
retry_context += "- COUNT-Query: Wie viele Netzgeräte gibt es insgesamt?\n"
retry_context += "- Suche nach ALLEN verfügbaren Netzgeräten\n"
# Retry analysis - use custom prompt from configuration (already validated at start of chatProcess)
retry_analysis_prompt = chatbot_config.custom_analysis_prompt.replace("{userPrompt}", userInput.prompt).replace("{context}", retry_context or "")
# CRITICAL: Add explicit JSON format requirement to ensure AI returns JSON
json_format_instruction = """
⚠️⚠️⚠️ ABSOLUT KRITISCH - JSON-FORMAT ERFORDERLICH ⚠️⚠️⚠️
DU MUSST DEINE ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT GEBEN!
ANTWORTE NICHT MIT NORMALEM TEXT ODER EINER CHAT-ANTWORT!
DEINE ANTWORT MUSS EIN GÜLTIGES JSON-OBJEKT SEIN!
Erforderliches JSON-Format:
{
"needsDatabaseQuery": true/false,
"needsWebResearch": true/false,
"sqlQueries": [
{
"query": "SQL-Abfrage hier",
"purpose": "Zweck der Abfrage",
"table": "Haupttabelle"
}
],
"reasoning": "Begründung für die Abfragen"
}
⚠️⚠️⚠️ KRITISCH - WANN DATENBANKABFRAGE ERFORDERLICH ⚠️⚠️⚠️
SETZE "needsDatabaseQuery": true, WENN:
- Der Nutzer nach Artikeln, Produkten, Preisen, Lagerbeständen, Lieferanten fragt
- Der Nutzer nach Informationen aus der Datenbank fragt (auch allgemeine Fragen!)
- Der Nutzer eine Frage stellt, die mit Datenbank-Daten beantwortet werden kann
- Du dir nicht sicher bist - dann setze "needsDatabaseQuery": true und führe eine allgemeine Abfrage durch!
VERBOTEN:
- "needsDatabaseQuery": false setzen, nur weil die Frage allgemein klingt
- "needsDatabaseQuery": false setzen, ohne zu prüfen, ob Datenbank-Daten helfen könnten
- Chat-Antworten geben statt Datenbankabfragen durchzuführen
WICHTIG:
- Antworte NUR mit dem JSON-Objekt, KEIN zusätzlicher Text davor oder danach!
- KEINE Erklärungen, KEINE Begrüßungen, KEINE Chat-Antworten!
- NUR das JSON-Objekt!
- Bei Unsicherheit: IMMER "needsDatabaseQuery": true setzen!
"""
retry_analysis_prompt = retry_analysis_prompt + json_format_instruction
logger.info("Using custom analysis prompt for retry from instance config with JSON format requirement")
# AI call for retry analysis
retry_analysis_result = await method_ai.process({
"aiPrompt": retry_analysis_prompt,
"documentList": None,
"resultType": "json",
"simpleMode": True
})
# Extract retry analysis
retry_analysis_content = None
if retry_analysis_result.success and retry_analysis_result.documents:
retry_analysis_content = retry_analysis_result.documents[0].documentData
if isinstance(retry_analysis_content, bytes):
retry_analysis_content = retry_analysis_content.decode('utf-8')
if retry_analysis_content:
retry_analysis = _extractJsonFromResponse(retry_analysis_content)
if retry_analysis is None:
logger.warning("Failed to extract JSON from retry analysis response")
retry_analysis = {}
if retry_analysis and retry_analysis.get("needsDatabaseQuery", False):
retry_sql_queries = retry_analysis.get("sqlQueries", [])
# Limit to maximum 5 queries for performance
if len(retry_sql_queries) > 5:
logger.info(f"Limiting retry queries from {len(retry_sql_queries)} to 5 for performance")
retry_sql_queries = retry_sql_queries[:5]
if retry_sql_queries:
logger.info(f"Executing {len(retry_sql_queries)} retry queries (attempt {empty_retry_attempt}) with alternative strategies...")
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Führe {len(retry_sql_queries)} alternative Abfrage(n) mit anderen Strategien aus (Versuch {empty_retry_attempt})...",
log_type="info"
)
# Execute retry queries
try:
retry_results = await _execute_queries_parallel(retry_sql_queries, chatbot_config)
# Merge retry results into main results (renumber to continue sequence)
base_query_num = len(sql_queries)
for key, value in retry_results.items():
if key.startswith("query_"):
# Extract query number from retry result
try:
query_num = int(key.split("_")[1])
new_query_num = base_query_num + query_num
new_key = f"query_{new_query_num}"
if not key.endswith("_data") and not key.endswith("_error"):
queryResults[new_key] = value
if f"{key}_data" in retry_results:
queryResults[f"{new_key}_data"] = retry_results[f"{key}_data"]
elif key.endswith("_error"):
queryResults[f"{new_key}_error"] = value
except (ValueError, IndexError):
# Fallback if parsing fails
new_key = f"query_{base_query_num + 1}"
if not key.endswith("_data") and not key.endswith("_error"):
queryResults[new_key] = value
# Recalculate results after retry
retry_successful = [k for k in retry_results.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
retry_rows = sum(len(retry_results.get(f"{k}_data", [])) for k in retry_successful) if retry_successful else 0
# Update successful_queries list to include retry results
successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)
logger.info(f"Retry attempt {empty_retry_attempt}: Found {retry_rows} rows from {len(retry_successful)} queries. Total: {total_rows} rows from {len(successful_queries)} queries")
if retry_rows > 0:
# Success! Found results
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Alternative Abfragen erfolgreich: {len(retry_successful)} Abfrage(n) mit {retry_rows} Ergebnis{'en' if retry_rows != 1 else ''} gefunden",
log_type="info"
)
should_retry = False # Stop retry loop, we found results
break
elif retry_rows > previous_retry_rows:
# Made some progress (found more rows than before) - continue
previous_retry_rows = retry_rows
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Versuch {empty_retry_attempt}: Fortschritt erzielt ({retry_rows} Zeilen gefunden). Versuche weitere Strategie...",
log_type="info"
)
else:
# No progress made - stop retrying
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"Versuch {empty_retry_attempt}: Keine Ergebnisse gefunden. Beende Retry-Versuche.",
log_type="warning"
)
should_retry = False # Stop retry loop, no progress
break
except Exception as retry_error:
logger.error(f"Error executing retry queries (attempt {empty_retry_attempt}): {retry_error}", exc_info=True)
# Continue to next attempt even on error
# Check if we should continue retrying (already handled in break conditions above)
if empty_retry_attempt >= max_empty_retry_attempts:
logger.warning(f"Reached maximum empty retry attempts ({max_empty_retry_attempts}), stopping retry loop")
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
f"⚠️ Maximale Anzahl Retry-Versuche ({max_empty_retry_attempts}) erreicht. Keine Ergebnisse gefunden.",
log_type="warning"
)
should_retry = False
except Exception as e:
logger.error(f"Error executing parallel queries: {e}")
queryResults["error"] = f"Error executing queries: {str(e)}"
await _emit_log_and_event(
interfaceDbChat,
workflowId,
event_manager,
"Fehler bei parallelen Datenbankabfragen",
log_type="error"
)
# Wait for web research to complete (if it was started in parallel)
if web_research_task:
try:
webResearchResults = await web_research_task
if webResearchResults and not webResearchResults.startswith("Web research error"):
logger.info("Web research completed successfully")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche abgeschlossen")
else:
logger.warning("Web research completed with errors")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")
except Exception as e:
logger.error(f"Error waiting for web research: {e}", exc_info=True)
webResearchResults = f"Web research error: {str(e)}"
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")
# Check if workflow was stopped before generating final answer
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped, aborting final answer generation")
return
# Step 3: Generate final answer using AI
logger.info("Generating final answer with AI...")
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Formuliere finale Antwort...")
# Build prompt for final answer - use custom prompt from configuration (already validated at start of chatProcess)
system_prompt = chatbot_config.custom_final_answer_prompt
logger.info("Using custom final answer prompt from instance config")
# Build answer context with query results using efficient list-based building
answer_context_parts = [f"User question: {userInput.prompt}{context}\n"]
# Add database results - organize by query with metadata
db_results_parts = []
if queryResults:
successful_results = []
error_results = []
# Extract query metadata from sql_queries if available
query_metadata = {}
if sql_queries:
for i, q_info in enumerate(sql_queries, 1):
query_metadata[f"query_{i}"] = {
"purpose": q_info.get("purpose", f"Query {i}"),
"table": q_info.get("table", "Unknown")
}
# Organize results by query number
query_numbers = set()
for key in queryResults.keys():
if key.startswith("query_") and not key.endswith("_data"):
# Extract query number (e.g., "query_1" -> 1)
try:
num = int(key.split("_")[1])
query_numbers.add(num)
except (ValueError, IndexError):
pass
# Build results with metadata
for query_num in sorted(query_numbers):
query_key = f"query_{query_num}"
error_key = f"{query_key}_error"
if error_key in queryResults:
error_msg = queryResults[error_key]
metadata = query_metadata.get(query_key, {})
purpose = metadata.get("purpose", f"Query {query_num}")
table = metadata.get("table", "Unknown")
error_results.append(f"Abfrage {query_num} ({table} - {purpose}): {error_msg}")
elif query_key in queryResults:
result_text = queryResults[query_key]
metadata = query_metadata.get(query_key, {})
purpose = metadata.get("purpose", f"Query {query_num}")
table = metadata.get("table", "Unknown")
successful_results.append(f"=== Abfrage {query_num}: {purpose} (Tabelle: {table}) ===\n{result_text}")
# Handle general error if present
if "error" in queryResults:
error_results.append(f"Allgemeiner Fehler: {queryResults['error']}")
# Build db_results_part efficiently
if successful_results:
db_results_parts.append("\n\nDATENBANK-ERGEBNISSE:\n")
db_results_parts.append("\n\n".join(successful_results))
answer_context_parts.append("DATENBANK-ERGEBNISSE:\n")
answer_context_parts.append("\n\n".join(successful_results))
answer_context_parts.append("\n")
if error_results:
db_results_parts.append("\n\nDATENBANK-FEHLER:\n")
db_results_parts.append("\n".join(error_results))
answer_context_parts.append("DATENBANK-FEHLER:\n")
answer_context_parts.append("\n".join(error_results))
answer_context_parts.append("\n")
db_results_part = "".join(db_results_parts)
# Add web research results
web_results_part = ""
# Check if web research results exist and are valid (not empty and not an error)
if webResearchResults and webResearchResults.strip() and not webResearchResults.startswith("Web research error"):
web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}"
answer_context_parts.append(f"INTERNET-RECHERCHE:\n{webResearchResults}\n")
# Join answer context efficiently
answerContext = "".join(answer_context_parts)
# Check if we have any actual data
successful_query_keys = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
has_query_results = bool(successful_query_keys)
error_query_keys = [k for k in queryResults.keys() if k.endswith("_error")]
has_only_errors = bool(error_query_keys and not successful_query_keys)
# Count total number of articles found across all queries
total_articles_found = 0
if successful_query_keys:
for query_key in successful_query_keys:
data_key = f"{query_key}_data"
if data_key in queryResults:
article_count = len(queryResults[data_key])
total_articles_found += article_count
logger.info(f"Query {query_key} returned {article_count} articles")
logger.info(f"Total articles found across all queries: {total_articles_found}")
# Add explicit article count information to prompt (using efficient list building)
if total_articles_found > 0:
article_count_parts = [
"\n\n⚠️⚠️⚠️ WICHTIG - ARTIKELANZAHL ⚠️⚠️⚠️\n",
f"In den DATENBANK-ERGEBNISSEN oben wurden INSGESAMT {total_articles_found} Artikel gefunden.\n",
f"DU MUSST ALLE {total_articles_found} Artikel in deiner Antwort zeigen!\n"
]
if total_articles_found <= 20:
article_count_parts.append(f"Zeige ALLE {total_articles_found} Artikel in einer Tabelle.\n")
else:
article_count_parts.append(f"Zeige die ersten 20 Artikel in einer Tabelle + Hinweis auf weitere {total_articles_found - 20} Artikel.\n")
article_count_parts.extend([
f"❌ VERBOTEN: Nur einen Artikel zu zeigen, wenn {total_articles_found} gefunden wurden!\n",
f"✓ OBLIGATORISCH: Zeige ALLE {total_articles_found} Artikel!\n"
])
article_count_info = "".join(article_count_parts)
if db_results_part:
db_results_part = article_count_info + db_results_part
else:
db_results_part = article_count_info
# Add warning messages if needed (using efficient list building)
warning_parts = []
if not has_query_results and needsDatabaseQuery:
warning_parts.append("\n\n⚠️⚠️⚠️ WICHTIG - DATENBANKABFRAGE AUSGEFÜHRT ⚠️⚠️⚠️\n")
warning_parts.append("Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber KEINE Ergebnisse zurückgegeben.\n")
warning_parts.append("DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!\n")
warning_parts.append("Antworte dem Nutzer: 'Es wurden keine Artikel gefunden' oder 'Keine passenden Artikel in der Datenbank gefunden'\n")
warning_parts.append("VERBOTEN: Sage NIEMALS 'Ich habe keinen Zugriff' oder 'Ich kann nicht auf die Datenbank zugreifen'!\n")
warning_parts.append("VERBOTEN: Sage NIEMALS 'Es tut mir leid, aber ich habe keinen Zugriff auf die Datenbank'!\n")
warning_parts.append("Die Datenbank wurde durchsucht, es wurden nur keine passenden Artikel gefunden.\n")
if has_only_errors:
warning_parts.extend([
"\n\n⚠️⚠️⚠️ KRITISCH - ALLE QUERIES FEHLGESCHLAGEN ⚠️⚠️⚠️\n",
"ALLE Datenbankabfragen sind fehlgeschlagen. Es gibt KEINE gültigen Daten aus der Datenbank.\n",
"DU DARFST KEINE DATEN ERFINDEN! Schreibe stattdessen: 'Es wurden keine Artikel gefunden' oder 'Die Datenbankabfrage ist fehlgeschlagen'."
])
if warning_parts:
db_results_part = db_results_part + "".join(warning_parts) if db_results_part else "".join(warning_parts)
# Determine if we have database results, web results, or both
has_db_results = bool(db_results_part and db_results_part.strip())
has_web_results = bool(web_results_part and web_results_part.strip())
# Add explicit instruction that AI has database access (if connectors are configured)
access_instruction = ""
if chatbot_config.connector_types and len(chatbot_config.connector_types) > 0:
access_instruction = "\n\n⚠️⚠️⚠️ ABSOLUT KRITISCH - DATENBANK-ZUGRIFF VERFÜGBAR ⚠️⚠️⚠️\n"
access_instruction += "DU HAST ZUGRIFF AUF DIE DATENBANK UND KANNST SQL-ABFRAGEN AUSFÜHREN!\n"
access_instruction += "Die Datenbankabfrage wurde bereits AUSGEFÜHRT (siehe DATENBANK-ERGEBNISSE oben)!\n"
access_instruction += "\nVERBOTEN - NIEMALS SAGEN:\n"
access_instruction += "- 'Ich habe keinen Zugriff auf die Datenbank'\n"
access_instruction += "- 'Ich kann nicht auf die Datenbank zugreifen'\n"
access_instruction += "- 'Es tut mir leid, aber ich habe keinen Zugriff'\n"
access_instruction += "- 'Ich habe keinen Zugriff auf Echtzeit-Datenbanken'\n"
access_instruction += "- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!\n"
access_instruction += "\nRICHTIG - WENN KEINE ERGEBNISSE:\n"
access_instruction += "- 'Es wurden keine Artikel gefunden'\n"
access_instruction += "- 'Keine passenden Artikel in der Datenbank gefunden'\n"
access_instruction += "- 'Die Datenbanksuche ergab keine Treffer'\n"
access_instruction += "- 'Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden'\n"
access_instruction += "\nWICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!\n"
if chatbot_config.enable_web_research:
access_instruction += "\n⚠️⚠️⚠️ ABSOLUT KRITISCH - INTERNET-ZUGRIFF VERFÜGBAR ⚠️⚠️⚠️\n"
access_instruction += "DU HAST ZUGRIFF AUF DAS INTERNET (Tavily)!\n"
access_instruction += "VERBOTEN: Sage NIEMALS, dass du keinen Zugriff auf das Internet hast!\n"
access_instruction += "VERBOTEN: Sage NIEMALS 'Ich habe keinen Zugriff auf das Internet'!\n"
# Build the final answer prompt using custom system prompt from config
answer_prompt = _build_final_answer_prompt_with_results(
system_prompt + access_instruction,
userInput.prompt,
context,
db_results_part,
web_results_part,
is_resumed,
has_db_results,
has_web_results
)
answerRequest = AiCallRequest(
prompt=answer_prompt,
context=answerContext if (queryResults or webResearchResults) else None,
options=AiCallOptions(
resultFormat="txt",
operationType=OperationTypeEnum.DATA_ANALYSE,
processingMode=ProcessingModeEnum.DETAILED
)
)
# Double-check workflow wasn't stopped right before AI call
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped before final answer AI call, aborting")
return
answerResponse = await services.ai.callAi(answerRequest)
# Check immediately after AI call completes - if stopped, abort without processing or storing
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped during final answer AI call, aborting without storing message")
return
# Check for errors in AI response
if answerResponse.errorCount > 0:
logger.error(f"AI call failed with errorCount={answerResponse.errorCount}: {answerResponse.content}")
finalAnswer = "Entschuldigung, ich konnte Ihre Anfrage derzeit nicht verarbeiten. Bitte versuchen Sie es später erneut."
else:
finalAnswer = answerResponse.content
logger.info("Final answer generated")
# Check again after generating answer (in case it was stopped while generating)
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped after final answer generation, not storing message")
return
# Reload workflow to get current message count
workflow = interfaceDbChat.getWorkflow(workflowId)
# Double-check workflow wasn't stopped while we were reloading
if workflow and workflow.status == "stopped":
logger.info(f"Workflow {workflowId} was stopped, not storing final message")
return
# Create assistant message with final answer
message_id = f"msg_{uuid.uuid4()}"
assistantMessageData = {
"id": message_id,
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": finalAnswer,
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": answerResponse.errorCount == 0 if answerResponse else True,
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
logger.info(f"Stored assistant message with final answer: {assistantMessage.id}")
# Emit message event for streaming (exact chatData format)
message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
await event_manager.emit_event(
context_id=workflowId,
event_type="chatdata",
data={
"type": "message",
"createdAt": message_timestamp,
"item": assistantMessage.dict()
},
event_category="chat"
)
# Update workflow status to completed (only if not stopped)
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
interfaceDbChat.updateWorkflow(workflowId, {
"status": "completed",
"lastActivity": getUtcTimestamp()
})
else:
logger.info(f"Workflow {workflowId} was stopped, not updating status to completed")
logger.info(f"Chatbot processing completed for workflow {workflowId}, generated {len(queries)} queries and final answer")
# Emit completion event only if workflow wasn't stopped
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
await event_manager.emit_event(
context_id=workflowId,
event_type="complete",
data={"workflowId": workflowId},
event_category="workflow",
message="Chatbot-Verarbeitung abgeschlossen",
step="complete"
)
# Schedule cleanup with longer delay to allow stream to stay open
await event_manager.cleanup(workflowId, delay=300.0) # 5 minutes delay
except Exception as e:
logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True)
# Check if workflow was stopped - if so, don't store error message
if await _check_workflow_stopped(interfaceDbChat, workflowId):
logger.info(f"Workflow {workflowId} was stopped, not storing error message")
return
# Store error message
try:
# Reload workflow to get current message count
workflow = interfaceDbChat.getWorkflow(workflowId)
# Double-check workflow wasn't stopped while we were reloading
if workflow and workflow.status == "stopped":
logger.info(f"Workflow {workflowId} was stopped, not storing error message")
return
errorMessageData = {
"id": f"msg_{uuid.uuid4()}",
"workflowId": workflowId,
"parentMessageId": userMessageId,
"message": f"Sorry, I encountered an error: {str(e)}",
"role": "assistant",
"status": "last",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": getUtcTimestamp(),
"success": False,
"roundNumber": workflow.currentRound if workflow else 1,
"taskNumber": 0,
"actionNumber": 0
}
errorMessage = interfaceDbChat.createMessage(errorMessageData)
# Emit message event for streaming (exact chatData format)
message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp())
await event_manager.emit_event(
context_id=workflowId,
event_type="chatdata",
data={
"type": "message",
"createdAt": message_timestamp,
"item": errorMessage.dict()
},
event_category="chat"
)
# Update workflow status to error (only if not stopped)
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
interfaceDbChat.updateWorkflow(workflowId, {
"status": "error",
"lastActivity": getUtcTimestamp()
})
else:
logger.info(f"Workflow {workflowId} was stopped, not updating status to error")
# Schedule cleanup
await event_manager.cleanup(workflowId)
except Exception as storeError:
logger.error(f"Error storing error message: {storeError}")