2722 lines
129 KiB
Python
2722 lines
129 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Simple chatbot feature - basic implementation.

User input is processed by AI to create a list of needed queries.
Those queries are then streamed back.

This module also handles feature initialization and RBAC catalog registration.
"""
|
|
|
|
import logging
|
|
from typing import Dict, List, Any
|
|
|
|
# Feature metadata for RBAC catalog
# Stable feature identifier; used as `featureCode` in every catalog registration below.
FEATURE_CODE = "chatbot"
# Localized display name (en/de/fr).
FEATURE_LABEL = {"en": "Chatbot", "de": "Chatbot", "fr": "Chatbot"}
# Icon identifier (Material Design Icons naming, "mdi-" prefix).
FEATURE_ICON = "mdi-robot"
|
|
|
|
# UI Objects for RBAC catalog
# One entry per permissionable UI area of the chatbot feature.
# Keys: objectKey (fully-qualified RBAC key), label (localized en/de/fr),
# meta ("area" ties the object to a frontend area — presumably matched by the UI; confirm).
UI_OBJECTS = [
    {
        "objectKey": "ui.feature.chatbot.conversations",
        "label": {"en": "Conversations", "de": "Konversationen", "fr": "Conversations"},
        "meta": {"area": "conversations"}
    },
    {
        "objectKey": "ui.feature.chatbot.settings",
        "label": {"en": "Settings", "de": "Einstellungen", "fr": "Paramètres"},
        "meta": {"area": "settings"}
    },
]
|
|
|
|
# Resource Objects for RBAC catalog
# One entry per protected API endpoint; meta records the endpoint path and HTTP method.
RESOURCE_OBJECTS = [
    {
        "objectKey": "resource.feature.chatbot.start",
        "label": {"en": "Start Chatbot", "de": "Chatbot starten", "fr": "Démarrer chatbot"},
        "meta": {"endpoint": "/api/chatbot/{instanceId}/start/stream", "method": "POST"}
    },
    {
        "objectKey": "resource.feature.chatbot.stop",
        "label": {"en": "Stop Chatbot", "de": "Chatbot stoppen", "fr": "Arrêter chatbot"},
        "meta": {"endpoint": "/api/chatbot/{instanceId}/stop/{workflowId}", "method": "POST"}
    },
]
|
|
|
|
# DATA Objects for RBAC catalog (tables/entities)
# Used for AccessRules on data-level permissions.
# meta.table names the database table; meta.fields lists the columns exposed
# to the RBAC layer. The final "*" entry is a wildcard covering every chatbot table.
DATA_OBJECTS = [
    {
        "objectKey": "data.feature.chatbot.ChatWorkflow",
        "label": {"en": "Chat Workflow", "de": "Chat-Workflow", "fr": "Workflow de chat"},
        "meta": {"table": "ChatWorkflow", "fields": ["id", "name", "status", "mandateId", "featureInstanceId"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatMessage",
        "label": {"en": "Chat Message", "de": "Chat-Nachricht", "fr": "Message de chat"},
        "meta": {"table": "ChatMessage", "fields": ["id", "workflowId", "message", "role", "publishedAt"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatLog",
        "label": {"en": "Chat Log", "de": "Chat-Log", "fr": "Journal de chat"},
        "meta": {"table": "ChatLog", "fields": ["id", "workflowId", "message", "type", "timestamp"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatDocument",
        "label": {"en": "Chat Document", "de": "Chat-Dokument", "fr": "Document de chat"},
        "meta": {"table": "ChatDocument", "fields": ["id", "messageId", "fileId", "fileName", "fileSize", "mimeType"]}
    },
    {
        "objectKey": "data.feature.chatbot.ChatStat",
        "label": {"en": "Chat Statistics", "de": "Chat-Statistiken", "fr": "Statistiques de chat"},
        "meta": {"table": "ChatStat", "fields": ["id", "workflowId", "processingTime", "bytesSent", "bytesReceived", "errorCount"]}
    },
    {
        "objectKey": "data.feature.chatbot.*",
        "label": {"en": "All Chatbot Data", "de": "Alle Chatbot-Daten", "fr": "Toutes les données chatbot"},
        "meta": {"wildcard": True, "description": "Wildcard for all chatbot data tables"}
    },
]
|
|
|
|
# Template roles for this feature
# Each template becomes a global Role (mandateId=None) via _syncTemplateRolesToDb().
# accessRules fields: "context" is UI/DATA/RESOURCE, "item" is the objectKey;
# item=None appears to act as a wildcard for the whole context — confirm with the
# AccessRule model. DATA scope letters: "a" presumably means all records,
# "m" presumably means own ("my") records — TODO confirm against the RBAC docs.
TEMPLATE_ROLES = [
    {
        "roleLabel": "chatbot-admin",
        "description": {
            "en": "Chatbot Administrator - Full access to chatbot settings and all conversations",
            "de": "Chatbot-Administrator - Vollzugriff auf Chatbot-Einstellungen und alle Konversationen",
            "fr": "Administrateur chatbot - Accès complet aux paramètres et conversations"
        },
        "accessRules": [
            # Full UI access
            {"context": "UI", "item": None, "view": True},
            # Full DATA access
            {"context": "DATA", "item": None, "view": True, "read": "a", "create": "a", "update": "a", "delete": "a"},
            # Resource access
            # NOTE(review): no rule for resource.feature.chatbot.stop — verify that
            # admins are meant to be able to stop workflows.
            {"context": "RESOURCE", "item": "resource.feature.chatbot.start", "view": True},
        ]
    },
    {
        "roleLabel": "chatbot-user",
        "description": {
            "en": "Chatbot User - Use chatbot and view own conversations",
            "de": "Chatbot-Benutzer - Chatbot nutzen und eigene Konversationen einsehen",
            "fr": "Utilisateur chatbot - Utiliser le chatbot et consulter ses conversations"
        },
        "accessRules": [
            # UI access to conversations - fully-qualified object keys
            {"context": "UI", "item": "ui.feature.chatbot.conversations", "view": True},
            # Own DATA access (my level)
            {"context": "DATA", "item": None, "view": True, "read": "m", "create": "m", "update": "m", "delete": "m"},
            # Resource access
            {"context": "RESOURCE", "item": "resource.feature.chatbot.start", "view": True},
        ]
    },
]
|
|
|
|
|
|
def getFeatureDefinition():
    """Assemble and return the feature definition used for registration."""
    definition = {
        "code": FEATURE_CODE,
        "label": FEATURE_LABEL,
        "icon": FEATURE_ICON,
    }
    return definition
|
|
|
|
|
|
def getUiObjects():
    """Return UI objects for RBAC catalog registration."""
    # Returns the module-level list itself (not a copy); callers must not mutate it.
    return UI_OBJECTS
|
|
|
|
|
|
def getResourceObjects():
    """Return resource objects for RBAC catalog registration."""
    # Returns the module-level list itself (not a copy); callers must not mutate it.
    return RESOURCE_OBJECTS
|
|
|
|
|
|
def getTemplateRoles():
    """Return template roles for this feature."""
    # Returns the module-level list itself (not a copy); callers must not mutate it.
    return TEMPLATE_ROLES
|
|
|
|
|
|
def getDataObjects():
    """Return DATA objects for RBAC catalog registration."""
    # Returns the module-level list itself (not a copy); callers must not mutate it.
    return DATA_OBJECTS
|
|
|
|
|
|
def registerFeature(catalogService) -> bool:
    """
    Register this feature's RBAC objects in the catalog.

    Registers the UI, RESOURCE and DATA objects, then syncs the template
    roles (and their AccessRules) into the database.

    Args:
        catalogService: The RBAC catalog service instance

    Returns:
        True if registration was successful
    """
    # Each catalog object kind maps to its registration method on the service.
    registrationPlan = (
        ("registerUiObject", UI_OBJECTS),
        ("registerResourceObject", RESOURCE_OBJECTS),
        ("registerDataObject", DATA_OBJECTS),
    )
    try:
        for methodName, catalogObjects in registrationPlan:
            for catalogObject in catalogObjects:
                getattr(catalogService, methodName)(
                    featureCode=FEATURE_CODE,
                    objectKey=catalogObject["objectKey"],
                    label=catalogObject["label"],
                    meta=catalogObject.get("meta")
                )

        # Sync template roles to database (with AccessRules)
        _syncTemplateRolesToDb()

        logger.info(f"Feature '{FEATURE_CODE}' registered {len(UI_OBJECTS)} UI, {len(RESOURCE_OBJECTS)} resource, {len(DATA_OBJECTS)} data objects")
        return True

    except Exception as e:
        logger.error(f"Failed to register feature '{FEATURE_CODE}': {e}")
        return False
|
|
|
|
|
|
def _syncTemplateRolesToDb() -> int:
    """
    Sync template roles and their AccessRules to the database.

    Creates global template roles (mandateId=None) if they don't exist, makes
    sure each template role has its AccessRules, and finally backfills
    instance-specific roles that are missing rules.

    Returns:
        Number of roles created (0 on error; errors are logged, not raised)
    """
    try:
        # Imported lazily to avoid import cycles at module load time —
        # presumably; confirm against the project's import graph.
        from modules.interfaces.interfaceDbApp import getRootInterface
        from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext

        rootInterface = getRootInterface()
        db = rootInterface.db

        # Get existing template roles for this feature (global: mandateId=None)
        existingRoles = db.getRecordset(
            Role,
            recordFilter={"featureCode": FEATURE_CODE, "mandateId": None}
        )
        # roleLabel -> role id, for the create-or-ensure decision below
        existingRoleLabels = {r.get("roleLabel"): r.get("id") for r in existingRoles}

        createdCount = 0
        for roleTemplate in TEMPLATE_ROLES:
            roleLabel = roleTemplate["roleLabel"]

            if roleLabel in existingRoleLabels:
                roleId = existingRoleLabels[roleLabel]
                logger.debug(f"Template role '{roleLabel}' already exists with ID {roleId}")

                # Ensure AccessRules exist for this role (idempotent)
                _ensureAccessRulesForRole(db, roleId, roleTemplate.get("accessRules", []))
            else:
                # Create new template role
                newRole = Role(
                    roleLabel=roleLabel,
                    description=roleTemplate.get("description", {}),
                    featureCode=FEATURE_CODE,
                    mandateId=None,  # Global template
                    featureInstanceId=None,
                    isSystemRole=False
                )
                createdRole = db.recordCreate(Role, newRole.model_dump())
                roleId = createdRole.get("id")

                # Create AccessRules for this role
                _ensureAccessRulesForRole(db, roleId, roleTemplate.get("accessRules", []))

                logger.info(f"Created template role '{roleLabel}' with ID {roleId}")
                createdCount += 1

        if createdCount > 0:
            logger.info(f"Feature '{FEATURE_CODE}': Created {createdCount} template roles")

        # Repair instance-specific roles that are missing AccessRules.
        # NOTE: existingRoleLabels only contains roles that existed BEFORE this
        # sync, so instance roles matching freshly created templates are not
        # repaired until the next run — presumably acceptable; confirm.
        _repairInstanceRolesAccessRules(db, existingRoleLabels)

        return createdCount

    except Exception as e:
        logger.error(f"Error syncing template roles for feature '{FEATURE_CODE}': {e}")
        return 0
|
|
|
|
|
|
def _repairInstanceRolesAccessRules(db, templateRoleLabels: Dict[str, str]) -> int:
    """
    Copy AccessRules from template roles onto instance-specific roles that have none.

    Instance roles created before AccessRules were defined would otherwise stay
    without any rules; this backfills them from the matching global template.

    Args:
        db: Database connector
        templateRoleLabels: Dict mapping roleLabel to template role ID

    Returns:
        Number of instance roles repaired
    """
    from modules.datamodels.datamodelRbac import Role, AccessRule, AccessRuleContext

    repairedCount = 0

    # Instance-specific roles are the ones bound to a mandate (mandateId set).
    allRoles = db.getRecordset(Role, recordFilter={"featureCode": FEATURE_CODE})

    for role in allRoles:
        if role.get("mandateId") is None:
            continue  # Global template role, not an instance role

        label = role.get("roleLabel")
        roleId = role.get("id")

        templateId = templateRoleLabels.get(label)
        if not templateId:
            continue  # No matching template to copy from

        # Only repair roles that currently have no AccessRules at all.
        if db.getRecordset(AccessRule, recordFilter={"roleId": roleId}):
            continue

        sourceRules = db.getRecordset(AccessRule, recordFilter={"roleId": templateId})
        if not sourceRules:
            continue  # Template itself has no rules to copy

        for sourceRule in sourceRules:
            clone = AccessRule(
                roleId=roleId,
                context=sourceRule.get("context"),
                item=sourceRule.get("item"),
                view=sourceRule.get("view", False),
                read=sourceRule.get("read"),
                create=sourceRule.get("create"),
                update=sourceRule.get("update"),
                delete=sourceRule.get("delete"),
            )
            db.recordCreate(AccessRule, clone.model_dump())

        logger.info(f"Repaired instance role '{label}' (ID: {roleId}): copied {len(sourceRules)} AccessRules from template")
        repairedCount += 1

    if repairedCount > 0:
        logger.info(f"Feature '{FEATURE_CODE}': Repaired {repairedCount} instance roles with missing AccessRules")

    return repairedCount
|
|
|
|
|
|
def _ensureAccessRulesForRole(db, roleId: str, ruleTemplates: List[Dict[str, Any]]) -> int:
    """
    Create any AccessRules from ruleTemplates that the role does not have yet.

    A rule is identified by its (context, item) pair; already-present pairs
    are left untouched, so repeated syncs are idempotent.

    Args:
        db: Database connector
        roleId: Role ID
        ruleTemplates: List of rule templates

    Returns:
        Number of rules created
    """
    from modules.datamodels.datamodelRbac import AccessRule, AccessRuleContext

    # Signatures (context, item) of rules already stored for this role.
    currentRules = db.getRecordset(AccessRule, recordFilter={"roleId": roleId})
    knownSignatures = {(r.get("context"), r.get("item")) for r in currentRules}

    # String -> enum translation for the rule context; unknown strings pass through.
    contextMap = {
        "UI": AccessRuleContext.UI,
        "DATA": AccessRuleContext.DATA,
        "RESOURCE": AccessRuleContext.RESOURCE,
    }

    createdCount = 0
    for ruleTemplate in ruleTemplates:
        contextName = ruleTemplate.get("context", "UI")
        item = ruleTemplate.get("item")

        if (contextName, item) in knownSignatures:
            continue  # Rule already present — skip to keep the sync idempotent

        rule = AccessRule(
            roleId=roleId,
            context=contextMap.get(contextName, contextName),
            item=item,
            view=ruleTemplate.get("view", False),
            read=ruleTemplate.get("read"),
            create=ruleTemplate.get("create"),
            update=ruleTemplate.get("update"),
            delete=ruleTemplate.get("delete"),
        )
        db.recordCreate(AccessRule, rule.model_dump())
        createdCount += 1

    if createdCount > 0:
        logger.debug(f"Created {createdCount} AccessRules for role {roleId}")

    return createdCount
|
|
import json
|
|
import uuid
|
|
import asyncio
|
|
import re
|
|
from typing import Optional, Dict, Any, List
|
|
|
|
from modules.features.chatbot.datamodelFeatureChatbot import ChatWorkflow, UserInputRequest, WorkflowModeEnum, ChatLog, ChatDocument
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, ProcessingModeEnum
|
|
from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference
|
|
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
|
|
from modules.services import getInterface as getServices
|
|
from modules.features.chatbot import interfaceFeatureChatbot
|
|
from modules.features.chatbot.eventManager import get_event_manager
|
|
from modules.features.chatbot.chatbotUtils import (
|
|
generate_conversation_name,
|
|
)
|
|
from modules.features.chatbot.chatbotConfig import get_chatbot_config, ChatbotConfig
|
|
from modules.features.chatbot.langgraphChatbot import LangGraphChatbot
|
|
from langchain_core.messages import HumanMessage
|
|
import base64
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _extractJsonFromResponse(content: str) -> Optional[dict]:
|
|
"""Extract JSON from AI response, handling markdown code blocks."""
|
|
# Try direct JSON parse first
|
|
try:
|
|
return json.loads(content.strip())
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
# Try to extract JSON from markdown code blocks
|
|
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL)
|
|
if json_match:
|
|
try:
|
|
return json.loads(json_match.group(1))
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
# Try to find JSON object in the text
|
|
json_match = re.search(r'\{.*\}', content, re.DOTALL)
|
|
if json_match:
|
|
try:
|
|
return json.loads(json_match.group(0))
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
return None
|
|
|
|
|
|
async def chatProcess(
    currentUser: User,
    mandateId: str,
    userInput: UserInputRequest,
    workflowId: Optional[str] = None,
    featureInstanceId: Optional[str] = None
) -> ChatWorkflow:
    """
    Simple chatbot processing - analyze user input and generate queries.

    Flow:
    1. Create or load workflow
    2. Store user message
    3. AI analyzes user input to create list of needed queries
    4. Stream queries back

    The heavy AI work is kicked off as a background task; this function
    returns the (reloaded) workflow immediately after storing the user message.

    Args:
        currentUser: Current user
        mandateId: Mandate context (from RequestContext / X-Mandate-Id header)
        userInput: User input request
        workflowId: Optional workflow ID to continue existing conversation
        featureInstanceId: Optional feature instance ID for instance-level isolation

    Returns:
        ChatWorkflow instance

    Raises:
        ValueError: If the instance config lacks a customSystemPrompt, the
            workflow is not found / cannot be reloaded, or the workflow
            belongs to a different feature instance.
    """
    try:
        # Load chatbot configuration for this instance
        chatbot_config = get_chatbot_config(featureInstanceId)
        logger.info(f"Loaded chatbot config for instance {featureInstanceId}: connector={chatbot_config.connector_type}, maxQueries={chatbot_config.max_queries}")

        # Validate that required system prompt is configured
        if not chatbot_config.custom_system_prompt:
            error_msg = f"Chatbot instance {featureInstanceId} is missing required customSystemPrompt configuration"
            logger.error(error_msg)
            raise ValueError(error_msg)

        # Get services normally (for other services like chat, ai, etc.)
        services = getServices(currentUser, None, mandateId=mandateId)

        # Replace interfaceDbChat with chatbot-specific interface that supports featureInstanceId
        # This ensures instance-level data isolation
        interfaceDbChat = interfaceFeatureChatbot.getInterface(
            currentUser,
            mandateId=mandateId,
            featureInstanceId=featureInstanceId
        )

        # Update services to use the chatbot-specific interface
        services.interfaceDbChat = interfaceDbChat

        # Get event manager and create queue if needed
        event_manager = get_event_manager()

        # Create or load workflow
        if workflowId:
            workflow = interfaceDbChat.getWorkflow(workflowId)
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")

            # Verify workflow belongs to this instance if instanceId is provided
            if featureInstanceId and workflow.featureInstanceId != featureInstanceId:
                raise ValueError(f"Workflow {workflowId} does not belong to instance '{featureInstanceId}'")

            # Resume workflow: increment round number
            new_round = workflow.currentRound + 1
            interfaceDbChat.updateWorkflow(workflowId, {
                "status": "running",
                "currentRound": new_round,
                "lastActivity": getUtcTimestamp()
            })
            # Re-read so `workflow` reflects the update just written
            workflow = interfaceDbChat.getWorkflow(workflowId)
            logger.info(f"Resumed workflow {workflowId}, round incremented to {new_round}")

            # Create event queue if it doesn't exist (for streaming)
            if not event_manager.has_queue(workflowId):
                event_manager.create_queue(workflowId)
        else:
            # Generate conversation name based on user's prompt
            conversation_name = await generate_conversation_name(
                services,
                userInput.prompt,
                userInput.userLanguage
            )

            # Create new workflow
            workflowData = {
                "id": str(uuid.uuid4()),
                "mandateId": mandateId,
                "featureInstanceId": featureInstanceId,
                "status": "running",
                "name": conversation_name,
                "currentRound": 1,
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0,
                "workflowMode": WorkflowModeEnum.WORKFLOW_CHATBOT.value,
                "startedAt": getUtcTimestamp(),
                "lastActivity": getUtcTimestamp()
            }
            workflow = interfaceDbChat.createWorkflow(workflowData)
            logger.info(f"Created new chatbot workflow: {workflow.id} with name: {conversation_name}")

            # Create event queue for new workflow (for streaming)
            event_manager.create_queue(workflow.id)

        # Reload workflow to get current message count
        workflow_id = workflow.id
        workflow = interfaceDbChat.getWorkflow(workflow_id)
        if not workflow:
            raise ValueError(f"Failed to reload workflow {workflow_id}")

        # Process uploaded files and create ChatDocuments
        # NOTE(review): user_documents is built and counted in the log below,
        # but never persisted or attached to the message within this function
        # (messageId is left empty) — presumably handled downstream; confirm.
        user_documents = []
        if userInput.listFileId and len(userInput.listFileId) > 0:
            logger.info(f"Processing {len(userInput.listFileId)} uploaded file(s) for user message")
            for fileId in userInput.listFileId:
                try:
                    # Get file info from chat service
                    fileInfo = services.chat.getFileInfo(fileId)
                    if not fileInfo:
                        logger.warning(f"No file info found for file ID {fileId}")
                        continue

                    originalFileName = fileInfo.get("fileName", "unknown")
                    originalMimeType = fileInfo.get("mimeType", "application/octet-stream")
                    fileSizeToUse = fileInfo.get("size", 0)

                    # Create ChatDocument for the file
                    document = ChatDocument(
                        id=str(uuid.uuid4()),
                        messageId="",  # Will be set when message is created
                        fileId=fileId,
                        fileName=originalFileName,
                        fileSize=fileSizeToUse,
                        mimeType=originalMimeType,
                        roundNumber=workflow.currentRound,
                        taskNumber=0,
                        actionNumber=0
                    )
                    user_documents.append(document)
                    logger.info(f"Created ChatDocument for file {fileId} -> {originalFileName}")
                except Exception as e:
                    # Best-effort: a broken file must not block the chat message
                    logger.error(f"Error processing file ID {fileId}: {e}", exc_info=True)

        # Store user message
        # Get message count safely (workflow.messages might be None or empty)
        message_count = len(workflow.messages) if workflow.messages else 0
        userMessageData = {
            "id": f"msg_{uuid.uuid4()}",
            "workflowId": workflow.id,
            "message": userInput.prompt,
            "role": "user",
            # "first" marks the opening message of a brand-new conversation
            "status": "first" if workflowId is None else "step",
            "sequenceNr": message_count + 1,
            "publishedAt": getUtcTimestamp(),
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0
        }

        userMessage = interfaceDbChat.createMessage(userMessageData)
        logger.info(f"Stored user message: {userMessage.id} with {len(user_documents)} document(s)")

        # Emit message event for streaming (exact chatData format)
        message_timestamp = parseTimestamp(userMessage.publishedAt, default=getUtcTimestamp())
        await event_manager.emit_event(
            context_id=workflow.id,
            event_type="chatdata",
            data={
                "type": "message",
                "createdAt": message_timestamp,
                "item": userMessage.dict()
            },
            event_category="chat"
        )

        # Update workflow status
        interfaceDbChat.updateWorkflow(workflow.id, {
            "status": "running",
            "lastActivity": getUtcTimestamp()
        })

        # Process in background (async) — fire-and-forget; errors surface
        # via the event stream, not via this function's return value.
        asyncio.create_task(_processChatbotMessage(
            services,
            workflow.id,
            userInput,
            userMessage.id,
            chatbot_config
        ))

        # Reload workflow to include new message
        workflow = interfaceDbChat.getWorkflow(workflow.id)
        return workflow

    except Exception as e:
        logger.error(f"Error in chatProcess: {str(e)}", exc_info=True)
        raise
|
|
|
|
|
|
async def _execute_queries_parallel(queries: List[Dict[str, Any]], chatbot_config: ChatbotConfig) -> Dict[str, Any]:
    """
    Run several SQL queries concurrently over one shared connector.

    Args:
        queries: List of query dicts with keys "query" (SQL string),
            "purpose" (human-readable description) and "table" (primary table).
        chatbot_config: ChatbotConfig used to obtain the connector instance.

    Returns:
        Dict keyed by query position (1-based):
            "query_<n>":        result text on success
            "query_<n>_data":   raw data rows on success
            "query_<n>_error":  error message on failure
    """
    # One connector is shared by all queries and closed exactly once.
    connector = chatbot_config.get_connector_instance()
    try:
        async def run_one(position: int, spec: Dict[str, Any]):
            """Run one query; never raises — failures are returned as strings."""
            try:
                outcome = await connector.executeQuery(spec.get("query", ""), return_json=True)
                return position, outcome, None
            except Exception as exc:
                return position, None, str(exc)

        coros = [run_one(i, spec) for i, spec in enumerate(queries)]
        outcomes = await asyncio.gather(*coros, return_exceptions=True)
    finally:
        # Close the shared connector only after every query has finished.
        await connector.close()

    collected: Dict[str, Any] = {}
    for outcome in outcomes:
        if isinstance(outcome, Exception):
            # run_one catches its own errors, but gather may still surface one.
            logger.error(f"Exception in parallel query execution: {outcome}")
            continue

        position, payload, failure = outcome
        key = f"query_{position+1}"

        if failure:
            collected[f"{key}_error"] = failure
            logger.error(f"Query {position+1} failed: {failure}")
            continue

        text = payload.get("text", "") if payload else ""
        if payload and not text.startswith(("Error:", "Query failed:")):
            collected[key] = text
            collected[f"{key}_data"] = payload.get("data", [])
            row_count = len(payload.get('data', []))
            logger.info(f"Query {position+1} executed successfully, returned {row_count} rows")
        else:
            failure_text = payload.get("text", "Query failed") if payload else "Query failed: No response"
            collected[f"{key}_error"] = failure_text
            logger.error(f"Query {position+1} failed: {failure_text}")

    return collected
|
|
|
|
|
|
async def _emit_log_and_event(
    interfaceDbChat,
    workflowId: str,
    event_manager,
    message: str,
    log_type: str = "info",
    status: str = "running",
    round_number: Optional[int] = None
) -> None:
    """
    Persist a chat log entry and stream it out as a "chatdata" event.

    Args:
        interfaceDbChat: Database interface
        workflowId: Workflow ID
        event_manager: Event manager for streaming (event skipped if falsy)
        message: Log message text
        log_type: Log type (info, warning, error)
        status: Workflow status string stored with the log
        round_number: Round number; looked up from the workflow when omitted
    """
    try:
        # Fall back to the workflow's current round when none was given.
        if round_number is None:
            currentWorkflow = interfaceDbChat.getWorkflow(workflowId)
            if currentWorkflow:
                round_number = currentWorkflow.currentRound

        createdAt = getUtcTimestamp()
        logRecord = {
            "id": f"log_{uuid.uuid4()}",
            "workflowId": workflowId,
            "message": message,
            "type": log_type,
            "timestamp": createdAt,
            "status": status,
            "roundNumber": round_number
        }
        # Persist first; only a successfully stored log is streamed out.
        storedLog = interfaceDbChat.createLog(logRecord)

        if storedLog and event_manager:
            try:
                # Normalize Pydantic models (v2 first, then v1) to a plain dict.
                if hasattr(storedLog, "model_dump"):
                    payload = storedLog.model_dump()
                elif hasattr(storedLog, "dict"):
                    payload = storedLog.dict()
                else:
                    payload = logRecord

                await event_manager.emit_event(
                    context_id=workflowId,
                    event_type="chatdata",
                    data={
                        "type": "log",
                        "createdAt": createdAt,
                        "item": payload
                    },
                    event_category="chat",
                    message="New log",
                    step="log"
                )
            except Exception as emit_error:
                # Streaming failure must not undo the stored log entry.
                logger.warning(f"Error emitting log event: {emit_error}")
    except Exception as e:
        logger.error(f"Error storing log: {e}", exc_info=True)
|
|
|
|
|
|
async def _check_workflow_stopped(interfaceDbChat, workflowId: str) -> bool:
|
|
"""
|
|
Check if workflow was stopped.
|
|
|
|
Args:
|
|
interfaceDbChat: Database interface
|
|
workflowId: Workflow ID
|
|
|
|
Returns:
|
|
True if workflow is stopped, False otherwise
|
|
"""
|
|
try:
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
return workflow and workflow.status == "stopped"
|
|
except Exception as e:
|
|
logger.warning(f"Error checking workflow status: {e}")
|
|
return False
|
|
|
|
|
|
def _build_final_answer_prompt_with_results(
    system_prompt: str,
    user_prompt: str,
    context: str,
    db_results_part: str,
    web_results_part: str,
    is_resumed: bool = False,
    has_db_results: bool = False,
    has_web_results: bool = False
) -> str:
    """
    Build the complete prompt for generating the final answer with database and web results.
    Uses the provided system_prompt from configuration instead of hardcoded prompts.

    Args:
        system_prompt: System prompt from chatbot configuration
        user_prompt: User's original prompt
        context: Conversation context
        db_results_part: Formatted database results section
        web_results_part: Formatted web research results section
        is_resumed: If True, exclude system prompt (already in context from previous messages)
        has_db_results: Whether database results are available
        has_web_results: Whether web research results are available

    Returns:
        Complete formatted prompt string
    """
    # NOTE(review): the four instruction templates below are near-duplicated
    # between the resumed and non-resumed branches — keep both copies in sync
    # when editing the German prompt text.
    if is_resumed:
        # System prompt already in context, don't repeat it
        # Emphasize that the current question is primary
        if context:
            # Context is wrapped in loud delimiters so the model treats it as
            # reference-only material, not as questions to answer.
            context_section = f"""
⚠️⚠️⚠️ KONTEXT (NUR FÜR REFERENZ - IGNORIEREN WENN NICHT BENÖTIGT) ⚠️⚠️⚠️
{context}
⚠️⚠️⚠️ ENDE KONTEXT ⚠️⚠️⚠️

"""
        else:
            context_section = ""

        # Build instructions based on what data sources are available
        if has_web_results and not has_db_results:
            # Only web research - emphasize web research
            instructions = f"""⚠️⚠️⚠️ WICHTIG - NUR INTERNET-RECHERCHE VERFÜGBAR ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte.

⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben
- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert
- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind
- ❌ ABSOLUT VERBOTEN: Daten erfinden

WICHTIG:
- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- Klare, strukturierte Antwort mit Quellenangaben
- Präsentiere die gefundenen Informationen ausführlich"""
        elif has_db_results and not has_web_results:
            # Only database - use existing database-focused instructions
            instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".

⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen
- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen
- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben
- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle)
- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden
- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen
- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen

WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!)
- Klare, strukturierte Antwort
- Markdown-Tabellen (max 20 Zeilen)
- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)"""
        elif not has_db_results and not has_web_results:
            # No results from either source - but database query was executed.
            # The wording steers the model away from claiming it "has no DB access".
            instructions = f"""⚠️⚠️⚠️ KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext

{db_results_part}{web_results_part}

⚠️⚠️⚠️ ABSOLUT KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️
Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber keine Ergebnisse zurückgegeben.
DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!

VERBOTEN - NIEMALS SAGEN:
- "Ich habe keinen Zugriff auf die Datenbank"
- "Ich kann nicht auf die Datenbank zugreifen"
- "Es tut mir leid, aber ich habe keinen Zugriff"
- "Ich habe keinen Zugriff auf Echtzeit-Datenbanken"
- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!

RICHTIG - SAGE STATTDESSEN:
- "Es wurden keine Artikel gefunden"
- "Keine passenden Artikel in der Datenbank gefunden"
- "Die Datenbanksuche ergab keine Treffer"
- "Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden"

WICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!
Beginne deine Antwort mit: "Ich habe in der Datenbank gesucht, aber..." oder "Es wurden keine Artikel gefunden..." oder ähnlich."""
        else:
            # Both database and web research
            instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".

⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE
- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen
- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle)

WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- Dann "Aus meiner Web-Recherche..." für Internet-Informationen
- Klare, strukturierte Antwort mit Quellenangaben"""

        # Resumed chats lead with the current question; the system prompt is
        # assumed to already be present in earlier conversation messages.
        return f"""⚠️⚠️⚠️ AKTUELLE FRAGE (PRIMÄR - DIESE MUSS BEANTWORTET WERDEN) ⚠️⚠️⚠️
Antworte auf die folgende Frage des Nutzers: {user_prompt}
{context_section}{instructions}"""
    else:
        # New chat: include system prompt
        # Build instructions based on what data sources are available
        if has_web_results and not has_db_results:
            # Only web research - emphasize web research
            return f"""{system_prompt}

Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte.

⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben
- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert
- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind
- ❌ ABSOLUT VERBOTEN: Daten erfinden

WICHTIG:
- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
- Klare, strukturierte Antwort mit Quellenangaben
- Präsentiere die gefundenen Informationen ausführlich"""
        elif has_db_results and not has_web_results:
            # Only database - use existing database-focused instructions
            return f"""{system_prompt}

Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".

⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen
- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen
- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben
- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle)
- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden
- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen
- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen

WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!)
- Klare, strukturierte Antwort
- Markdown-Tabellen (max 20 Zeilen)
- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)"""
        elif not has_db_results and not has_web_results:
            # No results from either source - but database query was executed
            return f"""{system_prompt}

Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}

{db_results_part}{web_results_part}

⚠️⚠️⚠️ KRITISCH - DATENBANKABFRAGE WURDE AUSGEFÜHRT ⚠️⚠️⚠️
Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber keine Ergebnisse zurückgegeben.
DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!

VERBOTEN - NIEMALS SAGEN:
- "Ich habe keinen Zugriff auf die Datenbank"
- "Ich kann nicht auf die Datenbank zugreifen"
- "Es tut mir leid, aber ich habe keinen Zugriff"
- "Ich habe keinen Zugriff auf Echtzeit-Datenbanken"
- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!

RICHTIG - SAGE STATTDESSEN:
- "Es wurden keine Artikel gefunden"
- "Keine passenden Artikel in der Datenbank gefunden"
- "Die Datenbanksuche ergab keine Treffer"
- "Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden"

WICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!
Beginne deine Antwort mit: "Ich habe in der Datenbank gesucht, aber..." oder "Es wurden keine Artikel gefunden..." oder ähnlich."""
        else:
            # Both database and web research
            return f"""{system_prompt}

Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}

{db_results_part}{web_results_part}

KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".

⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️
- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE
- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen
- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL))
- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle)

WICHTIG:
- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
- Dann "Aus meiner Web-Recherche..." für Internet-Informationen
- Klare, strukturierte Antwort mit Quellenangaben"""
|
|
|
|
def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults: Optional[Dict[str, Any]] = None) -> str:
    """
    Build enriched web research query by extracting product context from conversation history and current prompt.

    Extracts product information from:
    1. Current user prompt (article numbers, product mentions)
    2. Database query results (if available)
    3. Previous assistant messages (conversation history)

    Fixes over the previous revision:
    - Short intent tokens ("ja", "ul", "ce", "en", ...) are now matched on word
      boundaries instead of as raw substrings, so they no longer fire on
      unrelated words ("jahr", "recherche", German plurals ending in "en").
    - Removed a duplicated entry from the description patterns.

    Args:
        userPrompt: Current user prompt
        workflowMessages: List of workflow messages (conversation history)
        queryResults: Optional database query results to extract product info from

    Returns:
        Enriched search query string
    """
    # Normalize user prompt for detection
    prompt_lower = userPrompt.lower().strip()

    def _matches_word(token: str) -> bool:
        # Word-boundary match so short codes don't match inside other words.
        return re.search(rf"\b{re.escape(token)}\b", prompt_lower) is not None

    # Short affirmations must be whole words ("ja" should not match "jahr")
    affirmation_words = ["ja", "yes", "oui", "si"]
    # Longer phrases are safe to match as substrings (covers inflections like
    # "recherchieren", "suchen", ...)
    search_substrings = [
        "such", "suche", "search", "recherche", "recherchier",
        "internet", "web", "online",
        "datenblatt", "datasheet", "fiche technique",
        "mehr informationen", "more information", "plus d'information",
        "weitere informationen", "further information", "additional information"
    ]

    # Certification codes are short and must be whole words ("ce" should not
    # match "recherche", "en" should not match most German words)
    certification_words = ["ul", "ce", "tüv", "vde", "iec", "en", "iso"]
    certification_substrings = [
        "zertifiziert", "certified", "certification", "zertifizierung",
        "geprüft", "approved", "compliance"
    ]

    # Check if current prompt contains search-related keywords
    has_search_intent = (
        any(_matches_word(word) for word in affirmation_words)
        or any(sub in prompt_lower for sub in search_substrings)
    )

    # Check if prompt contains certification-related keywords
    has_certification_intent = (
        any(_matches_word(word) for word in certification_words)
        or any(sub in prompt_lower for sub in certification_substrings)
    )

    # Extract product information - try multiple sources
    article_number = None
    article_description = None
    supplier = None

    # Pattern for article numbers like "6AV2 181-8XP00-0AX0" or "6AV2181-8XP00-0AX0"
    article_patterns = [
        r'\b[A-Z0-9]{2,}\s+[0-9]{3,}-[A-Z0-9-]+\b',  # With space: "6AV2 181-8XP00-0AX0"
        r'\b[A-Z0-9]{4,}[\s-][A-Z0-9-]{6,}\b',  # General pattern
        r'\b[A-Z]{2,}[0-9]+\s+[0-9]+-[A-Z0-9-]+\b',  # Specific Siemens pattern
    ]

    # 1. First, try to extract from current user prompt
    for pattern in article_patterns:
        matches = re.findall(pattern, userPrompt)
        if matches:
            article_number = matches[0]
            logger.info(f"Extracted article number from user prompt: {article_number}")
            break

    # 2. Try to extract from database query results if available
    # Always check queryResults to enrich with product description and supplier,
    # even if article_number was already found
    if queryResults:
        # Look for article numbers in query result text (if not already found)
        if not article_number:
            for key in queryResults.keys():
                if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                    result_text = queryResults.get(key, "")
                    if isinstance(result_text, str):
                        for pattern in article_patterns:
                            matches = re.findall(pattern, result_text)
                            if matches:
                                article_number = matches[0]
                                logger.info(f"Extracted article number from query results: {article_number}")
                                break
                    if article_number:
                        break

        # Always check data arrays for product description and supplier
        # (even if article_number already found)
        for key in queryResults.keys():
            if key.startswith("query_") and not key.endswith("_error") and not key.endswith("_data"):
                data_key = f"{key}_data"
                if data_key in queryResults:
                    data_array = queryResults[data_key]
                    if isinstance(data_array, list) and len(data_array) > 0:
                        # Only the first row of each result set is inspected
                        first_row = data_array[0]
                        if isinstance(first_row, dict):
                            # Check common article number fields (if not already found)
                            if not article_number:
                                for field in ["Artikelnummer", "Artikelkürzel", "article_number", "articleNumber"]:
                                    if field in first_row and first_row[field]:
                                        article_number = str(first_row[field])
                                        logger.info(f"Extracted article number from query data: {article_number}")
                                        break

                            # Always check article description (can enrich even if article_number already found)
                            if not article_description:
                                for field in ["Artikelbezeichnung", "Bezeichnung", "article_description", "description"]:
                                    if field in first_row and first_row[field]:
                                        article_description = str(first_row[field])
                                        logger.info(f"Extracted article description from query data: {article_description}")
                                        break

                            # Always check supplier (can enrich even if article_number already found)
                            if not supplier:
                                for field in ["Lieferant", "Supplier", "supplier"]:
                                    if field in first_row and first_row[field]:
                                        supplier = str(first_row[field])
                                        logger.info(f"Extracted supplier from query data: {supplier}")
                                        break

            # If we found all needed info, we can stop
            if article_number and article_description and supplier:
                break

    # Check if current prompt is an explicit search request that should NOT use context
    # If user explicitly asks to search for something, prioritize that over previous messages
    explicit_search_patterns = [
        r"recherchier\s+(?:im\s+internet\s+)?nach\s+(.+)",
        r"suche\s+(?:im\s+internet\s+)?nach\s+(.+)",
        r"search\s+(?:the\s+internet\s+)?for\s+(.+)",
        r"find\s+(?:information\s+)?(?:about\s+)?(.+)",
        r"recherche\s+(?:sur\s+internet\s+)?(.+)"
    ]

    explicit_search_term = None
    for pattern in explicit_search_patterns:
        match = re.search(pattern, userPrompt, re.IGNORECASE)
        if match:
            explicit_search_term = match.group(1).strip()
            logger.info(f"Found explicit search term in prompt: '{explicit_search_term}'")
            break

    # 3. Extract from previous assistant messages (conversation history)
    # ONLY if there's no explicit search term (to avoid using old context for new searches)
    if not explicit_search_term and (not article_number or not article_description):
        # Scan the last 10 messages, newest first
        for msg in reversed(workflowMessages[-10:]):
            if msg.role == "assistant":
                message_text = msg.message

                # Extract article number if not found yet
                if not article_number:
                    for pattern in article_patterns:
                        matches = re.findall(pattern, message_text)
                        if matches:
                            article_number = matches[0]
                            break

                # Extract article description if not found yet
                if not article_description:
                    description_patterns = [
                        r'Es handelt sich um\s+([^\.]+)',
                        r'It is a\s+([^\.]+)',
                        r'C\'est\s+([^\.]+)',
                        r'Bezeichnung:\s*([^\n]+)',
                        r'Description:\s*([^\n]+)',
                        r'Artikelbezeichnung:\s*([^\n]+)'
                    ]
                    for pattern in description_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            article_description = match.group(1).strip()
                            break

                # Extract supplier if not found yet
                if not supplier:
                    supplier_patterns = [
                        r'von\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'from\s+([A-Z][A-Za-z\s]+(?:AG|GmbH|Ltd|Inc|Corp)?)',
                        r'Lieferant:\s*([^\n]+)',
                        r'Supplier:\s*([^\n]+)'
                    ]
                    for pattern in supplier_patterns:
                        match = re.search(pattern, message_text, re.IGNORECASE)
                        if match:
                            supplier = match.group(1).strip()
                            break

                # Stop if we found everything
                if article_number and article_description and supplier:
                    break

    # Build enriched search query
    query_parts = []

    # If we have an explicit search term, use it as the primary query
    if explicit_search_term:
        query_parts.append(explicit_search_term)
        logger.info(f"Using explicit search term as primary query: '{explicit_search_term}'")
    # If we have search intent but no product info, try to use the user prompt intelligently
    elif has_search_intent and not article_number and not article_description:
        # Try to extract meaningful parts from the prompt:
        # remove common search phrases and keep the product-related parts
        cleaned_prompt = userPrompt
        for phrase in ["recherchier", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information", "im internet", "the internet", "sur internet"]:
            cleaned_prompt = re.sub(phrase, "", cleaned_prompt, flags=re.IGNORECASE)
        cleaned_prompt = cleaned_prompt.strip()

        # Use cleaned prompt if it has meaningful content
        if cleaned_prompt and len(cleaned_prompt) > 2:
            query_parts.append(cleaned_prompt)

    # Add article description if found (but NOT if we have an explicit search term)
    if article_description and not explicit_search_term:
        query_parts.append(article_description)

    # Add article number if found (but NOT if we have an explicit search term)
    if article_number and not explicit_search_term:
        query_parts.append(article_number)

    # Add supplier if found (but NOT if we have an explicit search term)
    if supplier and not explicit_search_term:
        query_parts.append(supplier)

    # Extract certification information from prompt if present
    certification_terms = []
    if has_certification_intent:
        # Extract specific certification mentions (whole-word match, see above)
        cert_keywords = {
            "ul": "UL certification",
            "ce": "CE certification",
            "tüv": "TÜV certification",
            "vde": "VDE certification",
            "iec": "IEC certification",
            "iso": "ISO certification"
        }
        for cert_key, cert_term in cert_keywords.items():
            if _matches_word(cert_key):
                certification_terms.append(cert_term)

        # If no specific certification found but certification intent detected, add generic term
        if not certification_terms:
            certification_terms.append("certification")

    # Add certification terms to query if found
    if certification_terms:
        query_parts.extend(certification_terms)

    # Add "Datenblatt" or "datasheet" if user requested it or if we have product info.
    # But NOT if we have an explicit search term (user wants to search for something specific)
    if not explicit_search_term:
        if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower:
            query_parts.append("Datenblatt")
        elif query_parts and (article_number or article_description):
            # If we have product info but no explicit request for datasheet, add it anyway
            query_parts.append("Datenblatt")

    # If we found product information or built a meaningful query, use it
    if query_parts:
        enriched_query = " ".join(query_parts)
        logger.info(f"Built enriched search query: '{enriched_query}' from context (original: '{userPrompt}')")
        return enriched_query
    else:
        # Fall back to original prompt
        logger.info(f"No product context found, using original prompt: '{userPrompt}'")
        return userPrompt
|
|
|
|
async def _convert_file_ids_to_document_references(
    services,
    file_ids: List[str]
) -> DocumentReferenceList:
    """
    Convert file IDs to DocumentReferenceList for use with ai.process.

    Resolution order per file ID:
    1. Verify the file exists via services.chat.getFileInfo (skip if missing).
    2. Look for a ChatDocument with a matching fileId in the workflow's messages.
    3. Fall back to an RBAC-filtered database lookup restricted to this
       workflow's message IDs.
    4. If no ChatDocument is found, the raw fileId is used as the documentId.

    Args:
        services: Services instance
        file_ids: List of file IDs to convert

    Returns:
        DocumentReferenceList with docItem references
    """
    references = []

    # Get workflow to search for ChatDocuments
    workflow = services.workflow
    if not workflow:
        logger.error("Cannot convert file IDs to document references: workflow not set in services")
        return DocumentReferenceList(references=[])

    for file_id in file_ids:
        try:
            # Get file info to verify it exists
            file_info = services.chat.getFileInfo(file_id)
            if not file_info:
                logger.warning(f"File {file_id} not found, skipping")
                continue

            # Find ChatDocument that has this fileId
            document_id = None
            if workflow.messages:
                for message in workflow.messages:
                    if hasattr(message, 'documents') and message.documents:
                        for doc in message.documents:
                            if getattr(doc, 'fileId', None) == file_id:
                                document_id = getattr(doc, 'id', None)
                                break
                    if document_id:
                        break

            # Search database if not found in messages
            if not document_id:
                try:
                    # Local import to avoid a module-level dependency cycle
                    # with the RBAC interface — TODO confirm
                    from modules.interfaces.interfaceRbac import getRecordsetWithRBAC
                    documents = getRecordsetWithRBAC(
                        services.interfaceDbChat.db,
                        ChatDocument,
                        services.user,
                        recordFilter={"fileId": file_id},
                        mandateId=services.mandateId
                    )
                    if documents:
                        # Only accept documents attached to this workflow's messages
                        workflow_message_ids = {msg.id for msg in workflow.messages} if workflow.messages else set()
                        for doc in documents:
                            if doc.get("messageId") in workflow_message_ids:
                                document_id = doc.get("id")
                                break
                except Exception:
                    pass  # Best-effort lookup; fall back to the raw fileId below

            # Use ChatDocument ID if found, otherwise use fileId as fallback
            ref = DocumentItemReference(documentId=document_id if document_id else file_id)
            references.append(ref)
        except Exception as e:
            # One bad file ID must not abort conversion of the remaining IDs
            logger.error(f"Error converting fileId {file_id}: {e}", exc_info=True)

    logger.info(f"Converted {len(references)} file IDs to document references")
    return DocumentReferenceList(references=references)
|
|
|
|
def _format_query_results_as_lookup(query_data: Dict[str, List[Dict]]) -> str:
    """
    Format database query results as JSON lookup table for Excel matching.

    Builds a mapping {Artikelnummer: {row columns...}} from every usable row
    of every query result and serializes it as pretty-printed JSON.

    Args:
        query_data: Dict with query_key -> list of row dicts (from connector with return_json=True)

    Returns:
        JSON string formatted as lookup table
    """
    # Recognized article-number column names (compared case-insensitively)
    article_fields = ('artikelnummer', 'artikel_nummer', 'art_nr', 'part_number')
    lookup_table = {}

    for query_key, rows in query_data.items():
        # Error markers and empty result sets contribute nothing.
        if query_key == "error" or not rows:
            logger.warning(f"Skipping query key '{query_key}' - no rows or error")
            continue

        logger.info(f"Processing {len(rows)} rows from query '{query_key}'")

        for row in rows:
            if not isinstance(row, dict):
                logger.warning(f"Skipping non-dict row: {type(row)}")
                continue

            # Locate the article-number column, tolerating any casing.
            artikelnummer = next(
                (str(row[column]) for column in row.keys() if column.lower() in article_fields),
                None,
            )

            # A missing column or an empty value both disqualify the row.
            if artikelnummer:
                lookup_table[artikelnummer] = row
            else:
                logger.warning(f"No Artikelnummer found in row with keys: {list(row.keys())}")

    logger.info(f"Generated lookup table with {len(lookup_table)} entries")
    if lookup_table:
        sample_keys = list(lookup_table.keys())[:3]
        logger.info(f"Sample Artikelnummern: {sample_keys}")
        if sample_keys:
            sample_entry = lookup_table[sample_keys[0]]
            logger.info(f"Sample entry keys: {list(sample_entry.keys())}")

    return json.dumps(lookup_table, ensure_ascii=False, indent=2)
|
|
|
|
async def _create_chat_document_from_action_document(
    services,
    action_document,
    message_id: str,
    workflow_id: str,
    round_number: int
) -> ChatDocument:
    """
    Create a ChatDocument from an ActionDocument by storing the file data.

    The document payload is coerced to bytes, persisted via the component
    interface, and wrapped in a new ChatDocument record.

    Args:
        services: Services instance
        action_document: ActionDocument from ai.process result
        message_id: ID of the message to attach to
        workflow_id: Workflow ID (currently unused in the body — kept for
            interface compatibility; NOTE(review): confirm whether it should
            be stored on the document)
        round_number: Round number

    Returns:
        ChatDocument instance

    Raises:
        Exception: Re-raised after logging if storage or record creation fails.
    """
    try:
        # Get file data (could be bytes or string)
        document_data = action_document.documentData

        # Convert to bytes if needed
        if isinstance(document_data, str):
            # Check if it's base64 encoded
            try:
                # Try to decode as base64 first.
                # NOTE(review): b64decode without validate=True accepts many
                # non-base64 strings by ignoring invalid characters — a plain
                # text payload may be silently "decoded" here; confirm inputs.
                file_bytes = base64.b64decode(document_data)
            except Exception:
                # Not base64, encode as UTF-8
                file_bytes = document_data.encode('utf-8')
        elif isinstance(document_data, bytes):
            file_bytes = document_data
        else:
            # Try to convert to bytes
            try:
                file_bytes = bytes(document_data)
            except Exception:
                # Last resort: convert to string then encode
                file_bytes = str(document_data).encode('utf-8')

        # Get MIME type (default to Excel)
        mime_type = action_document.mimeType or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        # Get file name
        file_name = action_document.documentName or "data_export.xlsx"
        # Ensure it has .xlsx extension
        if not file_name.lower().endswith('.xlsx'):
            # Remove any existing extension and add .xlsx
            file_name = file_name.rsplit('.', 1)[0] + '.xlsx'

        # Store file using component interface
        file_item = services.interfaceDbComponent.createFile(
            name=file_name,
            mimeType=mime_type,
            content=file_bytes
        )

        # Store file data; a failure here is logged but deliberately not fatal
        success = services.interfaceDbComponent.createFileData(file_item.id, file_bytes)
        if not success:
            logger.warning(f"Failed to store file data for {file_item.id}, but continuing...")

        # Create ChatDocument
        chat_document = ChatDocument(
            id=str(uuid.uuid4()),
            messageId=message_id,
            fileId=file_item.id,
            fileName=file_name,
            fileSize=len(file_bytes),
            mimeType=mime_type,
            roundNumber=round_number,
            taskNumber=0,
            actionNumber=0
        )

        logger.info(f"Created ChatDocument {chat_document.id} from ActionDocument {file_name} (size: {len(file_bytes)} bytes)")
        return chat_document

    except Exception as e:
        logger.error(f"Error creating ChatDocument from ActionDocument: {e}", exc_info=True)
        raise
|
|
|
|
async def _processChatbotMessage(
|
|
services,
|
|
workflowId: str,
|
|
userInput: UserInputRequest,
|
|
userMessageId: str,
|
|
chatbot_config: ChatbotConfig
|
|
):
|
|
"""
|
|
Process chatbot message using LangGraph workflow.
|
|
Uses LangGraph to handle the conversation flow with tools (SQL, Tavily, streaming).
|
|
"""
|
|
event_manager = get_event_manager()
|
|
|
|
try:
|
|
interfaceDbChat = services.interfaceDbChat
|
|
|
|
# Reload workflow to get current messages
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
if not workflow:
|
|
logger.error(f"Workflow {workflowId} not found during processing")
|
|
await event_manager.emit_event(
|
|
context_id=workflowId,
|
|
event_type="error",
|
|
data={"error": f"Workflow {workflowId} nicht gefunden"},
|
|
event_category="workflow",
|
|
message=f"Workflow {workflowId} nicht gefunden",
|
|
step="error"
|
|
)
|
|
return
|
|
|
|
# Check if workflow was stopped before starting
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped, aborting processing")
|
|
return
|
|
|
|
await services.ai.ensureAiObjectsInitialized()
|
|
|
|
# Get connector instance
|
|
connector = chatbot_config.get_connector_instance()
|
|
|
|
# Get system prompt
|
|
system_prompt = chatbot_config.custom_system_prompt
|
|
if not system_prompt:
|
|
raise ValueError(f"System prompt not configured for chatbot instance")
|
|
|
|
# Create LangGraph chatbot instance
|
|
logger.info(f"Creating LangGraph chatbot for workflow {workflowId}")
|
|
chatbot = await LangGraphChatbot.create(
|
|
services=services,
|
|
system_prompt=system_prompt,
|
|
connector_instance=connector,
|
|
enable_web_research=chatbot_config.enable_web_research,
|
|
context_window_size=8000
|
|
)
|
|
|
|
# Process message using LangGraph streaming
|
|
logger.info(f"Processing message with LangGraph for workflow {workflowId}")
|
|
final_answer = None
|
|
chat_history = []
|
|
|
|
async for event in chatbot.stream_events(message=userInput.prompt, chat_id=workflowId):
|
|
# Check if workflow was stopped
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped during processing")
|
|
return
|
|
|
|
event_type = event.get("type")
|
|
|
|
if event_type == "status":
|
|
# Emit status update
|
|
label = event.get("label", "")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, label, log_type="info")
|
|
|
|
elif event_type == "final":
|
|
# Final response received
|
|
response_data = event.get("response", {})
|
|
chat_history = response_data.get("chat_history", [])
|
|
# Extract final answer from chat history (last assistant message)
|
|
for msg in reversed(chat_history):
|
|
if msg.get("role") == "assistant":
|
|
final_answer = msg.get("content", "")
|
|
break
|
|
|
|
elif event_type == "error":
|
|
# Error occurred
|
|
error_msg = event.get("message", "Unknown error")
|
|
logger.error(f"LangGraph error: {error_msg}")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: {error_msg}", log_type="error")
|
|
final_answer = f"Entschuldigung, ein Fehler ist aufgetreten: {error_msg}"
|
|
|
|
# Close connector
|
|
try:
|
|
await connector.close()
|
|
except Exception as e:
|
|
logger.warning(f"Error closing connector: {e}")
|
|
|
|
# Check if workflow was stopped before storing answer
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped, not storing final message")
|
|
return
|
|
|
|
# Store final answer if we have one
|
|
if final_answer:
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
message_id = f"msg_{uuid.uuid4()}"
|
|
assistantMessageData = {
|
|
"id": message_id,
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": final_answer,
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": True,
|
|
"roundNumber": workflow.currentRound,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
|
|
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
|
|
logger.info(f"Stored assistant message: {assistantMessage.id}")
|
|
|
|
# Emit message event for streaming
|
|
message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
|
|
await event_manager.emit_event(
|
|
context_id=workflowId,
|
|
event_type="chatdata",
|
|
data={
|
|
"type": "message",
|
|
"createdAt": message_timestamp,
|
|
"item": assistantMessage.dict()
|
|
},
|
|
event_category="chat"
|
|
)
|
|
|
|
# Update workflow status to completed
|
|
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
interfaceDbChat.updateWorkflow(workflowId, {
|
|
"status": "completed",
|
|
"lastActivity": getUtcTimestamp()
|
|
})
|
|
|
|
await event_manager.emit_event(
|
|
context_id=workflowId,
|
|
event_type="complete",
|
|
data={"workflowId": workflowId},
|
|
event_category="workflow",
|
|
message="Chatbot-Verarbeitung abgeschlossen",
|
|
step="complete"
|
|
)
|
|
|
|
# Schedule cleanup
|
|
await event_manager.cleanup(workflowId, delay=300.0)
|
|
|
|
logger.info(f"LangGraph processing completed for workflow {workflowId}")
|
|
|
|
except Exception as e:
|
|
logger.info("Analyzing user input to generate queries...")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...")
|
|
|
|
# Use custom prompt from configuration (already validated at start of chatProcess)
|
|
analysisPrompt = chatbot_config.custom_analysis_prompt.replace("{userPrompt}", userInput.prompt).replace("{context}", context or "")
|
|
|
|
# CRITICAL: Add explicit JSON format requirement to ensure AI returns JSON
|
|
json_format_instruction = """
|
|
|
|
⚠️⚠️⚠️ ABSOLUT KRITISCH - JSON-FORMAT ERFORDERLICH ⚠️⚠️⚠️
|
|
DU MUSST DEINE ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT GEBEN!
|
|
ANTWORTE NICHT MIT NORMALEM TEXT ODER EINER CHAT-ANTWORT!
|
|
DEINE ANTWORT MUSS EIN GÜLTIGES JSON-OBJEKT SEIN!
|
|
|
|
Erforderliches JSON-Format:
|
|
{
|
|
"needsDatabaseQuery": true/false,
|
|
"needsWebResearch": true/false,
|
|
"sqlQueries": [
|
|
{
|
|
"query": "SQL-Abfrage hier",
|
|
"purpose": "Zweck der Abfrage",
|
|
"table": "Haupttabelle"
|
|
}
|
|
],
|
|
"reasoning": "Begründung für die Abfragen"
|
|
}
|
|
|
|
⚠️⚠️⚠️ KRITISCH - WANN DATENBANKABFRAGE ERFORDERLICH ⚠️⚠️⚠️
|
|
SETZE "needsDatabaseQuery": true, WENN:
|
|
- Der Nutzer nach Artikeln, Produkten, Preisen, Lagerbeständen, Lieferanten fragt
|
|
- Der Nutzer nach Informationen aus der Datenbank fragt (auch allgemeine Fragen!)
|
|
- Der Nutzer eine Frage stellt, die mit Datenbank-Daten beantwortet werden kann
|
|
- Du dir nicht sicher bist - dann setze "needsDatabaseQuery": true und führe eine allgemeine Abfrage durch!
|
|
|
|
VERBOTEN:
|
|
- "needsDatabaseQuery": false setzen, nur weil die Frage allgemein klingt
|
|
- "needsDatabaseQuery": false setzen, ohne zu prüfen, ob Datenbank-Daten helfen könnten
|
|
- Chat-Antworten geben statt Datenbankabfragen durchzuführen
|
|
|
|
WICHTIG:
|
|
- Antworte NUR mit dem JSON-Objekt, KEIN zusätzlicher Text davor oder danach!
|
|
- KEINE Erklärungen, KEINE Begrüßungen, KEINE Chat-Antworten!
|
|
- NUR das JSON-Objekt!
|
|
- Bei Unsicherheit: IMMER "needsDatabaseQuery": true setzen!
|
|
"""
|
|
analysisPrompt = analysisPrompt + json_format_instruction
|
|
logger.info("Using custom analysis prompt from instance config with JSON format requirement")
|
|
|
|
# AI call for analysis
|
|
method_ai = MethodAi(services)
|
|
analysis_result = await method_ai.process({
|
|
"aiPrompt": analysisPrompt,
|
|
"documentList": None,
|
|
"resultType": "json",
|
|
"simpleMode": True
|
|
})
|
|
|
|
# Check if workflow was stopped during analysis
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped during analysis, aborting processing")
|
|
return
|
|
|
|
# Retry logic for failed analysis (max 3 attempts)
|
|
max_analysis_retries = 3
|
|
analysis_retry_count = 0
|
|
analysis = None
|
|
analysis_content = None
|
|
|
|
while analysis_retry_count < max_analysis_retries:
|
|
# Extract content from ActionResult
|
|
analysis_content = None
|
|
if analysis_result.success and analysis_result.documents:
|
|
analysis_content = analysis_result.documents[0].documentData
|
|
if isinstance(analysis_content, bytes):
|
|
analysis_content = analysis_content.decode('utf-8')
|
|
|
|
# Validate analysis was successful
|
|
if not analysis_content:
|
|
analysis_retry_count += 1
|
|
if analysis_retry_count < max_analysis_retries:
|
|
logger.warning(f"Analysis failed (attempt {analysis_retry_count}/{max_analysis_retries}): No content returned from AI, retrying...")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Analyse fehlgeschlagen, Versuch {analysis_retry_count}/{max_analysis_retries}...", log_type="warning")
|
|
# Retry analysis
|
|
analysis_result = await method_ai.process({
|
|
"aiPrompt": analysisPrompt,
|
|
"documentList": None,
|
|
"resultType": "json",
|
|
"simpleMode": True
|
|
})
|
|
continue
|
|
else:
|
|
error_msg = "Die Analyse Ihrer Anfrage ist nach mehreren Versuchen fehlgeschlagen. Bitte versuchen Sie es später erneut oder formulieren Sie Ihre Frage anders."
|
|
logger.error(f"Analysis failed after {max_analysis_retries} attempts: No content returned from AI")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Analyse nach {max_analysis_retries} Versuchen fehlgeschlagen", log_type="error")
|
|
# Store error message as assistant response
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
message_id = f"msg_{uuid.uuid4()}"
|
|
assistantMessageData = {
|
|
"id": message_id,
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": error_msg,
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": False,
|
|
"roundNumber": workflow.currentRound,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
|
|
logger.info(f"Stored error message due to failed analysis after {max_analysis_retries} attempts: {assistantMessage.id}")
|
|
return
|
|
|
|
analysis = _extractJsonFromResponse(analysis_content)
|
|
if analysis is None:
|
|
analysis_retry_count += 1
|
|
if analysis_retry_count < max_analysis_retries:
|
|
logger.warning(f"Failed to extract JSON from analysis response (attempt {analysis_retry_count}/{max_analysis_retries}), retrying...")
|
|
logger.debug(f"Analysis content: {analysis_content[:500] if analysis_content else 'None'}")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"JSON-Extraktion fehlgeschlagen, Versuch {analysis_retry_count}/{max_analysis_retries}...", log_type="warning")
|
|
# Retry analysis
|
|
analysis_result = await method_ai.process({
|
|
"aiPrompt": analysisPrompt,
|
|
"documentList": None,
|
|
"resultType": "json",
|
|
"simpleMode": True
|
|
})
|
|
continue
|
|
else:
|
|
error_msg = "Die Analyse Ihrer Anfrage konnte nach mehreren Versuchen nicht verarbeitet werden. Bitte versuchen Sie es später erneut oder formulieren Sie Ihre Frage anders."
|
|
logger.error(f"Failed to extract JSON from analysis response after {max_analysis_retries} attempts. Content: {analysis_content[:500] if analysis_content else 'None'}")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: JSON-Extraktion nach {max_analysis_retries} Versuchen fehlgeschlagen", log_type="error")
|
|
# Store error message as assistant response
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
message_id = f"msg_{uuid.uuid4()}"
|
|
assistantMessageData = {
|
|
"id": message_id,
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": error_msg,
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": False,
|
|
"roundNumber": workflow.currentRound,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
|
|
logger.info(f"Stored error message due to failed JSON extraction after {max_analysis_retries} attempts: {assistantMessage.id}")
|
|
return
|
|
|
|
# Successfully extracted analysis, break retry loop
|
|
break
|
|
|
|
# Extract analysis results
|
|
needsDatabaseQuery = analysis.get("needsDatabaseQuery", False) if analysis else False
|
|
needsWebResearch = analysis.get("needsWebResearch", False) if analysis else False
|
|
sql_queries = analysis.get("sqlQueries", []) if analysis else []
|
|
# Support legacy single query format for backward compatibility
|
|
if not sql_queries and analysis and analysis.get("sqlQuery"):
|
|
sql_queries = [{
|
|
"query": analysis.get("sqlQuery", ""),
|
|
"purpose": "Database query",
|
|
"table": "Unknown"
|
|
}]
|
|
reasoning = analysis.get("reasoning", "") if analysis else ""
|
|
|
|
# CRITICAL: If connectors are configured, ALWAYS use database if user asks about products/articles/inventory
|
|
# Override AI decision if it says "no database query" but connectors are available
|
|
if chatbot_config.connector_types and len(chatbot_config.connector_types) > 0:
|
|
user_prompt_lower = userInput.prompt.lower()
|
|
# Keywords that indicate database query is needed
|
|
db_keywords = [
|
|
"artikel", "produkt", "ware", "lager", "bestand", "preis", "lieferant",
|
|
"led", "lampe", "motor", "kabel", "schraube", "sensor", "netzteil",
|
|
"wie viele", "zeig mir", "suche", "finde", "gibt es", "haben wir",
|
|
"article", "product", "inventory", "stock", "price", "supplier",
|
|
"how many", "show me", "search", "find", "do we have"
|
|
]
|
|
has_db_intent = any(keyword in user_prompt_lower for keyword in db_keywords)
|
|
|
|
# If user asks about database-related topics but AI said no query needed, force it
|
|
if has_db_intent and not needsDatabaseQuery:
|
|
logger.warning(f"User asked about database-related topic but AI returned needsDatabaseQuery=false. Forcing needsDatabaseQuery=true because connectors are configured.")
|
|
needsDatabaseQuery = True
|
|
# Generate a default query if none were provided
|
|
if not sql_queries:
|
|
# Extract main search term from user prompt
|
|
search_terms = []
|
|
for keyword in db_keywords:
|
|
if keyword in user_prompt_lower:
|
|
# Try to extract the actual product/article name
|
|
words = user_prompt_lower.split()
|
|
keyword_idx = words.index(keyword) if keyword in words else -1
|
|
if keyword_idx >= 0 and keyword_idx < len(words) - 1:
|
|
# Take next word as potential product name
|
|
next_word = words[keyword_idx + 1]
|
|
if len(next_word) > 2: # Ignore short words like "die", "der", etc.
|
|
search_terms.append(next_word)
|
|
|
|
# Create a general search query
|
|
if search_terms:
|
|
search_term = search_terms[0]
|
|
else:
|
|
# Use the whole prompt as search term (limited)
|
|
search_term = userInput.prompt[:50] # Limit length
|
|
|
|
sql_queries = [{
|
|
"query": f'SELECT a."Artikelnummer", a."Artikelbezeichnung", a."Lieferant", a."Artikelkürzel" FROM Artikel a WHERE a."Artikelbezeichnung" LIKE \'%{search_term}%\' OR a."Artikelnummer" LIKE \'%{search_term}%\' OR a."Artikelkürzel" LIKE \'%{search_term}%\' LIMIT 20',
|
|
"purpose": f"Suche nach Artikeln die '{search_term}' enthalten",
|
|
"table": "Artikel"
|
|
}]
|
|
logger.info(f"Generated default database query for search term: {search_term}")
|
|
|
|
# Check if we need web research for certifications (only if enabled in config)
|
|
if chatbot_config.enable_web_research:
|
|
user_prompt_lower = userInput.prompt.lower()
|
|
certification_keywords = ["ul", "ce", "tüv", "vde", "iec", "iso", "zertifiziert", "certified", "certification"]
|
|
has_certification = any(keyword in user_prompt_lower for keyword in certification_keywords)
|
|
if has_certification and not needsWebResearch:
|
|
logger.warning("Certification detected but needsWebResearch is false - forcing to true")
|
|
needsWebResearch = True
|
|
else:
|
|
# Web research disabled in config
|
|
if needsWebResearch:
|
|
logger.info("Web research disabled in instance config, skipping")
|
|
needsWebResearch = False
|
|
|
|
# Limit query count based on configuration
|
|
max_queries_allowed = chatbot_config.max_queries
|
|
if needsDatabaseQuery and len(sql_queries) > max_queries_allowed:
|
|
logger.info(f"Limiting queries from {len(sql_queries)} to {max_queries_allowed} for performance")
|
|
sql_queries = sql_queries[:max_queries_allowed]
|
|
|
|
logger.info(f"Analysis: DB={needsDatabaseQuery}, Web={needsWebResearch}, SQL queries={len(sql_queries)}")
|
|
|
|
# Build initial enriched web research query if needed (for logging, will be rebuilt after DB queries)
|
|
# Only if web research is enabled in config
|
|
enriched_web_query = None
|
|
if needsWebResearch and chatbot_config.enable_web_research:
|
|
enriched_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages)
|
|
|
|
# Build list of queries to stream back
|
|
queries = []
|
|
|
|
if needsDatabaseQuery and sql_queries:
|
|
for i, sql_query_info in enumerate(sql_queries, 1):
|
|
queries.append({
|
|
"type": "database",
|
|
"query": sql_query_info.get("query", ""),
|
|
"purpose": sql_query_info.get("purpose", f"Query {i}"),
|
|
"table": sql_query_info.get("table", "Unknown"),
|
|
"reasoning": reasoning
|
|
})
|
|
|
|
if needsWebResearch and chatbot_config.enable_web_research:
|
|
queries.append({
|
|
"type": "web",
|
|
"query": enriched_web_query or userInput.prompt,
|
|
"reasoning": reasoning
|
|
})
|
|
|
|
# Format queries as log text
|
|
log_lines = []
|
|
if queries:
|
|
db_queries = [q for q in queries if q["type"] == "database"]
|
|
log_lines.append(f"Generiert: {len(db_queries)} Datenbankabfrage(n) und {len(queries) - len(db_queries)} Web-Recherche(n)\n\n")
|
|
for i, q in enumerate(queries, 1):
|
|
if q["type"] == "database":
|
|
log_lines.append(f"{i}. Datenbankabfrage ({q.get('table', 'Unknown')}):\n")
|
|
log_lines.append(f" Zweck: {q.get('purpose', 'Nicht angegeben')}\n")
|
|
log_lines.append(f"```sql\n{q['query']}\n```\n")
|
|
elif q["type"] == "web":
|
|
log_lines.append(f"{i}. Web-Recherche:\n")
|
|
log_lines.append(f" Suchbegriff: {q['query']}\n")
|
|
if q.get("reasoning"):
|
|
log_lines.append(f" Begründung: {q['reasoning']}\n")
|
|
log_lines.append("\n")
|
|
else:
|
|
log_lines.append("Keine Abfragen erforderlich.")
|
|
|
|
log_text = "".join(log_lines)
|
|
|
|
# Stream queries as a log
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, log_text)
|
|
|
|
# Check if workflow was stopped before executing queries
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped, aborting query execution")
|
|
return
|
|
|
|
# Step 2: Execute queries
|
|
queryResults = {}
|
|
webResearchResults = ""
|
|
|
|
# Start web research early in parallel with DB queries if needed (only if enabled)
|
|
web_research_task = None
|
|
if needsWebResearch and chatbot_config.enable_web_research:
|
|
# Start with basic query (will enrich later with DB results if available)
|
|
basic_web_query = _buildWebResearchQuery(userInput.prompt, workflow.messages, None)
|
|
logger.info(f"Starting web research in parallel with DB queries using basic query: '{basic_web_query}'")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Suche im Internet nach Informationen...")
|
|
|
|
async def perform_web_research():
    """Run the web-research service call and serialize its outcome.

    Best-effort helper: on success returns the result as pretty-printed
    JSON when the service yields a dict, otherwise its plain string form.
    Any failure is logged and folded into an error string instead of
    propagating, so the surrounding task gather never raises from here.
    """
    try:
        outcome = await services.web.performWebResearch(
            prompt=basic_web_query,
            urls=[],
            country=None,
            # Fall back to German when the user did not specify a language.
            language=userInput.userLanguage or "de",
            researchDepth="general",
            operationId=None
        )
        # Dicts are rendered as readable JSON; anything else is stringified.
        if isinstance(outcome, dict):
            return json.dumps(outcome, ensure_ascii=False, indent=2)
        return str(outcome)
    except Exception as e:
        logger.error(f"Web research failed: {e}", exc_info=True)
        return f"Web research error: {str(e)}"
|
|
|
|
web_research_task = asyncio.create_task(perform_web_research())
|
|
|
|
# Check if connector is working before executing queries
|
|
if needsDatabaseQuery and sql_queries:
|
|
logger.info(f"Checking database connector before executing {len(sql_queries)} queries...")
|
|
try:
|
|
# Test connector with a simple query
|
|
test_connector = chatbot_config.get_connector_instance()
|
|
try:
|
|
# Try a simple test query to verify connector works
|
|
test_result = await test_connector.executeQuery("SELECT 1", return_json=True)
|
|
await test_connector.close()
|
|
if not test_result or test_result.get("text", "").startswith(("Error:", "Query failed:")):
|
|
raise Exception("Connector test query failed")
|
|
logger.info("Database connector test successful")
|
|
except Exception as connector_error:
|
|
await test_connector.close()
|
|
error_msg = f"Die Datenbankverbindung funktioniert derzeit nicht. Bitte versuchen Sie es später erneut. Fehler: {str(connector_error)}"
|
|
logger.error(f"Database connector test failed: {connector_error}", exc_info=True)
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Datenbankverbindung fehlgeschlagen", log_type="error")
|
|
# Store error message as assistant response
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
message_id = f"msg_{uuid.uuid4()}"
|
|
assistantMessageData = {
|
|
"id": message_id,
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": error_msg,
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": False,
|
|
"roundNumber": workflow.currentRound,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
|
|
logger.info(f"Stored error message due to connector failure: {assistantMessage.id}")
|
|
return
|
|
except Exception as e:
|
|
error_msg = f"Die Datenbankverbindung konnte nicht hergestellt werden. Bitte versuchen Sie es später erneut. Fehler: {str(e)}"
|
|
logger.error(f"Failed to initialize database connector: {e}", exc_info=True)
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Fehler: Datenbankverbindung konnte nicht hergestellt werden", log_type="error")
|
|
# Store error message as assistant response
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
message_id = f"msg_{uuid.uuid4()}"
|
|
assistantMessageData = {
|
|
"id": message_id,
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": error_msg,
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": False,
|
|
"roundNumber": workflow.currentRound,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
|
|
logger.info(f"Stored error message due to connector initialization failure: {assistantMessage.id}")
|
|
return
|
|
|
|
# Execute database queries in parallel
|
|
if needsDatabaseQuery and sql_queries:
|
|
logger.info(f"Executing {len(sql_queries)} database queries in parallel...")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, f"Führe {len(sql_queries)} Datenbankabfrage(n) parallel aus...")
|
|
|
|
try:
|
|
queryResults = await _execute_queries_parallel(sql_queries, chatbot_config)
|
|
|
|
# Log results summary
|
|
successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
|
|
failed_queries = [k for k in queryResults.keys() if k.endswith("_error")]
|
|
|
|
if successful_queries:
|
|
total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)
|
|
logger.info(f"Successfully executed {len(successful_queries)} query/queries, total {total_rows} rows")
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Abgeschlossen: {len(successful_queries)} Abfrage(n) erfolgreich, {total_rows} Ergebnis{'e' if total_rows != 1 else ''} gefunden"
|
|
)
|
|
|
|
if failed_queries:
|
|
logger.warning(f"{len(failed_queries)} query/queries failed")
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Warnung: {len(failed_queries)} Abfrage(n) fehlgeschlagen",
|
|
log_type="warning"
|
|
)
|
|
|
|
# Check if we got empty results and need to retry with alternative strategies
|
|
# Robust calculation: check all successful queries for empty data
|
|
total_rows = 0
|
|
queries_with_results = 0
|
|
queries_with_empty_results = 0
|
|
|
|
if successful_queries:
|
|
for query_key in successful_queries:
|
|
data_key = f"{query_key}_data"
|
|
if data_key in queryResults:
|
|
row_count = len(queryResults[data_key])
|
|
total_rows += row_count
|
|
if row_count > 0:
|
|
queries_with_results += 1
|
|
else:
|
|
queries_with_empty_results += 1
|
|
else:
|
|
# Query succeeded but no data key - treat as empty
|
|
queries_with_empty_results += 1
|
|
logger.debug(f"Query {query_key} succeeded but has no _data key")
|
|
else:
|
|
# No successful queries at all
|
|
logger.debug("No successful queries found")
|
|
|
|
# Also check if we have any query results at all
|
|
has_any_results = total_rows > 0
|
|
|
|
# Debug logging
|
|
logger.info(f"Query results analysis: total_rows={total_rows}, successful_queries={len(successful_queries)}, "
|
|
f"queries_with_results={queries_with_results}, queries_with_empty_results={queries_with_empty_results}, "
|
|
f"failed_queries={len(failed_queries)}")
|
|
|
|
# Trigger retry if: no results AND we have database queries AND we executed at least one query
|
|
# Also trigger if all successful queries returned empty results
|
|
# Only retry if enabled in config
|
|
should_retry = (
|
|
chatbot_config.enable_retry_on_empty and
|
|
not has_any_results and
|
|
needsDatabaseQuery and
|
|
len(sql_queries) > 0 and
|
|
(len(successful_queries) > 0 or len(failed_queries) == 0) # Either we have successful queries or no failures (queries executed but empty)
|
|
)
|
|
|
|
# Iterative retry loop: try up to configured max attempts with different strategies
|
|
max_empty_retry_attempts = chatbot_config.max_retry_attempts if chatbot_config.enable_retry_on_empty else 0
|
|
empty_retry_attempt = 0
|
|
original_sql_queries_count = len(sql_queries)
|
|
previous_retry_rows = 0
|
|
|
|
while should_retry and empty_retry_attempt < max_empty_retry_attempts:
|
|
empty_retry_attempt += 1
|
|
logger.info(f"No results found (attempt {empty_retry_attempt}/{max_empty_retry_attempts}), retrying with alternative query strategies...")
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Keine Ergebnisse gefunden ({len(successful_queries)} erfolgreiche Abfrage(n), {total_rows} Zeilen). Versuch {empty_retry_attempt}/{max_empty_retry_attempts}: Versuche alternative Abfrage-Strategien...",
|
|
log_type="info"
|
|
)
|
|
|
|
# Retry analysis with empty results context - create NEW analysis with alternative strategies
|
|
|
|
# Build retry prompt with progressively different strategies
|
|
empty_count = len(sql_queries)
|
|
empty_results_instructions = get_empty_results_retry_instructions(empty_count)
|
|
|
|
retry_context = f"{context}\n\n"
|
|
if empty_retry_attempt == 1:
|
|
retry_context += "⚠️⚠️⚠️ WICHTIG - ALTERNATIVE STRATEGIEN ERFORDERLICH ⚠️⚠️⚠️\n"
|
|
retry_context += "Strategie: Breitere Suche, weniger Filter\n"
|
|
elif empty_retry_attempt == 2:
|
|
retry_context += "⚠️⚠️⚠️ KRITISCH - IMMER NOCH KEINE ERGEBNISSE ⚠️⚠️⚠️\n"
|
|
retry_context += "Strategie: Entferne spezifische Filter komplett, verwende nur Hauptkriterien\n"
|
|
else:
|
|
retry_context += "⚠️⚠️⚠️ LETZTER VERSUCH - MINIMALE FILTER ⚠️⚠️⚠️\n"
|
|
retry_context += "Strategie: Nur Hauptbegriffe, keine spezifischen Filter\n"
|
|
|
|
retry_context += f"Die bisherigen {len(sql_queries)} Abfragen haben 0 Zeilen zurückgegeben.\n"
|
|
retry_context += f"{empty_results_instructions}\n"
|
|
retry_context += f"Dies ist bereits Versuch {empty_retry_attempt} von {max_empty_retry_attempts}!\n"
|
|
retry_context += "Erstelle JETZT MAXIMAL 5 alternative SQL-Queries mit komplett anderen Strategien (für Performance):\n"
|
|
|
|
if empty_retry_attempt == 1:
|
|
retry_context += "- Breitere Suche ohne zu spezifische Filter\n"
|
|
retry_context += "- Suche ohne Zertifizierungsfilter (falls Zertifizierung nicht in DB)\n"
|
|
retry_context += "- Suche nur nach Hauptkriterien (z.B. nur Netzgerät + 10A, ohne einphasig)\n"
|
|
retry_context += "- Suche nach alternativen Begriffen (Netzteil statt Netzgerät, etc.)\n"
|
|
retry_context += "- COUNT-Queries für Statistik\n"
|
|
retry_context += "- Fallback-Queries mit minimalen Filtern\n"
|
|
elif empty_retry_attempt == 2:
|
|
retry_context += "- ENTFERNE alle Zertifizierungsfilter komplett\n"
|
|
retry_context += "- ENTFERNE Phasen-Filter (einphasig/dreiphasig)\n"
|
|
retry_context += "- Suche NUR nach: Netzgerät/Netzteil + Ampere-Angaben\n"
|
|
retry_context += "- Verwende breitere Ampere-Patterns (5A, 6A, 8A, 10A, 12A, 15A, 20A, etc.)\n"
|
|
retry_context += "- Suche auch in Keywords-Feld\n"
|
|
else:
|
|
retry_context += "- MINIMALE Filter: Nur 'Netzgerät' ODER 'Netzteil' ODER 'Power Supply'\n"
|
|
retry_context += "- KEINE spezifischen Filter auf Ampere, Phasen oder Zertifizierung\n"
|
|
retry_context += "- COUNT-Query: Wie viele Netzgeräte gibt es insgesamt?\n"
|
|
retry_context += "- Suche nach ALLEN verfügbaren Netzgeräten\n"
|
|
|
|
# Retry analysis - use custom prompt from configuration (already validated at start of chatProcess)
|
|
retry_analysis_prompt = chatbot_config.custom_analysis_prompt.replace("{userPrompt}", userInput.prompt).replace("{context}", retry_context or "")
|
|
|
|
# CRITICAL: Add explicit JSON format requirement to ensure AI returns JSON
|
|
json_format_instruction = """
|
|
|
|
⚠️⚠️⚠️ ABSOLUT KRITISCH - JSON-FORMAT ERFORDERLICH ⚠️⚠️⚠️
|
|
DU MUSST DEINE ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT GEBEN!
|
|
ANTWORTE NICHT MIT NORMALEM TEXT ODER EINER CHAT-ANTWORT!
|
|
DEINE ANTWORT MUSS EIN GÜLTIGES JSON-OBJEKT SEIN!
|
|
|
|
Erforderliches JSON-Format:
|
|
{
|
|
"needsDatabaseQuery": true/false,
|
|
"needsWebResearch": true/false,
|
|
"sqlQueries": [
|
|
{
|
|
"query": "SQL-Abfrage hier",
|
|
"purpose": "Zweck der Abfrage",
|
|
"table": "Haupttabelle"
|
|
}
|
|
],
|
|
"reasoning": "Begründung für die Abfragen"
|
|
}
|
|
|
|
⚠️⚠️⚠️ KRITISCH - WANN DATENBANKABFRAGE ERFORDERLICH ⚠️⚠️⚠️
|
|
SETZE "needsDatabaseQuery": true, WENN:
|
|
- Der Nutzer nach Artikeln, Produkten, Preisen, Lagerbeständen, Lieferanten fragt
|
|
- Der Nutzer nach Informationen aus der Datenbank fragt (auch allgemeine Fragen!)
|
|
- Der Nutzer eine Frage stellt, die mit Datenbank-Daten beantwortet werden kann
|
|
- Du dir nicht sicher bist - dann setze "needsDatabaseQuery": true und führe eine allgemeine Abfrage durch!
|
|
|
|
VERBOTEN:
|
|
- "needsDatabaseQuery": false setzen, nur weil die Frage allgemein klingt
|
|
- "needsDatabaseQuery": false setzen, ohne zu prüfen, ob Datenbank-Daten helfen könnten
|
|
- Chat-Antworten geben statt Datenbankabfragen durchzuführen
|
|
|
|
WICHTIG:
|
|
- Antworte NUR mit dem JSON-Objekt, KEIN zusätzlicher Text davor oder danach!
|
|
- KEINE Erklärungen, KEINE Begrüßungen, KEINE Chat-Antworten!
|
|
- NUR das JSON-Objekt!
|
|
- Bei Unsicherheit: IMMER "needsDatabaseQuery": true setzen!
|
|
"""
|
|
retry_analysis_prompt = retry_analysis_prompt + json_format_instruction
|
|
logger.info("Using custom analysis prompt for retry from instance config with JSON format requirement")
|
|
|
|
# AI call for retry analysis
|
|
retry_analysis_result = await method_ai.process({
|
|
"aiPrompt": retry_analysis_prompt,
|
|
"documentList": None,
|
|
"resultType": "json",
|
|
"simpleMode": True
|
|
})
|
|
|
|
# Extract retry analysis
|
|
retry_analysis_content = None
|
|
if retry_analysis_result.success and retry_analysis_result.documents:
|
|
retry_analysis_content = retry_analysis_result.documents[0].documentData
|
|
if isinstance(retry_analysis_content, bytes):
|
|
retry_analysis_content = retry_analysis_content.decode('utf-8')
|
|
|
|
if retry_analysis_content:
|
|
retry_analysis = _extractJsonFromResponse(retry_analysis_content)
|
|
if retry_analysis is None:
|
|
logger.warning("Failed to extract JSON from retry analysis response")
|
|
retry_analysis = {}
|
|
if retry_analysis and retry_analysis.get("needsDatabaseQuery", False):
|
|
retry_sql_queries = retry_analysis.get("sqlQueries", [])
|
|
# Limit to maximum 5 queries for performance
|
|
if len(retry_sql_queries) > 5:
|
|
logger.info(f"Limiting retry queries from {len(retry_sql_queries)} to 5 for performance")
|
|
retry_sql_queries = retry_sql_queries[:5]
|
|
if retry_sql_queries:
|
|
logger.info(f"Executing {len(retry_sql_queries)} retry queries (attempt {empty_retry_attempt}) with alternative strategies...")
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Führe {len(retry_sql_queries)} alternative Abfrage(n) mit anderen Strategien aus (Versuch {empty_retry_attempt})...",
|
|
log_type="info"
|
|
)
|
|
|
|
# Execute retry queries
|
|
try:
|
|
retry_results = await _execute_queries_parallel(retry_sql_queries, chatbot_config)
|
|
|
|
# Merge retry results into main results (renumber to continue sequence)
|
|
base_query_num = len(sql_queries)
|
|
for key, value in retry_results.items():
|
|
if key.startswith("query_"):
|
|
# Extract query number from retry result
|
|
try:
|
|
query_num = int(key.split("_")[1])
|
|
new_query_num = base_query_num + query_num
|
|
new_key = f"query_{new_query_num}"
|
|
|
|
if not key.endswith("_data") and not key.endswith("_error"):
|
|
queryResults[new_key] = value
|
|
if f"{key}_data" in retry_results:
|
|
queryResults[f"{new_key}_data"] = retry_results[f"{key}_data"]
|
|
elif key.endswith("_error"):
|
|
queryResults[f"{new_key}_error"] = value
|
|
except (ValueError, IndexError):
|
|
# Fallback if parsing fails
|
|
new_key = f"query_{base_query_num + 1}"
|
|
if not key.endswith("_data") and not key.endswith("_error"):
|
|
queryResults[new_key] = value
|
|
|
|
# Recalculate results after retry
|
|
retry_successful = [k for k in retry_results.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
|
|
retry_rows = sum(len(retry_results.get(f"{k}_data", [])) for k in retry_successful) if retry_successful else 0
|
|
|
|
# Update successful_queries list to include retry results
|
|
successful_queries = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
|
|
total_rows = sum(len(queryResults.get(f"{k}_data", [])) for k in successful_queries)
|
|
|
|
logger.info(f"Retry attempt {empty_retry_attempt}: Found {retry_rows} rows from {len(retry_successful)} queries. Total: {total_rows} rows from {len(successful_queries)} queries")
|
|
|
|
if retry_rows > 0:
|
|
# Success! Found results
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Alternative Abfragen erfolgreich: {len(retry_successful)} Abfrage(n) mit {retry_rows} Ergebnis{'en' if retry_rows != 1 else ''} gefunden",
|
|
log_type="info"
|
|
)
|
|
should_retry = False # Stop retry loop, we found results
|
|
break
|
|
elif retry_rows > previous_retry_rows:
|
|
# Made some progress (found more rows than before) - continue
|
|
previous_retry_rows = retry_rows
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Versuch {empty_retry_attempt}: Fortschritt erzielt ({retry_rows} Zeilen gefunden). Versuche weitere Strategie...",
|
|
log_type="info"
|
|
)
|
|
else:
|
|
# No progress made - stop retrying
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"Versuch {empty_retry_attempt}: Keine Ergebnisse gefunden. Beende Retry-Versuche.",
|
|
log_type="warning"
|
|
)
|
|
should_retry = False # Stop retry loop, no progress
|
|
break
|
|
except Exception as retry_error:
|
|
logger.error(f"Error executing retry queries (attempt {empty_retry_attempt}): {retry_error}", exc_info=True)
|
|
# Continue to next attempt even on error
|
|
|
|
# Check if we should continue retrying (already handled in break conditions above)
|
|
if empty_retry_attempt >= max_empty_retry_attempts:
|
|
logger.warning(f"Reached maximum empty retry attempts ({max_empty_retry_attempts}), stopping retry loop")
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
f"⚠️ Maximale Anzahl Retry-Versuche ({max_empty_retry_attempts}) erreicht. Keine Ergebnisse gefunden.",
|
|
log_type="warning"
|
|
)
|
|
should_retry = False
|
|
except Exception as e:
|
|
logger.error(f"Error executing parallel queries: {e}")
|
|
queryResults["error"] = f"Error executing queries: {str(e)}"
|
|
await _emit_log_and_event(
|
|
interfaceDbChat,
|
|
workflowId,
|
|
event_manager,
|
|
"Fehler bei parallelen Datenbankabfragen",
|
|
log_type="error"
|
|
)
|
|
|
|
# Wait for web research to complete (if it was started in parallel)
|
|
if web_research_task:
|
|
try:
|
|
webResearchResults = await web_research_task
|
|
if webResearchResults and not webResearchResults.startswith("Web research error"):
|
|
logger.info("Web research completed successfully")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche abgeschlossen")
|
|
else:
|
|
logger.warning("Web research completed with errors")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")
|
|
except Exception as e:
|
|
logger.error(f"Error waiting for web research: {e}", exc_info=True)
|
|
webResearchResults = f"Web research error: {str(e)}"
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Internet-Recherche fehlgeschlagen", log_type="warning")
|
|
|
|
# Check if workflow was stopped before generating final answer
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped, aborting final answer generation")
|
|
return
|
|
|
|
# Step 3: Generate final answer using AI
|
|
logger.info("Generating final answer with AI...")
|
|
await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Formuliere finale Antwort...")
|
|
|
|
# Build prompt for final answer - use custom prompt from configuration (already validated at start of chatProcess)
|
|
system_prompt = chatbot_config.custom_final_answer_prompt
|
|
logger.info("Using custom final answer prompt from instance config")
|
|
|
|
# Build answer context with query results using efficient list-based building
|
|
answer_context_parts = [f"User question: {userInput.prompt}{context}\n"]
|
|
|
|
# Add database results - organize by query with metadata
|
|
db_results_parts = []
|
|
if queryResults:
|
|
successful_results = []
|
|
error_results = []
|
|
|
|
# Extract query metadata from sql_queries if available
|
|
query_metadata = {}
|
|
if sql_queries:
|
|
for i, q_info in enumerate(sql_queries, 1):
|
|
query_metadata[f"query_{i}"] = {
|
|
"purpose": q_info.get("purpose", f"Query {i}"),
|
|
"table": q_info.get("table", "Unknown")
|
|
}
|
|
|
|
# Organize results by query number
|
|
query_numbers = set()
|
|
for key in queryResults.keys():
|
|
if key.startswith("query_") and not key.endswith("_data"):
|
|
# Extract query number (e.g., "query_1" -> 1)
|
|
try:
|
|
num = int(key.split("_")[1])
|
|
query_numbers.add(num)
|
|
except (ValueError, IndexError):
|
|
pass
|
|
|
|
# Build results with metadata
|
|
for query_num in sorted(query_numbers):
|
|
query_key = f"query_{query_num}"
|
|
error_key = f"{query_key}_error"
|
|
|
|
if error_key in queryResults:
|
|
error_msg = queryResults[error_key]
|
|
metadata = query_metadata.get(query_key, {})
|
|
purpose = metadata.get("purpose", f"Query {query_num}")
|
|
table = metadata.get("table", "Unknown")
|
|
error_results.append(f"Abfrage {query_num} ({table} - {purpose}): {error_msg}")
|
|
elif query_key in queryResults:
|
|
result_text = queryResults[query_key]
|
|
metadata = query_metadata.get(query_key, {})
|
|
purpose = metadata.get("purpose", f"Query {query_num}")
|
|
table = metadata.get("table", "Unknown")
|
|
successful_results.append(f"=== Abfrage {query_num}: {purpose} (Tabelle: {table}) ===\n{result_text}")
|
|
|
|
# Handle general error if present
|
|
if "error" in queryResults:
|
|
error_results.append(f"Allgemeiner Fehler: {queryResults['error']}")
|
|
|
|
# Build db_results_part efficiently
|
|
if successful_results:
|
|
db_results_parts.append("\n\nDATENBANK-ERGEBNISSE:\n")
|
|
db_results_parts.append("\n\n".join(successful_results))
|
|
answer_context_parts.append("DATENBANK-ERGEBNISSE:\n")
|
|
answer_context_parts.append("\n\n".join(successful_results))
|
|
answer_context_parts.append("\n")
|
|
|
|
if error_results:
|
|
db_results_parts.append("\n\nDATENBANK-FEHLER:\n")
|
|
db_results_parts.append("\n".join(error_results))
|
|
answer_context_parts.append("DATENBANK-FEHLER:\n")
|
|
answer_context_parts.append("\n".join(error_results))
|
|
answer_context_parts.append("\n")
|
|
|
|
db_results_part = "".join(db_results_parts)
|
|
|
|
# Add web research results
|
|
web_results_part = ""
|
|
# Check if web research results exist and are valid (not empty and not an error)
|
|
if webResearchResults and webResearchResults.strip() and not webResearchResults.startswith("Web research error"):
|
|
web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}"
|
|
answer_context_parts.append(f"INTERNET-RECHERCHE:\n{webResearchResults}\n")
|
|
|
|
# Join answer context efficiently
|
|
answerContext = "".join(answer_context_parts)
|
|
|
|
# Check if we have any actual data
|
|
successful_query_keys = [k for k in queryResults.keys() if k.startswith("query_") and not k.endswith("_error") and not k.endswith("_data")]
|
|
has_query_results = bool(successful_query_keys)
|
|
error_query_keys = [k for k in queryResults.keys() if k.endswith("_error")]
|
|
has_only_errors = bool(error_query_keys and not successful_query_keys)
|
|
|
|
# Count total number of articles found across all queries
|
|
total_articles_found = 0
|
|
if successful_query_keys:
|
|
for query_key in successful_query_keys:
|
|
data_key = f"{query_key}_data"
|
|
if data_key in queryResults:
|
|
article_count = len(queryResults[data_key])
|
|
total_articles_found += article_count
|
|
logger.info(f"Query {query_key} returned {article_count} articles")
|
|
|
|
logger.info(f"Total articles found across all queries: {total_articles_found}")
|
|
|
|
# Add explicit article count information to prompt (using efficient list building)
|
|
if total_articles_found > 0:
|
|
article_count_parts = [
|
|
"\n\n⚠️⚠️⚠️ WICHTIG - ARTIKELANZAHL ⚠️⚠️⚠️\n",
|
|
f"In den DATENBANK-ERGEBNISSEN oben wurden INSGESAMT {total_articles_found} Artikel gefunden.\n",
|
|
f"DU MUSST ALLE {total_articles_found} Artikel in deiner Antwort zeigen!\n"
|
|
]
|
|
if total_articles_found <= 20:
|
|
article_count_parts.append(f"Zeige ALLE {total_articles_found} Artikel in einer Tabelle.\n")
|
|
else:
|
|
article_count_parts.append(f"Zeige die ersten 20 Artikel in einer Tabelle + Hinweis auf weitere {total_articles_found - 20} Artikel.\n")
|
|
article_count_parts.extend([
|
|
f"❌ VERBOTEN: Nur einen Artikel zu zeigen, wenn {total_articles_found} gefunden wurden!\n",
|
|
f"✓ OBLIGATORISCH: Zeige ALLE {total_articles_found} Artikel!\n"
|
|
])
|
|
article_count_info = "".join(article_count_parts)
|
|
|
|
if db_results_part:
|
|
db_results_part = article_count_info + db_results_part
|
|
else:
|
|
db_results_part = article_count_info
|
|
|
|
# Add warning messages if needed (using efficient list building)
|
|
warning_parts = []
|
|
if not has_query_results and needsDatabaseQuery:
|
|
warning_parts.append("\n\n⚠️⚠️⚠️ WICHTIG - DATENBANKABFRAGE AUSGEFÜHRT ⚠️⚠️⚠️\n")
|
|
warning_parts.append("Die Datenbankabfrage wurde AUSGEFÜHRT, hat aber KEINE Ergebnisse zurückgegeben.\n")
|
|
warning_parts.append("DU HAST ZUGRIFF AUF DIE DATENBANK - die Abfrage wurde durchgeführt!\n")
|
|
warning_parts.append("Antworte dem Nutzer: 'Es wurden keine Artikel gefunden' oder 'Keine passenden Artikel in der Datenbank gefunden'\n")
|
|
warning_parts.append("VERBOTEN: Sage NIEMALS 'Ich habe keinen Zugriff' oder 'Ich kann nicht auf die Datenbank zugreifen'!\n")
|
|
warning_parts.append("VERBOTEN: Sage NIEMALS 'Es tut mir leid, aber ich habe keinen Zugriff auf die Datenbank'!\n")
|
|
warning_parts.append("Die Datenbank wurde durchsucht, es wurden nur keine passenden Artikel gefunden.\n")
|
|
|
|
if has_only_errors:
|
|
warning_parts.extend([
|
|
"\n\n⚠️⚠️⚠️ KRITISCH - ALLE QUERIES FEHLGESCHLAGEN ⚠️⚠️⚠️\n",
|
|
"ALLE Datenbankabfragen sind fehlgeschlagen. Es gibt KEINE gültigen Daten aus der Datenbank.\n",
|
|
"DU DARFST KEINE DATEN ERFINDEN! Schreibe stattdessen: 'Es wurden keine Artikel gefunden' oder 'Die Datenbankabfrage ist fehlgeschlagen'."
|
|
])
|
|
|
|
if warning_parts:
|
|
db_results_part = db_results_part + "".join(warning_parts) if db_results_part else "".join(warning_parts)
|
|
|
|
# Determine if we have database results, web results, or both
|
|
has_db_results = bool(db_results_part and db_results_part.strip())
|
|
has_web_results = bool(web_results_part and web_results_part.strip())
|
|
|
|
# Add explicit instruction that AI has database access (if connectors are configured)
|
|
access_instruction = ""
|
|
if chatbot_config.connector_types and len(chatbot_config.connector_types) > 0:
|
|
access_instruction = "\n\n⚠️⚠️⚠️ ABSOLUT KRITISCH - DATENBANK-ZUGRIFF VERFÜGBAR ⚠️⚠️⚠️\n"
|
|
access_instruction += "DU HAST ZUGRIFF AUF DIE DATENBANK UND KANNST SQL-ABFRAGEN AUSFÜHREN!\n"
|
|
access_instruction += "Die Datenbankabfrage wurde bereits AUSGEFÜHRT (siehe DATENBANK-ERGEBNISSE oben)!\n"
|
|
access_instruction += "\nVERBOTEN - NIEMALS SAGEN:\n"
|
|
access_instruction += "- 'Ich habe keinen Zugriff auf die Datenbank'\n"
|
|
access_instruction += "- 'Ich kann nicht auf die Datenbank zugreifen'\n"
|
|
access_instruction += "- 'Es tut mir leid, aber ich habe keinen Zugriff'\n"
|
|
access_instruction += "- 'Ich habe keinen Zugriff auf Echtzeit-Datenbanken'\n"
|
|
access_instruction += "- Jede andere Formulierung, die suggeriert, dass du keinen Zugriff hast!\n"
|
|
access_instruction += "\nRICHTIG - WENN KEINE ERGEBNISSE:\n"
|
|
access_instruction += "- 'Es wurden keine Artikel gefunden'\n"
|
|
access_instruction += "- 'Keine passenden Artikel in der Datenbank gefunden'\n"
|
|
access_instruction += "- 'Die Datenbanksuche ergab keine Treffer'\n"
|
|
access_instruction += "- 'Ich habe in der Datenbank gesucht, aber keine passenden Artikel gefunden'\n"
|
|
access_instruction += "\nWICHTIG: Die Datenbank wurde durchsucht - es wurden nur keine passenden Artikel gefunden!\n"
|
|
|
|
if chatbot_config.enable_web_research:
|
|
access_instruction += "\n⚠️⚠️⚠️ ABSOLUT KRITISCH - INTERNET-ZUGRIFF VERFÜGBAR ⚠️⚠️⚠️\n"
|
|
access_instruction += "DU HAST ZUGRIFF AUF DAS INTERNET (Tavily)!\n"
|
|
access_instruction += "VERBOTEN: Sage NIEMALS, dass du keinen Zugriff auf das Internet hast!\n"
|
|
access_instruction += "VERBOTEN: Sage NIEMALS 'Ich habe keinen Zugriff auf das Internet'!\n"
|
|
|
|
# Build the final answer prompt using custom system prompt from config
|
|
answer_prompt = _build_final_answer_prompt_with_results(
|
|
system_prompt + access_instruction,
|
|
userInput.prompt,
|
|
context,
|
|
db_results_part,
|
|
web_results_part,
|
|
is_resumed,
|
|
has_db_results,
|
|
has_web_results
|
|
)
|
|
|
|
answerRequest = AiCallRequest(
|
|
prompt=answer_prompt,
|
|
context=answerContext if (queryResults or webResearchResults) else None,
|
|
options=AiCallOptions(
|
|
resultFormat="txt",
|
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
|
processingMode=ProcessingModeEnum.DETAILED
|
|
)
|
|
)
|
|
|
|
# Double-check workflow wasn't stopped right before AI call
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped before final answer AI call, aborting")
|
|
return
|
|
|
|
answerResponse = await services.ai.callAi(answerRequest)
|
|
|
|
# Check immediately after AI call completes - if stopped, abort without processing or storing
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped during final answer AI call, aborting without storing message")
|
|
return
|
|
|
|
# Check for errors in AI response
|
|
if answerResponse.errorCount > 0:
|
|
logger.error(f"AI call failed with errorCount={answerResponse.errorCount}: {answerResponse.content}")
|
|
finalAnswer = "Entschuldigung, ich konnte Ihre Anfrage derzeit nicht verarbeiten. Bitte versuchen Sie es später erneut."
|
|
else:
|
|
finalAnswer = answerResponse.content
|
|
logger.info("Final answer generated")
|
|
|
|
# Check again after generating answer (in case it was stopped while generating)
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped after final answer generation, not storing message")
|
|
return
|
|
|
|
# Reload workflow to get current message count
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
|
|
# Double-check workflow wasn't stopped while we were reloading
|
|
if workflow and workflow.status == "stopped":
|
|
logger.info(f"Workflow {workflowId} was stopped, not storing final message")
|
|
return
|
|
|
|
# Create assistant message with final answer
|
|
message_id = f"msg_{uuid.uuid4()}"
|
|
assistantMessageData = {
|
|
"id": message_id,
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": finalAnswer,
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": answerResponse.errorCount == 0 if answerResponse else True,
|
|
"roundNumber": workflow.currentRound,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
|
|
assistantMessage = interfaceDbChat.createMessage(assistantMessageData)
|
|
logger.info(f"Stored assistant message with final answer: {assistantMessage.id}")
|
|
|
|
# Emit message event for streaming (exact chatData format)
|
|
message_timestamp = parseTimestamp(assistantMessage.publishedAt, default=getUtcTimestamp())
|
|
await event_manager.emit_event(
|
|
context_id=workflowId,
|
|
event_type="chatdata",
|
|
data={
|
|
"type": "message",
|
|
"createdAt": message_timestamp,
|
|
"item": assistantMessage.dict()
|
|
},
|
|
event_category="chat"
|
|
)
|
|
|
|
# Update workflow status to completed (only if not stopped)
|
|
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
interfaceDbChat.updateWorkflow(workflowId, {
|
|
"status": "completed",
|
|
"lastActivity": getUtcTimestamp()
|
|
})
|
|
else:
|
|
logger.info(f"Workflow {workflowId} was stopped, not updating status to completed")
|
|
|
|
logger.info(f"Chatbot processing completed for workflow {workflowId}, generated {len(queries)} queries and final answer")
|
|
|
|
# Emit completion event only if workflow wasn't stopped
|
|
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
await event_manager.emit_event(
|
|
context_id=workflowId,
|
|
event_type="complete",
|
|
data={"workflowId": workflowId},
|
|
event_category="workflow",
|
|
message="Chatbot-Verarbeitung abgeschlossen",
|
|
step="complete"
|
|
)
|
|
|
|
# Schedule cleanup with longer delay to allow stream to stay open
|
|
await event_manager.cleanup(workflowId, delay=300.0) # 5 minutes delay
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error processing chatbot message: {str(e)}", exc_info=True)
|
|
|
|
# Check if workflow was stopped - if so, don't store error message
|
|
if await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
logger.info(f"Workflow {workflowId} was stopped, not storing error message")
|
|
return
|
|
|
|
# Store error message
|
|
try:
|
|
# Reload workflow to get current message count
|
|
workflow = interfaceDbChat.getWorkflow(workflowId)
|
|
|
|
# Double-check workflow wasn't stopped while we were reloading
|
|
if workflow and workflow.status == "stopped":
|
|
logger.info(f"Workflow {workflowId} was stopped, not storing error message")
|
|
return
|
|
|
|
errorMessageData = {
|
|
"id": f"msg_{uuid.uuid4()}",
|
|
"workflowId": workflowId,
|
|
"parentMessageId": userMessageId,
|
|
"message": f"Sorry, I encountered an error: {str(e)}",
|
|
"role": "assistant",
|
|
"status": "last",
|
|
"sequenceNr": len(workflow.messages) + 1,
|
|
"publishedAt": getUtcTimestamp(),
|
|
"success": False,
|
|
"roundNumber": workflow.currentRound if workflow else 1,
|
|
"taskNumber": 0,
|
|
"actionNumber": 0
|
|
}
|
|
errorMessage = interfaceDbChat.createMessage(errorMessageData)
|
|
|
|
# Emit message event for streaming (exact chatData format)
|
|
message_timestamp = parseTimestamp(errorMessage.publishedAt, default=getUtcTimestamp())
|
|
await event_manager.emit_event(
|
|
context_id=workflowId,
|
|
event_type="chatdata",
|
|
data={
|
|
"type": "message",
|
|
"createdAt": message_timestamp,
|
|
"item": errorMessage.dict()
|
|
},
|
|
event_category="chat"
|
|
)
|
|
|
|
# Update workflow status to error (only if not stopped)
|
|
if not await _check_workflow_stopped(interfaceDbChat, workflowId):
|
|
interfaceDbChat.updateWorkflow(workflowId, {
|
|
"status": "error",
|
|
"lastActivity": getUtcTimestamp()
|
|
})
|
|
else:
|
|
logger.info(f"Workflow {workflowId} was stopped, not updating status to error")
|
|
|
|
# Schedule cleanup
|
|
await event_manager.cleanup(workflowId)
|
|
except Exception as storeError:
|
|
logger.error(f"Error storing error message: {storeError}")
|