fix: improve context handling in prompts

Conversation context is now only used if the prompt specifically asks for it; otherwise it is ignored to improve answers.
2026-01-14 12:20:25 +01:00 · 2026-01-14 12:20:25 +01:00 · 1bd183ca4c
commit 1bd183ca4c
parent 6f6ee25ef2
2 changed files with 279 additions and 30 deletions
--- a/modules/features/chatbot/chatbotConstants.py
+++ b/modules/features/chatbot/chatbotConstants.py
@ -367,19 +367,74 @@ def get_system_prompt() -> str:
    return get_final_answer_system_prompt()


-def get_initial_analysis_prompt(user_prompt: str, context: str) -> str:
+def get_initial_analysis_prompt(user_prompt: str, context: str, is_resumed: bool = False) -> str:
    """
    Get the prompt for initial user input analysis.
    
    Args:
        user_prompt: User's input prompt
        context: Conversation context
+        is_resumed: If True, exclude system prompt (already in context from previous messages)
        
    Returns:
        Formatted prompt string
    """
    system_prompt = get_analysis_system_prompt()
-    return f"""{system_prompt}
+    
+    if is_resumed:
+        # System prompt already in context, don't repeat it
+        # Emphasize that the current question is primary
+        if context:
+            context_section = f"""
+⚠️⚠️⚠️ KONTEXT (NUR FÜR REFERENZ - IGNORIEREN WENN NICHT BENÖTIGT) ⚠️⚠️⚠️
+{context}
+⚠️⚠️⚠️ ENDE KONTEXT ⚠️⚠️⚠️
+
+"""
+        else:
+            context_section = ""
+        
+        return f"""⚠️⚠️⚠️ AKTUELLE FRAGE (PRIMÄR - DIESE MUSS BEANTWORTET WERDEN) ⚠️⚠️⚠️
+User question: {user_prompt}
+{context_section}
+⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
+- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
+- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf (z.B. "Was war das nochmal?", "Erkläre das genauer")
+- Analysiere NUR die aktuelle Frage, nicht die Kontext-Fragen
+
+⚠️ WICHTIG - QUERY-ANZAHL FÜR PERFORMANCE ⚠️
+✓ Erstelle MAXIMAL 5 SQL-Queries (für bessere Performance)
+✓ Jede Query muss eine andere Strategie verfolgen
+✓ Alle Queries werden parallel ausgeführt
+
+Analysiere die AKTUELLE Benutzeranfrage OBEN und bestimme:
+1. Ob eine Datenbankabfrage benötigt wird (needsDatabaseQuery)
+2. Ob eine Web-Recherche benötigt wird (needsWebResearch)
+3. Falls eine Datenbankabfrage benötigt wird: Erstelle MAXIMAL 5 separate, vollständige, ausführbare SQL-Abfragen mit unterschiedlichen Strategien
+
+⚠️ WICHTIGE REGELN:
+- Bei "mindestens X": Höhere Werte einschließen (z.B. "mindestens 10A" → 10A OR 12A OR 15A OR 20A)
+- Bei Zertifizierungen (UL, CE, TÜV, etc.): IMMER needsWebResearch = true setzen
+- SQL: Doppelte Anführungszeichen für Spaltennamen, JOIN mit Lagerplatz bei Beständen
+- Bei Lagerbeständen: Breite Suche über Artikelkürzel, Artikelnummer UND Artikelbezeichnung
+
+Return ONLY valid JSON:
+{{
+  "needsDatabaseQuery": boolean,
+  "needsWebResearch": boolean,
+  "sqlQueries": [
+    {{
+      "query": string (ready-to-execute SQL with double quotes for column names),
+      "purpose": string (description of what this query retrieves),
+      "table": string (primary table name, e.g., "Artikel", "Lagerplatz_Artikel")
+    }}
+  ] (MAXIMAL 5 queries für Performance!),
+  "reasoning": string
+}}
+"""
+    else:
+        # New chat: include system prompt
+        return f"""{system_prompt}

 User question: {user_prompt}{context}

@ -754,7 +809,10 @@ def get_final_answer_prompt_with_results(
    user_prompt: str,
    context: str,
    db_results_part: str,
-    web_results_part: str
+    web_results_part: str,
+    is_resumed: bool = False,
+    has_db_results: bool = False,
+    has_web_results: bool = False
 ) -> str:
    """
    Get the complete prompt for generating the final answer with database and web results.
@ -764,13 +822,129 @@ def get_final_answer_prompt_with_results(
        context: Conversation context
        db_results_part: Formatted database results section
        web_results_part: Formatted web research results section
+        is_resumed: If True, exclude system prompt (already in context from previous messages)
        
    Returns:
        Complete formatted prompt string
    """
    system_prompt = get_final_answer_system_prompt()
    
-    return f"""{system_prompt}
+    if is_resumed:
+        # System prompt already in context, don't repeat it
+        # Emphasize that the current question is primary
+        if context:
+            context_section = f"""
+⚠️⚠️⚠️ KONTEXT (NUR FÜR REFERENZ - IGNORIEREN WENN NICHT BENÖTIGT) ⚠️⚠️⚠️
+{context}
+⚠️⚠️⚠️ ENDE KONTEXT ⚠️⚠️⚠️
+
+"""
+        else:
+            context_section = ""
+        
+        # Build instructions based on what data sources are available
+        if has_web_results and not has_db_results:
+            # Only web research - emphasize web research
+            instructions = f"""⚠️⚠️⚠️ WICHTIG - NUR INTERNET-RECHERCHE VERFÜGBAR ⚠️⚠️⚠️
+- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
+- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
+- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
+
+{db_results_part}{web_results_part}
+
+KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte.
+
+⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️
+- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben
+- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
+- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL))
+- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert
+- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind
+- ❌ ABSOLUT VERBOTEN: Daten erfinden
+
+WICHTIG:
+- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
+- Klare, strukturierte Antwort mit Quellenangaben
+- Präsentiere die gefundenen Informationen ausführlich"""
+        elif has_db_results and not has_web_results:
+            # Only database - use existing database-focused instructions
+            instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
+- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
+- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
+- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
+
+{db_results_part}{web_results_part}
+
+KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
+
+⚠️⚠️⚠️ ABSOLUT KRITISCH - ALLE ARTIKEL ZURÜCKGEBEN ⚠️⚠️⚠️
+- ✓ OBLIGATORISCH: Du MUSST ALLE Artikel zurückgeben, die die Kriterien erfüllen
+- ✓ OBLIGATORISCH: Kombiniere Ergebnisse aus ALLEN erfolgreichen Abfragen
+- ✓ OBLIGATORISCH: Zähle ALLE Artikel in den DATENBANK-ERGEBNISSEN oben
+- ✓ OBLIGATORISCH: Zeige ALLE gefundenen Artikel in deiner Antwort (bis zu 20 in der Tabelle)
+- ❌ ABSOLUT VERBOTEN: Nur einen Artikel zurückgeben, wenn mehrere gefunden wurden
+- ❌ ABSOLUT VERBOTEN: Nur den ersten Artikel zeigen
+- ❌ ABSOLUT VERBOTEN: Artikel auslassen, die in den DATENBANK-ERGEBNISSEN stehen
+
+WICHTIG:
+- Beginne DIREKT mit "Aus der Datenbank habe ich..." (keine Planungsschritte!)
+- Klare, strukturierte Antwort
+- Markdown-Tabellen (max 20 Zeilen)
+- Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)"""
+        else:
+            # Both database and web research
+            instructions = f"""⚠️⚠️⚠️ WICHTIG ⚠️⚠️⚠️
+- Die AKTUELLE FRAGE OBEN ist die einzige Frage, die beantwortet werden muss
+- Ignoriere den Kontext komplett, es sei denn die aktuelle Frage bezieht sich explizit darauf
+- Antworte NUR auf die aktuelle Frage, nicht auf Fragen aus dem Kontext
+
+{db_results_part}{web_results_part}
+
+KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
+
+⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️
+- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE
+- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
+- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen
+- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL))
+- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle)
+
+WICHTIG:
+- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
+- Dann "Aus meiner Web-Recherche..." für Internet-Informationen
+- Klare, strukturierte Antwort mit Quellenangaben"""
+        
+        return f"""⚠️⚠️⚠️ AKTUELLE FRAGE (PRIMÄR - DIESE MUSS BEANTWORTET WERDEN) ⚠️⚠️⚠️
+Antworte auf die folgende Frage des Nutzers: {user_prompt}
+{context_section}{instructions}"""
+    else:
+        # New chat: include system prompt
+        # Build instructions based on what data sources are available
+        if has_web_results and not has_db_results:
+            # Only web research - emphasize web research
+            return f"""{system_prompt}
+
+Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}
+
+{db_results_part}{web_results_part}
+
+KRITISCH: Verwende NUR die oben angegebenen Daten aus der INTERNET-RECHERCHE. Erfinde KEINE Werte.
+
+⚠️⚠️⚠️ WICHTIG - INTERNET-RECHERCHE VERWENDEN ⚠️⚠️⚠️
+- ✓ OBLIGATORISCH: Verwende die Informationen aus der INTERNET-RECHERCHE oben
+- ✓ OBLIGATORISCH: Beginne mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
+- ✓ OBLIGATORISCH: Gib Quellen an: [Info] ([Quelle: Name](URL))
+- ✓ OBLIGATORISCH: Präsentiere die Informationen ausführlich und strukturiert
+- ❌ ABSOLUT VERBOTEN: Erwähne Datenbank-Ergebnisse, wenn keine vorhanden sind
+- ❌ ABSOLUT VERBOTEN: Daten erfinden
+
+WICHTIG:
+- Beginne DIREKT mit "Aus meiner Web-Recherche..." oder "Aus meiner Internet-Recherche..."
+- Klare, strukturierte Antwort mit Quellenangaben
+- Präsentiere die gefundenen Informationen ausführlich"""
+        elif has_db_results and not has_web_results:
+            # Only database - use existing database-focused instructions
+            return f"""{system_prompt}

 Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}

@ -810,4 +984,25 @@ WICHTIG:
 - Klare, strukturierte Antwort
 - Markdown-Tabellen (max 20 Zeilen)
 - Artikelnummern als Link: [ARTIKELNUMMER](/details/ARTIKELNUMMER)"""
+        else:
+            # Both database and web research
+            return f"""{system_prompt}
+
+Antworte auf die folgende Frage des Nutzers: {user_prompt}{context}
+
+{db_results_part}{web_results_part}
+
+KRITISCH: Verwende NUR die oben angegebenen Daten. Erfinde KEINE Werte. Wenn Daten fehlen, schreibe "Nicht verfügbar".
+
+⚠️⚠️⚠️ WICHTIG - BEIDE QUELLEN VERWENDEN ⚠️⚠️⚠️
+- ✓ OBLIGATORISCH: Verwende sowohl DATENBANK-ERGEBNISSE als auch INTERNET-RECHERCHE
+- ✓ OBLIGATORISCH: Beginne mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
+- ✓ OBLIGATORISCH: Verwende "Aus meiner Web-Recherche..." für Internet-Informationen
+- ✓ OBLIGATORISCH: Gib Quellen für Web-Informationen an: [Info] ([Quelle: Name](URL))
+- ✓ OBLIGATORISCH: Zeige ALLE Artikel aus den DATENBANK-ERGEBNISSEN (bis zu 20 in Tabelle)
+
+WICHTIG:
+- Beginne DIREKT mit "Aus der Datenbank habe ich..." für Datenbank-Ergebnisse
+- Dann "Aus meiner Web-Recherche..." für Internet-Informationen
+- Klare, strukturierte Antwort mit Quellenangaben"""

--- a/modules/features/chatbot/mainChatbot.py
+++ b/modules/features/chatbot/mainChatbot.py
@ -498,8 +498,27 @@ def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults
                            if article_number and article_description and supplier:
                                break
    
+    # Check if current prompt is an explicit search request that should NOT use context
+    # If user explicitly asks to search for something, prioritize that over previous messages
+    explicit_search_patterns = [
+        r"recherchier\s+(?:im\s+internet\s+)?nach\s+(.+)",
+        r"suche\s+(?:im\s+internet\s+)?nach\s+(.+)",
+        r"search\s+(?:the\s+internet\s+)?for\s+(.+)",
+        r"find\s+(?:information\s+)?(?:about\s+)?(.+)",
+        r"recherche\s+(?:sur\s+internet\s+)?(.+)"
+    ]
+    
+    explicit_search_term = None
+    for pattern in explicit_search_patterns:
+        match = re.search(pattern, userPrompt, re.IGNORECASE)
+        if match:
+            explicit_search_term = match.group(1).strip()
+            logger.info(f"Found explicit search term in prompt: '{explicit_search_term}'")
+            break
+    
    # 3. Extract from previous assistant messages (conversation history)
-    if not article_number or not article_description:
+    # ONLY if there's no explicit search term (to avoid using old context for new searches)
+    if not explicit_search_term and (not article_number or not article_description):
        for msg in reversed(workflowMessages[-10:]):
            if msg.role == "assistant":
                message_text = msg.message
@ -550,29 +569,33 @@ def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults
    # Build enriched search query
    query_parts = []
    
+    # If we have an explicit search term, use it as the primary query
+    if explicit_search_term:
+        query_parts.append(explicit_search_term)
+        logger.info(f"Using explicit search term as primary query: '{explicit_search_term}'")
    # If we have search intent but no product info, try to use the user prompt intelligently
-    if has_search_intent and not article_number and not article_description:
+    elif has_search_intent and not article_number and not article_description:
        # Try to extract meaningful parts from the prompt
        # Remove common search phrases and keep the product-related parts
        cleaned_prompt = userPrompt
-        for phrase in ["recherchier nach", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information"]:
+        for phrase in ["recherchier", "recherche", "suche nach", "search for", "find", "informationen zu", "information about", "weitere informationen", "further information", "im internet", "the internet", "sur internet"]:
            cleaned_prompt = re.sub(phrase, "", cleaned_prompt, flags=re.IGNORECASE)
        cleaned_prompt = cleaned_prompt.strip()
        
-        # If cleaned prompt still has content and is different, use it
-        if cleaned_prompt and cleaned_prompt != userPrompt and len(cleaned_prompt) > 10:
+        # Use cleaned prompt if it has meaningful content
+        if cleaned_prompt and len(cleaned_prompt) > 2:
            query_parts.append(cleaned_prompt)
    
-    # Add article description if found
-    if article_description:
+    # Add article description if found (but NOT if we have an explicit search term)
+    if article_description and not explicit_search_term:
        query_parts.append(article_description)
    
-    # Add article number if found
-    if article_number:
+    # Add article number if found (but NOT if we have an explicit search term)
+    if article_number and not explicit_search_term:
        query_parts.append(article_number)
    
-    # Add supplier if found
-    if supplier:
+    # Add supplier if found (but NOT if we have an explicit search term)
+    if supplier and not explicit_search_term:
        query_parts.append(supplier)
    
    # Extract certification information from prompt if present
@ -600,11 +623,13 @@ def _buildWebResearchQuery(userPrompt: str, workflowMessages: List, queryResults
        query_parts.extend(certification_terms)
    
    # Add "Datenblatt" or "datasheet" if user requested it or if we have product info
-    if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower:
-        query_parts.append("Datenblatt")
-    elif query_parts:
-        # If we have product info but no explicit request for datasheet, add it anyway
-        query_parts.append("Datenblatt")
+    # But NOT if we have an explicit search term (user wants to search for something specific)
+    if not explicit_search_term:
+        if "datenblatt" in prompt_lower or "datasheet" in prompt_lower or "fiche technique" in prompt_lower:
+            query_parts.append("Datenblatt")
+        elif query_parts and (article_number or article_description):
+            # If we have product info but no explicit request for datasheet, add it anyway
+            query_parts.append("Datenblatt")
    
    # If we found product information or built a meaningful query, use it
    if query_parts:
@ -857,10 +882,25 @@ async def _processChatbotMessage(
            return
        
        # Build conversation context from history
+        # Only include context if the new question might need it (e.g., references to previous messages)
        context = ""
-        if workflow.messages:
-            recent_messages = workflow.messages[-5:]
-            context = "\n\nPrevious conversation:\n"
+        is_resumed = len(workflow.messages) > 0 if workflow.messages else False
+        
+        # Check if the current question might need context (references like "it", "that", "previous", "earlier", etc.)
+        needs_context = False
+        if is_resumed:
+            current_prompt_lower = userInput.prompt.lower()
+            context_keywords = ["es", "das", "dieses", "jenes", "vorherige", "frühere", "vorhin", "oben", 
+                               "it", "that", "this", "previous", "earlier", "above", "mentioned", "before",
+                               "davor", "dazu", "darauf", "damit", "davon"]
+            needs_context = any(keyword in current_prompt_lower for keyword in context_keywords)
+        
+        if is_resumed and needs_context:
+            recent_messages = workflow.messages[-3:]  # Reduced from 5 to 3 for less distraction
+            context = "\n\n⚠️ WICHTIG - KONTEXT NUR FÜR REFERENZ ⚠️\n"
+            context += "Die folgende Konversation ist nur als Referenz, falls die aktuelle Frage darauf Bezug nimmt.\n"
+            context += "FOKUSSIERE AUF DIE AKTUELLE FRAGE OBEN!\n\n"
+            context += "Vorherige Konversation:\n"
            for msg in recent_messages:
                if msg.role == "user":
                    context += f"User: {msg.message}\n"
@ -873,7 +913,7 @@ async def _processChatbotMessage(
        logger.info("Analyzing user input to generate queries...")
        await _emit_log_and_event(interfaceDbChat, workflowId, event_manager, "Analysiere Benutzeranfrage...")
        
-        analysisPrompt = get_initial_analysis_prompt(userInput.prompt, context)
+        analysisPrompt = get_initial_analysis_prompt(userInput.prompt, context, is_resumed)
        
        # AI call for analysis
        method_ai = MethodAi(services)
@ -1142,7 +1182,8 @@ async def _processChatbotMessage(
                        retry_context += "- COUNT-Query: Wie viele Netzgeräte gibt es insgesamt?\n"
                        retry_context += "- Suche nach ALLEN verfügbaren Netzgeräten\n"
                    
-                    retry_analysis_prompt = get_initial_analysis_prompt(userInput.prompt, retry_context)
+                    # Retry analysis is always part of an ongoing chat, so use is_resumed=True
+                    retry_analysis_prompt = get_initial_analysis_prompt(userInput.prompt, retry_context, is_resumed=True)
                    
                    # AI call for retry analysis
                    retry_analysis_result = await method_ai.process({
@ -1368,7 +1409,8 @@ async def _processChatbotMessage(
        
        # Add web research results
        web_results_part = ""
-        if webResearchResults:
+        # Check if web research results exist and are valid (not empty and not an error)
+        if webResearchResults and webResearchResults.strip() and not webResearchResults.startswith("Web research error"):
            web_results_part = f"\n\nINTERNET-RECHERCHE:\n{webResearchResults}"
            answer_context_parts.append(f"INTERNET-RECHERCHE:\n{webResearchResults}\n")
        
@ -1430,12 +1472,19 @@ async def _processChatbotMessage(
        if warning_parts:
            db_results_part = db_results_part + "".join(warning_parts) if db_results_part else "".join(warning_parts)
        
+        # Determine if we have database results, web results, or both
+        has_db_results = bool(db_results_part and db_results_part.strip())
+        has_web_results = bool(web_results_part and web_results_part.strip())
+        
        # Use the function from constants file to build the prompt
        answer_prompt = get_final_answer_prompt_with_results(
            userInput.prompt,
            context,
            db_results_part,
-            web_results_part
+            web_results_part,
+            is_resumed,
+            has_db_results,
+            has_web_results
        )
        
        answerRequest = AiCallRequest(
@ -1449,9 +1498,14 @@ async def _processChatbotMessage(
        )
        
        answerResponse = await services.ai.callAi(answerRequest)
-        finalAnswer = answerResponse.content
        
-        logger.info("Final answer generated")
+        # Check for errors in AI response
+        if answerResponse.errorCount > 0:
+            logger.error(f"AI call failed with errorCount={answerResponse.errorCount}: {answerResponse.content}")
+            finalAnswer = "Entschuldigung, ich konnte Ihre Anfrage derzeit nicht verarbeiten. Bitte versuchen Sie es später erneut."
+        else:
+            finalAnswer = answerResponse.content
+            logger.info("Final answer generated")
        
        # Check if workflow was stopped during AI call - if so, don't store the message
        if await _check_workflow_stopped(interfaceDbChat, workflowId):
@ -1477,7 +1531,7 @@ async def _processChatbotMessage(
            "status": "last",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": getUtcTimestamp(),
-            "success": True,
+            "success": answerResponse.errorCount == 0 if answerResponse else True,
            "roundNumber": workflow.currentRound,
            "taskNumber": 0,
            "actionNumber": 0