gateway/modules/workflows/processing/adaptive/intentAnalyzer.py
2025-11-03 23:51:20 +01:00

157 lines
7 KiB
Python

# intentAnalyzer.py
# Intent analysis for adaptive Dynamic mode - AI-based, language-agnostic
import json
import logging
import re
from typing import Any, Dict, List, Optional
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class IntentAnalyzer:
    """Analyzes user intent using AI - language-agnostic and generic.

    One prompt is sent through ``services.ai.callAiPlanning`` and the JSON
    object found in the reply is returned as a dict. There are no retries or
    correction prompts: the reply either parses or the analysis fails.
    """

    # A JSON object wrapped in a markdown code fence (``` or ```json).
    _FENCED_JSON = re.compile(r'```(?:json)?\s*(\{.*?\})\s*```', re.DOTALL)
    # Last resort: everything from the first "{" to the last "}".
    _ANY_BRACES = re.compile(r'\{.*\}', re.DOTALL)

    def __init__(self, services=None):
        """Store the service container used for AI calls and prompt sanitizing.

        Args:
            services: Object expected to expose ``ai``, ``utils``,
                ``currentUserLanguage`` and ``user``. May be None, in which
                case every analysis fails.
        """
        self.services = services

    async def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyze user intent from prompt and context (single attempt, no fallbacks).

        Args:
            userPrompt: Raw text of the user's request.
            context: Object that may carry a ``taskStep`` attribute; when it
                does, the step's ``objective`` is added to the prompt.

        Returns:
            The parsed intent dict produced by the AI.

        Raises:
            ValueError: If the AI returned nothing usable (including an empty
                JSON object).
        """
        aiAnalysis = await self._analyzeIntentWithAI(userPrompt, context)
        if not aiAnalysis:
            raise ValueError("AI intent analysis failed: empty or invalid response")
        return aiAnalysis

    async def _analyzeIntentWithAI(self, userPrompt: str, context: Any) -> Optional[Dict[str, Any]]:
        """Ask the AI for an intent analysis and parse its reply.

        Returns:
            The parsed intent dict, or None when no AI service is available,
            the reply is empty, no JSON object can be extracted, or any
            exception is raised along the way (it is logged, not propagated).
        """
        try:
            if not self.services or not hasattr(self.services, 'ai'):
                return None

            analysisPrompt = self._buildAnalysisPrompt(userPrompt, context)
            response = await self.services.ai.callAiPlanning(
                prompt=analysisPrompt,
                placeholders=None,
                debugType="intentanalysis",
            )

            # No retries or correction prompts here; parse-or-fail below.
            if not response or not response.strip():
                logger.warning("AI intent analysis returned empty response")
                return None

            aiResult = self._parseIntentResponse(response)
            if aiResult is None:
                return None

            logger.info("AI intent analysis JSON parsed successfully")
            self._applyDetectedLanguage(aiResult)
            return aiResult
        except Exception as e:
            # Boundary handler: the caller turns None into a ValueError.
            logger.error(f"AI intent analysis failed: {str(e)}")
            return None

    def _buildAnalysisPrompt(self, userPrompt: str, context: Any) -> str:
        """Build the prompt sent to the AI for intent analysis.

        In task context (``context.taskStep`` is set) the request is labelled
        OBJECTIVE and the step's objective is appended; otherwise the request
        is labelled USER REQUEST and the context line reads ``CONTEXT: None``.
        """
        sanitize = self.services.utils.sanitizePromptContent
        taskStep = getattr(context, 'taskStep', None)

        if taskStep is not None:
            # Task context: use OBJECTIVE label and only task objective.
            requestLabel = "OBJECTIVE"
            contextObjective = getattr(taskStep, 'objective', '')
            contextInfo = f"OBJECTIVE: {sanitize(contextObjective, 'userinput')}"
        else:
            # Workflow context: there is no objective to report here, so the
            # context line is always the literal "None".
            requestLabel = "USER REQUEST"
            contextInfo = "CONTEXT: None"

        sanitizedRequest = sanitize(userPrompt, 'userinput')

        # The prompt lines are intentionally flush-left so that no source
        # indentation ends up in the text sent to the model.
        return f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
{requestLabel}: {sanitizedRequest}
{contextInfo}
Analyze the user's intent and determine:
1. What type of data/content they want (numbers, text, documents, analysis, code, etc.)
2. What file format(s) they expect - provide matching file format extensions list
- If multiple formats requested, list all of them (e.g., ["xlsx", "pdf"])
- If format is unclear or not specified, use empty list []
3. What quality requirements they have (accuracy, completeness)
4. What specific success criteria define completion
5. What language the user is communicating in (detect from the user request)
CRITICAL: Respond with ONLY the JSON object below. Do not include any explanatory text, analysis, or other content before or after the JSON.
{{
"primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"],
"qualityRequirements": {{
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0
}},
"successCriteria": ["specific criterion 1", "specific criterion 2"],
"languageUserDetected": "en",
"confidenceScore": 0.0-1.0
}}
"""

    def _parseIntentResponse(self, response: str) -> Optional[Dict[str, Any]]:
        """Extract and parse the intent JSON object from a raw AI reply.

        Strategies, in order:
        1. a JSON object inside a markdown code fence;
        2. the first decodable JSON object that has a top-level
           ``"primaryGoal"`` key (nested objects and surrounding text are fine);
        3. everything between the first ``{`` and the last ``}``.

        Returns:
            The parsed dict, or None if nothing parseable was found.
        """
        result = response.strip()
        logger.debug(f"AI intent analysis response length: {len(result)}")

        fenced = self._FENCED_JSON.search(result)
        if fenced:
            jsonText = fenced.group(1)
            logger.debug(f"Extracted JSON from markdown code block: {jsonText[:200]}...")
        else:
            intent = self._findIntentObject(result)
            if intent is not None:
                logger.debug("Extracted JSON directly: object with primaryGoal found")
                return intent

            anyObject = self._ANY_BRACES.search(result)
            if not anyObject:
                logger.warning(f"AI intent analysis failed - no JSON found in response: {result[:200]}...")
                logger.debug(f"Full AI response: {result}")
                return None
            jsonText = anyObject.group(0)
            logger.debug(f"Extracted JSON directly: {jsonText[:200]}...")

        try:
            return json.loads(jsonText)
        except json.JSONDecodeError as json_error:
            logger.warning(f"AI intent analysis invalid JSON: {str(json_error)}")
            logger.debug(f"JSON content: {jsonText}")
            return None

    @staticmethod
    def _findIntentObject(text: str) -> Optional[Dict[str, Any]]:
        """Return the first JSON object in ``text`` with a top-level "primaryGoal".

        Every ``{`` is tried as a possible object start with
        ``json.JSONDecoder.raw_decode``, which parses one value and ignores
        whatever follows it. Unlike a brace-matching regex this handles nested
        objects and text after the JSON.
        """
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(text, start)
            except ValueError:  # json.JSONDecodeError is a ValueError
                candidate = None
            if isinstance(candidate, dict) and "primaryGoal" in candidate:
                return candidate
            start = text.find("{", start + 1)
        return None

    def _applyDetectedLanguage(self, aiResult: Dict[str, Any]) -> None:
        """Copy the detected language onto ``services`` when none is set yet.

        ``services.currentUserLanguage`` is only written when it is the empty
        string; ``services.user.language`` is then also filled if it is falsy.
        A missing, non-string, blank or "unknown" detection is ignored.
        """
        rawLanguage = aiResult.get('languageUserDetected')
        detectedLang = rawLanguage.strip() if isinstance(rawLanguage, str) else ''
        if not detectedLang or detectedLang.lower() == 'unknown':
            return
        # Set language only if currentUserLanguage is empty.
        if self.services.currentUserLanguage != "":
            return

        self.services.currentUserLanguage = detectedLang
        logger.info(f"Set currentUserLanguage from intent: {detectedLang}")

        # Also set services.user.language if it's empty.
        if self.services.user and not self.services.user.language:
            self.services.user.language = detectedLang
            logger.info(f"Set services.user.language from intent: {detectedLang}")

    def _isValidJsonResponse(self, response: str) -> bool:
        """Check whether ``response`` contains a JSON object with "primaryGoal".

        Returns False for non-string input instead of raising.
        """
        if not isinstance(response, str):
            return False
        return self._findIntentObject(response) is not None