# gateway/modules/workflows/processing/adaptive/intentAnalyzer.py
# 2025-10-04 02:54:28 +02:00
#
# 239 lines
# 9.2 KiB
# Python

# intentAnalyzer.py
# Intent analysis for adaptive React mode
import re
import logging
from typing import Dict, Any, List
from enum import Enum
# Module-level logger named after this module; IntentAnalyzer uses it to report analysis failures.
logger = logging.getLogger(__name__)
class DataType(Enum):
    """Categories of content a user prompt can be classified as asking for.

    The string values are what ends up in the ``"dataType"`` field of an
    intent analysis result.
    """

    NUMBERS = "numbers"
    TEXT = "text"
    DOCUMENTS = "documents"
    ANALYSIS = "analysis"
    CODE = "code"
    # Fallback used when none of the data-type patterns matches the prompt.
    UNKNOWN = "unknown"
class ExpectedFormat(Enum):
    """Presentation styles a user prompt can be classified as expecting.

    The string values are what ends up in the ``"expectedFormat"`` field of
    an intent analysis result.
    """

    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    # NOTE(review): no pattern in IntentAnalyzer.formatPatterns appears to map
    # to this member, so it looks unreachable from the analyzer - confirm.
    VISUAL = "visual"
    # Fallback used when none of the format patterns matches the prompt.
    UNKNOWN = "unknown"
class IntentAnalyzer:
    """Analyzes user intent to understand what they actually want.

    The analysis is purely heuristic: the lower-cased prompt is scanned with
    regular expressions and keyword matchers, and the findings are combined
    into a plain dictionary (see ``analyzeUserIntent``).
    """

    # Keyword matchers for the quality / success-criteria heuristics. They are
    # anchored on word boundaries (with a few explicit inflections) instead of
    # being raw substring tests, so that "small" is not read as "all", "draw"
    # as "raw", "information" as "format", "password" as "word" or
    # "excellent" as "excel". All of them are applied to lower-cased text.
    _HIGH_ACCURACY = re.compile(
        r'\b(?:exact(?:ly)?|precise(?:ly)?|accurate(?:ly)?|correct(?:ly)?)\b'
    )
    _LOW_ACCURACY = re.compile(
        r'\b(?:approximate(?:ly)?|rough(?:ly)?|estimate[sd]?)\b'
    )
    _HIGH_COMPLETENESS = re.compile(
        r'\b(?:complete(?:ly)?|full(?:y)?|comprehensive(?:ly)?|all)\b'
    )
    _LOW_COMPLETENESS = re.compile(r'\b(?:summary|brief(?:ly)?|overview)\b')
    _FORMATTED_OUTPUT = re.compile(r'\b(?:formatted|structured|organized)\b')
    _RAW_OUTPUT = re.compile(r'\b(?:raw|plain|simple)\b')
    _FIRST = re.compile(r'\bfirst\b')
    _NUMBERS = re.compile(r'\bnumbers\b')
    _PRIME = re.compile(r'\bprimes?\b')
    # Prefix matches: "documents", "formatted", "formatting", "reformat", ...
    _DOCUMENT = re.compile(r'\bdocument')
    _FORMAT = re.compile(r'\b(?:re)?format')
    _WORD = re.compile(r'\bword\b')
    _EXCEL = re.compile(r'\bexcel\b')

    def __init__(self):
        # Regexes per data type. Dict order matters: classification returns
        # the first type that has any matching pattern.
        self.dataTypePatterns = {
            DataType.NUMBERS: [
                r'\b(numbers?|digits?|count|list|sequence)\b',
                r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
                r'\b(calculate|compute|generate)\s+(numbers?)\b',
                r'\b(first|last)\s+\d+\s+(numbers?)\b',
            ],
            DataType.TEXT: [
                r'\b(text|content|words?|sentences?|paragraphs?)\b',
                r'\b(write|create|generate)\s+(text|content)\b',
                r'\b(summary|description|explanation)\b',
                r'\b(article|essay|report)\b',
            ],
            DataType.DOCUMENTS: [
                r'\b(document|file|report|pdf|word|excel)\b',
                r'\b(create|generate|make)\s+(document|file|report)\b',
                r'\b(format|structure|organize)\s+(document)\b',
                r'\b(presentation|slides?)\b',
            ],
            DataType.ANALYSIS: [
                r'\b(analyze|analysis|examine|study|evaluate)\b',
                r'\b(insights?|findings?|results?)\b',
                r'\b(compare|contrast|evaluate)\b',
                r'\b(trends?|patterns?)\b',
            ],
            DataType.CODE: [
                r'\b(code|program|script|algorithm|function)\b',
                r'\b(write|create|develop)\s+(code|program|script)\b',
                r'\b(implement|build|construct)\b',
                r'\b(debug|fix|optimize)\s+(code)\b',
            ],
        }
        # Regexes per expected output format; same first-match-wins rule.
        self.formatPatterns = {
            ExpectedFormat.RAW_DATA: [
                r'\b(raw|plain|simple|basic)\b',
                r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
                r'\b(just|only)\s+(numbers?|data)\b',
            ],
            ExpectedFormat.FORMATTED: [
                r'\b(formatted|structured|organized|presented)\b',
                r'\b(table|chart|graph|visual)\b',
                r'\b(pretty|nice|clean)\s+(format|presentation)\b',
                r'\b(professional|polished)\b',
            ],
            ExpectedFormat.STRUCTURED: [
                r'\b(json|xml|csv|structured)\b',
                r'\b(organized|categorized|grouped)\b',
                r'\b(systematic|methodical)\b',
                r'\b(database|spreadsheet)\b',
            ],
        }

    def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyze the user's intent from the prompt and context.

        Args:
            userPrompt: The raw prompt text.
            context: Optional object; only ``context.task_step.objective`` is
                consulted (when present) to derive extra success criteria.

        Returns:
            A dict with the keys ``primaryGoal``, ``dataType``,
            ``expectedFormat``, ``qualityRequirements``, ``successCriteria``
            and ``confidenceScore``. If any step raises, the error is logged
            and the default analysis (confidence 0.1) is returned instead.
        """
        try:
            primaryGoal = self._extractPrimaryGoal(userPrompt)
            dataType = self._classifyDataType(userPrompt)
            expectedFormat = self._determineExpectedFormat(userPrompt)
            qualityRequirements = self._assessQualityRequirements(userPrompt, context)
            successCriteria = self._extractSuccessCriteria(userPrompt, context)
            confidenceScore = self._calculateConfidenceScore(
                dataType, expectedFormat, successCriteria
            )
            return {
                "primaryGoal": primaryGoal,
                "dataType": dataType.value,
                "expectedFormat": expectedFormat.value,
                "qualityRequirements": qualityRequirements,
                "successCriteria": successCriteria,
                "confidenceScore": confidenceScore,
            }
        except Exception as e:
            # Deliberate best-effort boundary: never fail the caller. The
            # traceback is attached because the exception is swallowed here.
            logger.error(f"Error analyzing user intent: {str(e)}", exc_info=True)
            return self._createDefaultIntentAnalysis(userPrompt)

    @staticmethod
    def _firstMatchingKey(patternTable: Dict[Any, List[str]], text: str, fallback: Any) -> Any:
        """Return the first key of ``patternTable`` with a pattern found in ``text``."""
        for key, patterns in patternTable.items():
            if any(re.search(pattern, text) for pattern in patterns):
                return key
        return fallback

    def _extractPrimaryGoal(self, userPrompt: str) -> str:
        """Return the primary goal, currently the whitespace-stripped prompt."""
        # Simple extraction - can be enhanced
        return userPrompt.strip()

    def _classifyDataType(self, userPrompt: str) -> DataType:
        """Classify the type of data the user wants.

        Returns the first ``DataType`` (in ``dataTypePatterns`` order) with a
        pattern matching the lower-cased prompt, else ``DataType.UNKNOWN``.
        """
        return self._firstMatchingKey(
            self.dataTypePatterns, userPrompt.lower(), DataType.UNKNOWN
        )

    def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
        """Determine the expected output format.

        Returns the first ``ExpectedFormat`` (in ``formatPatterns`` order)
        with a pattern matching the lower-cased prompt, else
        ``ExpectedFormat.UNKNOWN``.
        """
        return self._firstMatchingKey(
            self.formatPatterns, userPrompt.lower(), ExpectedFormat.UNKNOWN
        )

    def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Derive quality thresholds from keywords in the prompt.

        Args:
            userPrompt: The raw prompt text.
            context: Accepted for signature symmetry; not used here.

        Returns:
            A dict with ``accuracyThreshold`` (0.95 / 0.7 / default 0.8),
            ``completenessThreshold`` (0.95 / 0.6 / default 0.8) and
            ``formatRequirement`` (``"formatted"`` / ``"raw"`` / ``"any"``).
            In each category the "high"/"formatted" keywords take precedence.
        """
        promptLower = userPrompt.lower()

        # Accuracy requirements
        if self._HIGH_ACCURACY.search(promptLower):
            accuracyThreshold = 0.95
        elif self._LOW_ACCURACY.search(promptLower):
            accuracyThreshold = 0.7
        else:
            accuracyThreshold = 0.8

        # Completeness requirements
        if self._HIGH_COMPLETENESS.search(promptLower):
            completenessThreshold = 0.95
        elif self._LOW_COMPLETENESS.search(promptLower):
            completenessThreshold = 0.6
        else:
            completenessThreshold = 0.8

        # Format requirements
        if self._FORMATTED_OUTPUT.search(promptLower):
            formatRequirement = "formatted"
        elif self._RAW_OUTPUT.search(promptLower):
            formatRequirement = "raw"
        else:
            formatRequirement = "any"

        return {
            "accuracyThreshold": accuracyThreshold,
            "completenessThreshold": completenessThreshold,
            "formatRequirement": formatRequirement,
        }

    def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
        """Extract success criteria from the prompt and context.

        Args:
            userPrompt: The raw prompt text.
            context: Optional object; if it has a truthy ``task_step`` whose
                ``objective`` is a string, that objective is scanned for
                "word" / "excel". Anything else is ignored rather than raising.

        Returns:
            A non-empty list of criteria; falls back to
            ``["Delivers what the user requested"]`` when nothing matched.
        """
        criteria = []
        promptLower = userPrompt.lower()

        # Explicit criteria from the prompt
        if self._FIRST.search(promptLower) and self._NUMBERS.search(promptLower):
            criteria.append("Contains the first N numbers as requested")
        if self._PRIME.search(promptLower):
            criteria.append("Contains actual prime numbers, not code to generate them")
        if self._DOCUMENT.search(promptLower):
            criteria.append("Creates a properly formatted document")
        if self._FORMAT.search(promptLower):
            criteria.append("Content is properly formatted as requested")

        # Context-based criteria. A missing or non-string objective must not
        # raise: that would discard the whole analysis in analyzeUserIntent.
        taskStep = getattr(context, 'task_step', None)
        objective = getattr(taskStep, 'objective', None) if taskStep else None
        if isinstance(objective, str):
            taskObjective = objective.lower()
            if self._WORD.search(taskObjective):
                criteria.append("Creates a Word document")
            if self._EXCEL.search(taskObjective):
                criteria.append("Creates an Excel spreadsheet")

        return criteria if criteria else ["Delivers what the user requested"]

    def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
                                  successCriteria: List[str]) -> float:
        """Calculate a confidence score in [0.0, 1.0] for the analysis.

        Adds 0.3 for a known data type, 0.2 for a known format, 0.3 for at
        least one success criterion and 0.2 more for two or more criteria.

        NOTE(review): ``_extractSuccessCriteria`` never returns an empty list
        (it falls back to a generic criterion), so via ``analyzeUserIntent``
        the first criteria bonus is always awarded.
        """
        score = 0.0
        # Data type confidence
        if dataType != DataType.UNKNOWN:
            score += 0.3
        # Format confidence
        if expectedFormat != ExpectedFormat.UNKNOWN:
            score += 0.2
        # Success criteria confidence
        if successCriteria:
            score += 0.3
        # Additional confidence for specific patterns
        if len(successCriteria) > 1:
            score += 0.2
        return min(score, 1.0)

    def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
        """Create the fallback intent analysis returned when analysis fails."""
        return {
            "primaryGoal": userPrompt,
            "dataType": "unknown",
            "expectedFormat": "unknown",
            "qualityRequirements": {
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8,
                "formatRequirement": "any",
            },
            "successCriteria": ["Delivers what the user requested"],
            "confidenceScore": 0.1,
        }