# intentAnalyzer.py
# Intent analysis for adaptive React mode
import re
import logging
from typing import Dict, Any, List
from enum import Enum

# Module-level logger following the standard getLogger(__name__) convention.
logger = logging.getLogger(__name__)
class DataType(Enum):
    """Kinds of data a user prompt may be asking for.

    UNKNOWN is the fallback when no classification pattern matches.
    """
    NUMBERS = "numbers"
    TEXT = "text"
    DOCUMENTS = "documents"
    ANALYSIS = "analysis"
    CODE = "code"
    UNKNOWN = "unknown"
class ExpectedFormat(Enum):
    """How the user expects the output to be presented.

    UNKNOWN is the fallback when no format pattern matches.
    """
    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    VISUAL = "visual"
    UNKNOWN = "unknown"
class IntentAnalyzer:
    """Analyzes user intent to understand what they actually want.

    Given a free-text user prompt (and an optional task context), produces a
    dict describing the primary goal, the kind of data requested (DataType),
    the expected output format (ExpectedFormat), quality thresholds, success
    criteria, and a confidence score for the analysis itself.
    """

    def __init__(self):
        # Regex groups used to classify what kind of data the user wants.
        # Dict insertion order matters: in _classifyDataType the first
        # DataType whose patterns match wins.
        self.dataTypePatterns = {
            DataType.NUMBERS: [
                r'\b(numbers?|digits?|count|list|sequence)\b',
                r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
                r'\b(calculate|compute|generate)\s+(numbers?)\b',
                r'\b(first|last)\s+\d+\s+(numbers?)\b'
            ],
            DataType.TEXT: [
                r'\b(text|content|words?|sentences?|paragraphs?)\b',
                r'\b(write|create|generate)\s+(text|content)\b',
                r'\b(summary|description|explanation)\b',
                r'\b(article|essay|report)\b'
            ],
            DataType.DOCUMENTS: [
                r'\b(document|file|report|pdf|word|excel)\b',
                r'\b(create|generate|make)\s+(document|file|report)\b',
                r'\b(format|structure|organize)\s+(document)\b',
                r'\b(presentation|slides?)\b'
            ],
            DataType.ANALYSIS: [
                r'\b(analyze|analysis|examine|study|evaluate)\b',
                r'\b(insights?|findings?|results?)\b',
                r'\b(compare|contrast|evaluate)\b',
                r'\b(trends?|patterns?)\b'
            ],
            DataType.CODE: [
                r'\b(code|program|script|algorithm|function)\b',
                r'\b(write|create|develop)\s+(code|program|script)\b',
                r'\b(implement|build|construct)\b',
                r'\b(debug|fix|optimize)\s+(code)\b'
            ]
        }

        # Regex groups used to detect the desired presentation format.
        # NOTE(review): ExpectedFormat.VISUAL has no pattern group here, so
        # it can never be detected by _determineExpectedFormat — confirm
        # whether that is intentional.
        self.formatPatterns = {
            ExpectedFormat.RAW_DATA: [
                r'\b(raw|plain|simple|basic)\b',
                r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
                r'\b(just|only)\s+(numbers?|data)\b'
            ],
            ExpectedFormat.FORMATTED: [
                r'\b(formatted|structured|organized|presented)\b',
                r'\b(table|chart|graph|visual)\b',
                r'\b(pretty|nice|clean)\s+(format|presentation)\b',
                r'\b(professional|polished)\b'
            ],
            ExpectedFormat.STRUCTURED: [
                r'\b(json|xml|csv|structured)\b',
                r'\b(organized|categorized|grouped)\b',
                r'\b(systematic|methodical)\b',
                r'\b(database|spreadsheet)\b'
            ]
        }

    def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyzes user intent from prompt and context.

        Args:
            userPrompt: the raw user request text.
            context: optional task context; only a `task_step.objective`
                attribute is consulted, if present.

        Returns:
            A dict with keys: primaryGoal, dataType, expectedFormat,
            qualityRequirements, successCriteria, confidenceScore.
            Never raises: any failure falls back to a low-confidence default.
        """
        try:
            primaryGoal = self._extractPrimaryGoal(userPrompt)
            dataType = self._classifyDataType(userPrompt)
            expectedFormat = self._determineExpectedFormat(userPrompt)
            qualityRequirements = self._assessQualityRequirements(userPrompt, context)
            successCriteria = self._extractSuccessCriteria(userPrompt, context)
            confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)

            return {
                "primaryGoal": primaryGoal,
                "dataType": dataType.value,
                "expectedFormat": expectedFormat.value,
                "qualityRequirements": qualityRequirements,
                "successCriteria": successCriteria,
                "confidenceScore": confidenceScore
            }
        except Exception as e:
            # Deliberate catch-all: intent analysis is best-effort and must
            # never break the caller. Lazy %-args avoid formatting cost when
            # the error level is disabled.
            logger.error("Error analyzing user intent: %s", e)
            return self._createDefaultIntentAnalysis(userPrompt)

    def _extractPrimaryGoal(self, userPrompt: str) -> str:
        """Extracts the primary goal from user prompt.

        Currently just the trimmed prompt itself — a placeholder that can be
        enhanced with real goal extraction later.
        """
        return userPrompt.strip()

    def _classifyDataType(self, userPrompt: str) -> DataType:
        """Classifies the type of data the user wants.

        Returns the first DataType (in dict insertion order) with any
        matching pattern, or DataType.UNKNOWN if none match.
        """
        promptLower = userPrompt.lower()

        for dataType, patterns in self.dataTypePatterns.items():
            if any(re.search(pattern, promptLower) for pattern in patterns):
                return dataType

        return DataType.UNKNOWN

    def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
        """Determines the expected format of the output.

        Returns the first ExpectedFormat (in dict insertion order) with any
        matching pattern, or ExpectedFormat.UNKNOWN if none match.
        """
        promptLower = userPrompt.lower()

        for formatType, patterns in self.formatPatterns.items():
            if any(re.search(pattern, promptLower) for pattern in patterns):
                return formatType

        return ExpectedFormat.UNKNOWN

    def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Assesses quality requirements from prompt and context.

        Returns a dict with accuracyThreshold and completenessThreshold in
        [0, 1] plus a formatRequirement of "any"/"formatted"/"raw".
        The `context` argument is currently unused, kept for interface parity.
        """
        promptLower = userPrompt.lower()

        # Accuracy: tighten for precision wording, relax for estimates.
        accuracyThreshold = 0.8
        if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
            accuracyThreshold = 0.95
        elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
            accuracyThreshold = 0.7

        # Completeness: exhaustive wording raises the bar, summary lowers it.
        completenessThreshold = 0.8
        if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
            completenessThreshold = 0.95
        elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
            completenessThreshold = 0.6

        # Presentation: explicit formatting words win over raw/plain words.
        formatRequirement = "any"
        if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
            formatRequirement = "formatted"
        elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
            formatRequirement = "raw"

        return {
            "accuracyThreshold": accuracyThreshold,
            "completenessThreshold": completenessThreshold,
            "formatRequirement": formatRequirement
        }

    def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
        """Extracts success criteria from prompt and context.

        Collects criteria from keyword cues in the prompt and, when the
        context exposes a task_step with an objective, from that objective
        too. Always returns at least one criterion.
        """
        criteria = []
        promptLower = userPrompt.lower()

        # Prompt-derived criteria.
        if 'first' in promptLower and 'numbers' in promptLower:
            criteria.append("Contains the first N numbers as requested")

        if 'prime' in promptLower:
            criteria.append("Contains actual prime numbers, not code to generate them")

        if 'document' in promptLower:
            criteria.append("Creates a properly formatted document")

        if 'format' in promptLower:
            criteria.append("Content is properly formatted as requested")

        # Context-derived criteria (duck-typed: only used when present).
        if hasattr(context, 'task_step') and context.task_step:
            taskObjective = context.task_step.objective.lower()
            if 'word' in taskObjective:
                criteria.append("Creates a Word document")
            if 'excel' in taskObjective:
                criteria.append("Creates an Excel spreadsheet")

        return criteria if criteria else ["Delivers what the user requested"]

    def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
                                  successCriteria: List[str]) -> float:
        """Calculates confidence score for the intent analysis.

        Additive weights: 0.3 for a known data type, 0.2 for a known format,
        0.3 for having any success criteria (always true given the fallback
        in _extractSuccessCriteria), 0.2 for multiple criteria; capped at 1.0.
        """
        score = 0.0

        if dataType != DataType.UNKNOWN:
            score += 0.3

        if expectedFormat != ExpectedFormat.UNKNOWN:
            score += 0.2

        if len(successCriteria) > 0:
            score += 0.3

        if len(successCriteria) > 1:
            score += 0.2

        return min(score, 1.0)

    def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
        """Creates a default intent analysis when analysis fails.

        Shape mirrors analyzeUserIntent's success result, with unknown
        classifications, default thresholds, and a very low confidence.
        """
        return {
            "primaryGoal": userPrompt,
            "dataType": "unknown",
            "expectedFormat": "unknown",
            "qualityRequirements": {
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8,
                "formatRequirement": "any"
            },
            "successCriteria": ["Delivers what the user requested"],
            "confidenceScore": 0.1
        }