# gateway/modules/workflows/processing/adaptive/intentAnalyzer.py

# intentAnalyzer.py
# Intent analysis for adaptive React mode

import re
import logging
from typing import Dict, Any, List
from enum import Enum

logger = logging.getLogger(__name__)

class DataType(Enum):
    """Kinds of data a user prompt can be classified as asking for.

    The string values are what ends up in the "dataType" field of the
    intent-analysis result.
    """
    NUMBERS = "numbers"
    TEXT = "text"
    DOCUMENTS = "documents"
    ANALYSIS = "analysis"
    CODE = "code"
    UNKNOWN = "unknown"  # fallback when no data-type pattern matches the prompt

class ExpectedFormat(Enum):
    """Output formats a user prompt can be classified as expecting.

    The string values are what ends up in the "expectedFormat" field of the
    intent-analysis result.
    """
    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    # NOTE(review): IntentAnalyzer.formatPatterns defines no patterns for
    # VISUAL, so the pattern-based detection in this file never selects it.
    VISUAL = "visual"
    UNKNOWN = "unknown"  # fallback when no format pattern matches the prompt

class IntentAnalyzer:
    """Analyzes user intent to understand what they actually want.

    The analysis is purely heuristic: the lower-cased prompt is matched
    against regular-expression tables (data type, expected format) and
    keyword patterns (quality requirements, success criteria).
    """

    # Keyword patterns are anchored on word boundaries so that a keyword only
    # counts when it appears as a word of its own. Plain substring tests gave
    # false positives such as 'format' in "information", 'all' in "small",
    # 'raw' in "draw" or 'word' in "password".
    _HIGH_ACCURACY_RE = re.compile(r'\b(?:exact|precise|accurate|correct)(?:ly)?\b')
    _LOW_ACCURACY_RE = re.compile(r'\b(?:approximate(?:ly)?|rough(?:ly)?|estimate[sd]?)\b')
    _HIGH_COMPLETENESS_RE = re.compile(r'\b(?:complete(?:ly)?|full(?:y)?|comprehensive(?:ly)?|all)\b')
    _LOW_COMPLETENESS_RE = re.compile(r'\b(?:summary|brief(?:ly)?|overview)\b')
    _FORMATTED_RE = re.compile(r'\b(?:formatted|structured|organized)\b')
    _RAW_RE = re.compile(r'\b(?:raw|plain|simple)\b')

    _FIRST_RE = re.compile(r'\bfirst\b')
    _NUMBERS_RE = re.compile(r'\bnumbers\b')
    _PRIME_RE = re.compile(r'\bprimes?\b')
    _DOCUMENT_RE = re.compile(r'\bdocuments?\b')
    _FORMAT_RE = re.compile(r'\b(?:re)?format(?:s|ted|ting)?\b')
    _WORD_RE = re.compile(r'\bword\b')
    _EXCEL_RE = re.compile(r'\bexcel\b')

    def __init__(self):
        # Both tables are scanned in insertion order and the first key with a
        # matching pattern wins, so the order of the entries is significant.
        self.dataTypePatterns = {
            DataType.NUMBERS: [
                r'\b(numbers?|digits?|count|list|sequence)\b',
                r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
                r'\b(calculate|compute|generate)\s+(numbers?)\b',
                r'\b(first|last)\s+\d+\s+(numbers?)\b'
            ],
            DataType.TEXT: [
                r'\b(text|content|words?|sentences?|paragraphs?)\b',
                r'\b(write|create|generate)\s+(text|content)\b',
                r'\b(summary|description|explanation)\b',
                r'\b(article|essay|report)\b'
            ],
            DataType.DOCUMENTS: [
                r'\b(document|file|report|pdf|word|excel)\b',
                r'\b(create|generate|make)\s+(document|file|report)\b',
                r'\b(format|structure|organize)\s+(document)\b',
                r'\b(presentation|slides?)\b'
            ],
            DataType.ANALYSIS: [
                r'\b(analyze|analysis|examine|study|evaluate)\b',
                r'\b(insights?|findings?|results?)\b',
                r'\b(compare|contrast|evaluate)\b',
                r'\b(trends?|patterns?)\b'
            ],
            DataType.CODE: [
                r'\b(code|program|script|algorithm|function)\b',
                r'\b(write|create|develop)\s+(code|program|script)\b',
                r'\b(implement|build|construct)\b',
                r'\b(debug|fix|optimize)\s+(code)\b'
            ]
        }

        self.formatPatterns = {
            ExpectedFormat.RAW_DATA: [
                r'\b(raw|plain|simple|basic)\b',
                r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
                r'\b(just|only)\s+(numbers?|data)\b'
            ],
            ExpectedFormat.FORMATTED: [
                r'\b(formatted|structured|organized|presented)\b',
                r'\b(table|chart|graph|visual)\b',
                r'\b(pretty|nice|clean)\s+(format|presentation)\b',
                r'\b(professional|polished)\b'
            ],
            ExpectedFormat.STRUCTURED: [
                r'\b(json|xml|csv|structured)\b',
                r'\b(organized|categorized|grouped)\b',
                r'\b(systematic|methodical)\b',
                r'\b(database|spreadsheet)\b'
            ]
        }

    def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyzes user intent from prompt and context.

        Args:
            userPrompt: The raw prompt text entered by the user.
            context: Optional workflow context. Only ``context.task_step.objective``
                is read, and only when it is present.

        Returns:
            A dict with the keys "primaryGoal", "dataType", "expectedFormat",
            "qualityRequirements", "successCriteria" and "confidenceScore".
            If any step raises, the error is logged and the default analysis
            from ``_createDefaultIntentAnalysis`` is returned instead.
        """
        try:
            primaryGoal = self._extractPrimaryGoal(userPrompt)
            dataType = self._classifyDataType(userPrompt)
            expectedFormat = self._determineExpectedFormat(userPrompt)
            qualityRequirements = self._assessQualityRequirements(userPrompt, context)
            successCriteria = self._extractSuccessCriteria(userPrompt, context)
            confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)

            return {
                "primaryGoal": primaryGoal,
                "dataType": dataType.value,
                "expectedFormat": expectedFormat.value,
                "qualityRequirements": qualityRequirements,
                "successCriteria": successCriteria,
                "confidenceScore": confidenceScore
            }

        except Exception as e:
            # Best-effort boundary: intent analysis must never break the caller,
            # so log (with traceback) and fall back to a neutral result.
            logger.exception("Error analyzing user intent: %s", e)
            return self._createDefaultIntentAnalysis(userPrompt)

    def _extractPrimaryGoal(self, userPrompt: str) -> str:
        """Extracts the primary goal from user prompt.

        Currently this is just the prompt with surrounding whitespace removed.
        """
        return userPrompt.strip()

    @staticmethod
    def _firstMatch(patternTable: Dict[Any, List[str]], text: str, default: Any) -> Any:
        """Returns the first key of patternTable with a pattern found in text, else default."""
        for key, patterns in patternTable.items():
            if any(re.search(pattern, text) for pattern in patterns):
                return key
        return default

    def _classifyDataType(self, userPrompt: str) -> "DataType":
        """Classifies the type of data the user wants.

        Returns the first DataType (in ``dataTypePatterns`` order) that has a
        pattern matching the lower-cased prompt, or DataType.UNKNOWN.
        """
        return self._firstMatch(self.dataTypePatterns, userPrompt.lower(), DataType.UNKNOWN)

    def _determineExpectedFormat(self, userPrompt: str) -> "ExpectedFormat":
        """Determines the expected format of the output.

        Returns the first ExpectedFormat (in ``formatPatterns`` order) that has
        a pattern matching the lower-cased prompt, or ExpectedFormat.UNKNOWN.
        """
        return self._firstMatch(self.formatPatterns, userPrompt.lower(), ExpectedFormat.UNKNOWN)

    def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Assesses quality requirements from the prompt.

        Args:
            userPrompt: The raw prompt text.
            context: Accepted for interface compatibility; not read here.

        Returns:
            A dict with "accuracyThreshold" (0.95 / 0.8 / 0.7),
            "completenessThreshold" (0.95 / 0.8 / 0.6) and "formatRequirement"
            ("formatted" / "raw" / "any"). In each pair of keyword groups the
            stricter group takes precedence when both appear in the prompt.
        """
        promptLower = userPrompt.lower()

        # Accuracy: strict wording beats approximate wording.
        accuracyThreshold = 0.8
        if self._HIGH_ACCURACY_RE.search(promptLower):
            accuracyThreshold = 0.95
        elif self._LOW_ACCURACY_RE.search(promptLower):
            accuracyThreshold = 0.7

        # Completeness: "complete/all" wording beats "summary/brief" wording.
        completenessThreshold = 0.8
        if self._HIGH_COMPLETENESS_RE.search(promptLower):
            completenessThreshold = 0.95
        elif self._LOW_COMPLETENESS_RE.search(promptLower):
            completenessThreshold = 0.6

        # Format: an explicit formatting request beats a "raw/plain" request.
        formatRequirement = "any"
        if self._FORMATTED_RE.search(promptLower):
            formatRequirement = "formatted"
        elif self._RAW_RE.search(promptLower):
            formatRequirement = "raw"

        return {
            "accuracyThreshold": accuracyThreshold,
            "completenessThreshold": completenessThreshold,
            "formatRequirement": formatRequirement
        }

    def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
        """Extracts success criteria from prompt and context.

        Args:
            userPrompt: The raw prompt text.
            context: Optional object; when it has a truthy ``task_step`` whose
                ``objective`` is a string, that objective is scanned as well.

        Returns:
            The list of matched criteria, or a single generic criterion when
            nothing matched. The list is therefore never empty.
        """
        criteria = []
        promptLower = userPrompt.lower()

        # Prompt-based criteria (whole-word matches only).
        if self._FIRST_RE.search(promptLower) and self._NUMBERS_RE.search(promptLower):
            criteria.append("Contains the first N numbers as requested")

        if self._PRIME_RE.search(promptLower):
            criteria.append("Contains actual prime numbers, not code to generate them")

        if self._DOCUMENT_RE.search(promptLower):
            criteria.append("Creates a properly formatted document")

        if self._FORMAT_RE.search(promptLower):
            criteria.append("Content is properly formatted as requested")

        # Context-based criteria. Read defensively: a task step without a
        # string objective must not raise, because the caller would then
        # discard the whole analysis in favour of the default result.
        taskStep = getattr(context, 'task_step', None)
        objective = getattr(taskStep, 'objective', None) if taskStep else None
        if isinstance(objective, str):
            taskObjective = objective.lower()
            if self._WORD_RE.search(taskObjective):
                criteria.append("Creates a Word document")
            if self._EXCEL_RE.search(taskObjective):
                criteria.append("Creates an Excel spreadsheet")

        return criteria if criteria else ["Delivers what the user requested"]

    def _calculateConfidenceScore(self, dataType: "DataType", expectedFormat: "ExpectedFormat",
                                successCriteria: List[str]) -> float:
        """Calculates confidence score for the intent analysis.

        Adds 0.3 for a known data type, 0.2 for a known format, 0.3 for having
        at least one success criterion and a further 0.2 for having more than
        one; the sum is capped at 1.0.

        NOTE(review): ``_extractSuccessCriteria`` always returns at least the
        generic fallback criterion, so via ``analyzeUserIntent`` the first
        criteria bonus is always granted.
        """
        score = 0.0

        if dataType != DataType.UNKNOWN:
            score += 0.3

        if expectedFormat != ExpectedFormat.UNKNOWN:
            score += 0.2

        if successCriteria:
            score += 0.3

        if len(successCriteria) > 1:
            score += 0.2

        return min(score, 1.0)

    def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
        """Creates a default intent analysis when analysis fails.

        The result has the same keys as a successful analysis, with unknown
        data type and format, neutral thresholds and a low confidence of 0.1.
        """
        return {
            "primaryGoal": userPrompt,
            "dataType": "unknown",
            "expectedFormat": "unknown",
            "qualityRequirements": {
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8,
                "formatRequirement": "any"
            },
            "successCriteria": ["Delivers what the user requested"],
            "confidenceScore": 0.1
        }