# gateway/modules/workflows/processing/adaptive/contentValidator.py

# contentValidator.py
# Content validation for adaptive React mode

import json
import logging
import re
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

class ContentValidator:
    """Validates delivered content against user intent.

    Validation is delegated to the AI service exposed as ``services.ai``.
    Every result returned by this class is a dict with the same keys
    (``overallSuccess``, ``qualityScore``, ``validationDetails``,
    ``improvementSuggestions``, ``schemaCompliant``, ``originalType``,
    ``missingFields``), whether the AI answered with clean JSON, with
    malformed text, or not at all.
    """

    # Maximum number of characters of each document forwarded to the AI
    _MAX_CONTENT_CHARS = 2000

    # JSON shape the AI is instructed to return; %s is the example qualityScore
    _RESPONSE_TEMPLATE = """{
    "overallSuccess": false,
    "qualityScore": %s,
    "dataTypeMatch": false,
    "formatMatch": false,
    "successCriteriaMet": [false, false],
    "gapAnalysis": "Content does not match expected format and lacks required elements",
    "improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
    "validationDetails": [
        {
            "documentName": "Content Validation",
            "issues": ["Format mismatch", "Missing required elements"],
            "suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
        }
    ]
}"""

    # Whole-word indicators used when no JSON can be recovered. Word boundaries
    # keep "incomplete"/"unsuccessful" and JSON key names such as
    # "overallSuccess" from being counted as positive evidence.
    _NEGATIVE_INDICATORS = re.compile(
        r"\b(?:unsuccessful(?:ly)?|fail(?:s|ed|ing|ure|ures)?|incomplete|missing"
        r"|errors?|unfulfilled|unsatisfied|dissatisfied)\b"
        r"|\bnot\s+(?:successful|completed?|fulfilled|satisfied)\b",
        re.IGNORECASE,
    )
    _POSITIVE_INDICATORS = re.compile(
        r"\b(?:success(?:ful(?:ly)?)?|completed?|fulfilled|satisfied)\b",
        re.IGNORECASE,
    )

    def __init__(self, services=None):
        """Stores the service container; ``services.ai`` is used for validation."""
        self.services = services

    async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates delivered content against user intent using AI.

        Args:
            documents: Delivered documents; ``documentName`` and ``documentData``
                attributes are read from each one when present.
            intent: Intent description; the keys ``primaryGoal``, ``dataType``,
                ``expectedFormat`` and ``successCriteria`` are read.

        Returns:
            A schema-stable validation result. Never raises: any error is
            converted into a failed validation result.
        """
        try:
            return await self._validateWithAI(documents, intent)
        except Exception as e:
            logger.error("Error validating content: %s", e)
            return self._createFailedValidationResult(str(e))

    def _extractContent(self, doc: Any) -> str:
        """Extracts the textual content of a document.

        Reads ``doc.documentData``; when it is a dict with a ``content`` key that
        value is used, otherwise the data itself. ``None`` yields an empty string
        (not the text "None") and bytes are decoded as UTF-8 instead of being
        rendered as a ``b'...'`` repr. Returns "" when nothing can be extracted.
        """
        try:
            if not hasattr(doc, 'documentData'):
                return ""
            data = doc.documentData
            if isinstance(data, dict) and 'content' in data:
                data = data['content']
            if data is None:
                return ""
            if isinstance(data, (bytes, bytearray)):
                return bytes(data).decode("utf-8", errors="replace")
            return str(data)
        except Exception:
            # Best effort: an unreadable document is validated as empty content
            return ""

    def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
        """Creates a failed validation result in a schema-stable shape.

        ``overallSuccess`` and ``qualityScore`` are ``None`` because the outcome
        is unknown when the validator itself failed.
        """
        return {
            "overallSuccess": None,  # Unknown when validator itself failed
            "qualityScore": None,
            "validationDetails": [],
            "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."],
            "schemaCompliant": False,
            "originalType": "error",
            "missingFields": ["overallSuccess", "qualityScore"],
        }

    def _extractJsonObject(self, text: str) -> Optional[Dict[str, Any]]:
        """Finds the validation JSON object embedded in free text.

        Tries to decode a JSON object at every ``{`` in ``text`` (this covers raw
        JSON, JSON inside markdown fences and JSON surrounded by prose, including
        nested objects). The first object carrying ``overallSuccess`` wins.

        If no object carries that key, the first decodable object is returned
        only when the text never mentions ``"overallSuccess"``. Otherwise the
        object that was meant to carry it is malformed and the decoded one is
        just a nested fragment, so ``None`` is returned and the caller can
        salvage values with the regex fallback.
        """
        decoder = json.JSONDecoder()
        firstObject = None
        pos = text.find("{")
        while pos != -1:
            try:
                candidate, end = decoder.raw_decode(text, pos)
            except ValueError:
                # Not a valid object at this brace; try the next one
                pos = text.find("{", pos + 1)
                continue
            if isinstance(candidate, dict):
                if "overallSuccess" in candidate:
                    return candidate
                if firstObject is None:
                    firstObject = candidate
            pos = text.find("{", end)

        if firstObject is not None and '"overallSuccess"' in text:
            return None
        return firstObject

    def _isValidJsonResponse(self, response: str) -> bool:
        """Checks if response contains a parseable JSON object with ``overallSuccess``."""
        if not isinstance(response, str):
            return False
        parsed = self._extractJsonObject(response)
        return parsed is not None and "overallSuccess" in parsed

    def _extractFallbackValidationResult(self, response: str) -> Optional[Dict[str, Any]]:
        """Extracts a minimal validation result from a malformed AI response (schema-stable).

        Values are recovered with regular expressions. When ``overallSuccess``
        cannot be read, whole-word indicators decide: any negative indicator
        yields ``False`` (so ambiguous text is never reported as success), a
        positive indicator alone yields ``True``, and no indicator yields
        ``False``.

        Returns:
            The fallback result, or ``None`` if extraction itself failed.
        """
        try:
            successMatch = re.search(r'"overallSuccess"\s*:\s*(true|false)', response, re.IGNORECASE)
            # Only well-formed numbers, so float() below cannot fail on input like ".."
            qualityMatch = re.search(r'"qualityScore"\s*:\s*(\d+(?:\.\d+)?|\.\d+)', response)
            # Allow escaped quotes inside the gap analysis text
            gapMatch = re.search(r'"gapAnalysis"\s*:\s*"((?:[^"\\]|\\.)*)"', response)

            if successMatch:
                parsedOverall = successMatch.group(1).lower() == 'true'
            elif self._NEGATIVE_INDICATORS.search(response):
                parsedOverall = False
            else:
                parsedOverall = bool(self._POSITIVE_INDICATORS.search(response))

            parsedQuality = float(qualityMatch.group(1)) if qualityMatch else None

            return {
                "overallSuccess": parsedOverall,
                "qualityScore": parsedQuality,
                "validationDetails": [{
                    "documentName": "AI Validation (Fallback)",
                    "gapAnalysis": gapMatch.group(1) if gapMatch else "Unable to parse detailed analysis",
                    "successCriteriaMet": []
                }],
                "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"],
                "schemaCompliant": False,
                "originalType": "text",
                "missingFields": [] if parsedQuality is not None else ["qualityScore"],
            }
        except Exception as e:
            logger.error("Fallback extraction failed: %s", e)
            return None

    def _normalizeAiResult(self, aiResult: Dict[str, Any]) -> Dict[str, Any]:
        """Normalizes a parsed AI answer into the schema-stable result shape.

        Fields of the wrong type are reported as ``None`` and listed in
        ``missingFields`` rather than being replaced by failure defaults.
        """
        overall = aiResult.get("overallSuccess")
        quality = aiResult.get("qualityScore")
        details = aiResult.get("validationDetails")
        criteria = aiResult.get("successCriteriaMet")
        improvements = aiResult.get("improvementSuggestions")

        # bool is a subclass of int; true/false is not a quality score
        hasNumericQuality = isinstance(quality, (int, float)) and not isinstance(quality, bool)

        # Keep improvementSuggestions a list whatever the AI sent
        if isinstance(improvements, str):
            improvements = [improvements]
        elif not isinstance(improvements, list):
            improvements = []

        normalized = {
            "overallSuccess": overall if isinstance(overall, bool) else None,
            "qualityScore": float(quality) if hasNumericQuality else None,
            "validationDetails": details if isinstance(details, list) else [{
                "documentName": "AI Validation",
                "gapAnalysis": aiResult.get("gapAnalysis", ""),
                "successCriteriaMet": criteria if isinstance(criteria, list) else []
            }],
            "improvementSuggestions": improvements,
            "schemaCompliant": True,
            "originalType": "json",
            "missingFields": []
        }

        for field in ("overallSuccess", "qualityScore"):
            if normalized[field] is None:
                normalized["missingFields"].append(field)
        # If any critical field is missing, mark as not fully compliant
        if normalized["missingFields"]:
            normalized["schemaCompliant"] = False

        return normalized

    def _buildValidationPrompt(self, intent: Dict[str, Any], deliveredJson: str) -> str:
        """Builds the primary validation prompt for the given intent and content."""
        template = self._RESPONSE_TEMPLATE % "0.5"
        return f"""
You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.

USER REQUEST: {intent.get('primaryGoal', 'Unknown')}
EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
SUCCESS CRITERIA: {intent.get('successCriteria', [])}

DELIVERED CONTENT:
{deliveredJson}

Perform comprehensive validation:
1. Check if content matches expected data type
2. Check if content matches expected format
3. Verify success criteria are met
4. Assess overall quality and completeness
5. Identify specific gaps and issues
6. Provide actionable next steps

CRITICAL: You MUST respond with ONLY the JSON object below. NO TEXT ANALYSIS. NO EXPLANATIONS. NO OTHER CONTENT.

RESPOND WITH THIS EXACT JSON FORMAT:

{template}
"""

    def _buildRetryPrompt(self, intent: Dict[str, Any], deliveredJson: str) -> str:
        """Builds the terser JSON-only prompt used when the first answer had no valid JSON."""
        template = self._RESPONSE_TEMPLATE % "0.3"
        return f"""
VALIDATE AND RETURN JSON ONLY - NO TEXT ANALYSIS

Request: {intent.get('primaryGoal', 'Unknown')}
Data Type: {intent.get('dataType', 'unknown')}
Format: {intent.get('expectedFormat', 'unknown')}
Criteria: {intent.get('successCriteria', [])}

Content: {deliveredJson}

RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:

{template}
"""

    def _parseAiResponse(self, response: str) -> Dict[str, Any]:
        """Turns a non-empty AI response into a schema-stable validation result.

        Prefers a parseable JSON object; otherwise salvages values from the text;
        otherwise returns a failed validation result.
        """
        text = response.strip()
        logger.debug("AI validation response length: %d", len(text))

        aiResult = self._extractJsonObject(text)
        if aiResult is not None:
            logger.info("AI validation JSON parsed successfully")
            return self._normalizeAiResult(aiResult)

        logger.debug("No valid JSON found in AI response, trying fallback extraction: %s...", text[:200])
        logger.debug("Full AI response: %s", text)

        fallbackResult = self._extractFallbackValidationResult(text)
        if fallbackResult:
            logger.info("Using fallback text extraction for validation")
            return fallbackResult

        logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed")
        return self._createFailedValidationResult("AI validation failed - no JSON in response")

    async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """AI-based comprehensive validation - single main function.

        Sends the (truncated) document contents and the intent to the AI service,
        retries once with a stricter prompt when the first answer contains no
        valid validation JSON, and converts the final answer into a
        schema-stable result. Never raises; errors become failed results.
        """
        try:
            services = getattr(self, 'services', None)
            aiService = getattr(services, 'ai', None) if services else None
            if aiService is None:
                return self._createFailedValidationResult("AI service not available")

            intent = intent or {}
            documentContents = [
                {
                    "name": getattr(doc, 'documentName', 'Unknown'),
                    # Limit content for AI processing
                    "content": self._extractContent(doc)[:self._MAX_CONTENT_CHARS],
                }
                for doc in (documents or [])
            ]
            # default=str: a non-string document name is only ever shown to the AI
            deliveredJson = json.dumps(documentContents, indent=2, default=str)

            # Call AI service for validation
            from modules.datamodels.datamodelAi import AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            response = await aiService.callAi(
                prompt=self._buildValidationPrompt(intent, deliveredJson),
                documents=None,
                options=request_options
            )

            # If first attempt fails, try with more explicit prompt
            if response and not self._isValidJsonResponse(response):
                logger.debug("First AI validation attempt failed, retrying with explicit JSON-only prompt")
                response = await aiService.callAi(
                    prompt=self._buildRetryPrompt(intent, deliveredJson),
                    documents=None,
                    options=request_options
                )

            if not response or not response.strip():
                logger.warning("AI validation returned empty response")
                return self._createFailedValidationResult("AI validation failed - empty response")

            return self._parseAiResponse(response)

        except Exception as e:
            logger.error("AI validation failed: %s", e)
            return self._createFailedValidationResult(f"AI validation error: {str(e)}")