303 lines
No EOL
14 KiB
Python
303 lines
No EOL
14 KiB
Python
# contentValidator.py
# Content validation for adaptive React mode

import json
import logging
import re
from typing import Any, Dict, List, Optional

# Module-level logger; named after the module so log records can be filtered per file.
logger = logging.getLogger(__name__)


class ContentValidator:
    """Validates delivered content against user intent.

    A summary of the delivered documents and the user's intent is sent to the
    AI service exposed as ``services.ai``. Whatever comes back is normalized
    into a schema-stable dictionary with the keys ``overallSuccess``,
    ``qualityScore``, ``validationDetails``, ``improvementSuggestions``,
    ``schemaCompliant``, ``originalType`` and ``missingFields``.
    """

    # Number of characters of each document that is forwarded to the AI.
    _MAX_CONTENT_CHARS = 2000

    # JSON object wrapped in a markdown code fence (```json ... ```).
    _FENCED_JSON_RE = re.compile(r'```(?:json)?\s*(\{.*?\})\s*```', re.DOTALL)
    # Flat (non-nested) JSON object that mentions "overallSuccess".
    _FLAT_JSON_RE = re.compile(r'\{[^{}]*"overallSuccess"[^{}]*\}', re.DOTALL)
    # Widest brace-delimited span; this is the one that covers nested objects.
    _ANY_JSON_RE = re.compile(r'\{.*\}', re.DOTALL)

    # Field extractors used when the response is not parseable JSON.
    _OVERALL_RE = re.compile(r'"overallSuccess"\s*:\s*"?(true|false)"?', re.IGNORECASE)
    _QUALITY_RE = re.compile(r'"qualityScore"\s*:\s*(\d+(?:\.\d+)?|\.\d+)')
    _GAP_RE = re.compile(r'"gapAnalysis"\s*:\s*"([^"]*)"')
    # JSON key names ("successCriteriaMet": ...) must not count as success words.
    _JSON_KEY_RE = re.compile(r'"\w+"\s*:')
    # Anchored at a word start so "incomplete" / "unsuccessful" do not match.
    _POSITIVE_HINT_RE = re.compile(
        r'\b(?:success|complete|fulfilled|satisfied)', re.IGNORECASE
    )

    def __init__(self, services=None):
        """Store the service container.

        Args:
            services: Object that exposes the AI service as ``services.ai``
                (with an awaitable ``callAi``). May be ``None``; validation
                then returns a failed-validation result.
        """
        self.services = services

    async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validate delivered documents against the user intent using AI.

        Args:
            documents: Delivered documents; ``documentName`` and
                ``documentData`` attributes are read when present.
            intent: Intent description; the keys ``primaryGoal``, ``dataType``,
                ``expectedFormat`` and ``successCriteria`` are read.

        Returns:
            The normalized validation result. This method never raises; any
            error is reported through a failed-validation result.
        """
        try:
            return await self._validateWithAI(documents, intent)
        except Exception as e:
            logger.error(f"Error validating content: {str(e)}")
            return self._createFailedValidationResult(str(e))

    def _extractContent(self, doc: Any) -> str:
        """Return the textual content of a document, or ``""`` if unavailable.

        A dict payload with a ``content`` key yields that value; any other
        payload is stringified. A missing or ``None`` payload yields ``""`` so
        the literal text "None" is never presented to the validator as content.
        """
        try:
            data = getattr(doc, 'documentData', None)
            if data is None:
                return ""
            if isinstance(data, dict) and 'content' in data:
                content = data['content']
                return "" if content is None else str(content)
            return str(data)
        except Exception:
            # Best effort: attribute access or str() on foreign objects may raise.
            return ""

    def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
        """Create a failed validation result in a schema-stable shape.

        Args:
            error: Human-readable reason, embedded in the improvement suggestion.

        Returns:
            A result whose ``overallSuccess`` and ``qualityScore`` are ``None``
            because the validator itself failed and the outcome is unknown.
        """
        return {
            "overallSuccess": None,  # Unknown when validator itself failed
            "qualityScore": None,
            "validationDetails": [],
            "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."],
            "schemaCompliant": False,
            "originalType": "error",
            "missingFields": ["overallSuccess", "qualityScore"],
        }

    @classmethod
    def _parseJsonObject(cls, text: str) -> Optional[Dict[str, Any]]:
        """Find and parse a JSON object embedded in ``text``.

        Candidates are tried in this order: fenced code block, flat object
        mentioning ``overallSuccess``, widest brace-delimited span. The first
        parsed object containing ``overallSuccess`` wins; otherwise the first
        parsed object is returned, or ``None`` when nothing parses.
        """
        candidates = []
        fenced = cls._FENCED_JSON_RE.search(text)
        if fenced:
            candidates.append(fenced.group(1))
        for pattern in (cls._FLAT_JSON_RE, cls._ANY_JSON_RE):
            found = pattern.search(text)
            if found:
                candidates.append(found.group(0))

        parsedObjects = []
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                if "overallSuccess" in parsed:
                    return parsed
                parsedObjects.append(parsed)
        return parsedObjects[0] if parsedObjects else None

    def _isValidJsonResponse(self, response: str) -> bool:
        """Return True if the response holds a JSON object with ``overallSuccess``.

        Nested objects (such as the requested ``validationDetails`` entries)
        and markdown code fences are accepted. Non-string input yields False.
        """
        if not isinstance(response, str):
            return False
        parsed = self._parseJsonObject(response)
        return parsed is not None and "overallSuccess" in parsed

    def _extractFallbackValidationResult(self, response: str) -> Optional[Dict[str, Any]]:
        """Extract a minimal validation result from a malformed AI response.

        ``overallSuccess`` is taken from an explicit ``"overallSuccess": true|false``
        pair when present. Otherwise it is True only if the text, with JSON key
        names removed, contains a word starting with "success", "complete",
        "fulfilled" or "satisfied"; with no such word it is False.

        Returns:
            A schema-stable result with ``schemaCompliant`` False, or ``None``
            if extraction itself fails (for example on non-string input).
        """
        try:
            overallMatch = self._OVERALL_RE.search(response)
            qualityMatch = self._QUALITY_RE.search(response)
            gapMatch = self._GAP_RE.search(response)

            if overallMatch:
                parsedOverall = overallMatch.group(1).lower() == 'true'
            else:
                prose = self._JSON_KEY_RE.sub(' ', response)
                parsedOverall = bool(self._POSITIVE_HINT_RE.search(prose))

            parsedQuality = float(qualityMatch.group(1)) if qualityMatch else None

            extracted = {"overallSuccess": parsedOverall, "qualityScore": parsedQuality}
            return {
                "overallSuccess": parsedOverall,
                "qualityScore": parsedQuality,
                "validationDetails": [{
                    "documentName": "AI Validation (Fallback)",
                    "gapAnalysis": gapMatch.group(1) if gapMatch else "Unable to parse detailed analysis",
                    "successCriteriaMet": []
                }],
                "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"],
                "schemaCompliant": False,
                "originalType": "text",
                "missingFields": [name for name, value in extracted.items() if value is None],
            }
        except Exception as e:
            logger.error(f"Fallback extraction failed: {str(e)}")
            return None

    @staticmethod
    def _normalizeAiResult(aiResult: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize a parsed AI answer into the schema-stable result shape.

        Fields with an unexpected type become ``None`` and are listed in
        ``missingFields`` instead of being coerced. ``bool`` is rejected as a
        quality score even though it is an ``int`` subclass. A single string
        of improvement suggestions is wrapped in a list.
        """
        overall = aiResult.get("overallSuccess")
        quality = aiResult.get("qualityScore")
        details = aiResult.get("validationDetails")
        criteria = aiResult.get("successCriteriaMet")
        improvements = aiResult.get("improvementSuggestions")

        overallSuccess = overall if isinstance(overall, bool) else None
        isNumber = isinstance(quality, (int, float)) and not isinstance(quality, bool)
        qualityScore = float(quality) if isNumber else None

        if not isinstance(details, list):
            details = [{
                "documentName": "AI Validation",
                "gapAnalysis": aiResult.get("gapAnalysis", ""),
                "successCriteriaMet": criteria if isinstance(criteria, list) else []
            }]

        if isinstance(improvements, str):
            improvements = [improvements]
        elif not isinstance(improvements, list):
            improvements = []

        missingFields = [
            name
            for name, value in (("overallSuccess", overallSuccess), ("qualityScore", qualityScore))
            if value is None
        ]

        return {
            "overallSuccess": overallSuccess,
            "qualityScore": qualityScore,
            "validationDetails": details,
            "improvementSuggestions": improvements,
            # Any missing critical field means the answer did not follow the schema.
            "schemaCompliant": not missingFields,
            "originalType": "json",
            "missingFields": missingFields,
        }

    @staticmethod
    def _responseTemplate(exampleScore: float) -> str:
        """Return the example JSON object the AI is asked to mirror."""
        return "\n".join([
            '{',
            '  "overallSuccess": false,',
            f'  "qualityScore": {exampleScore},',
            '  "dataTypeMatch": false,',
            '  "formatMatch": false,',
            '  "successCriteriaMet": [false, false],',
            '  "gapAnalysis": "Content does not match expected format and lacks required elements",',
            '  "improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],',
            '  "validationDetails": [',
            '    {',
            '      "documentName": "Content Validation",',
            '      "issues": ["Format mismatch", "Missing required elements"],',
            '      "suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]',
            '    }',
            '  ]',
            '}',
        ])

    @classmethod
    def _buildValidationPrompt(cls, intent: Dict[str, Any], documentContents: List[Dict[str, Any]], explicit: bool = False) -> str:
        """Build the validation prompt.

        Args:
            intent: Intent dictionary (missing keys fall back to placeholders).
            documentContents: JSON-serializable list of name/content entries.
            explicit: When True, build the terse JSON-only retry prompt.
        """
        goal = intent.get('primaryGoal', 'Unknown')
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        criteria = intent.get('successCriteria', [])
        delivered = json.dumps(documentContents, indent=2)

        if explicit:
            lines = [
                "",
                "VALIDATE AND RETURN JSON ONLY - NO TEXT ANALYSIS",
                "",
                f"Request: {goal}",
                f"Data Type: {dataType}",
                f"Format: {expectedFormat}",
                f"Criteria: {criteria}",
                "",
                f"Content: {delivered}",
                "",
                "RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:",
                "",
                cls._responseTemplate(0.3),
                "",
            ]
        else:
            lines = [
                "",
                "You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.",
                "",
                f"USER REQUEST: {goal}",
                f"EXPECTED DATA TYPE: {dataType}",
                f"EXPECTED FORMAT: {expectedFormat}",
                f"SUCCESS CRITERIA: {criteria}",
                "",
                "DELIVERED CONTENT:",
                delivered,
                "",
                "Perform comprehensive validation:",
                "1. Check if content matches expected data type",
                "2. Check if content matches expected format",
                "3. Verify success criteria are met",
                "4. Assess overall quality and completeness",
                "5. Identify specific gaps and issues",
                "6. Provide actionable next steps",
                "",
                "CRITICAL: You MUST respond with ONLY the JSON object below. NO TEXT ANALYSIS. NO EXPLANATIONS. NO OTHER CONTENT.",
                "",
                "RESPOND WITH THIS EXACT JSON FORMAT:",
                "",
                cls._responseTemplate(0.5),
                "",
            ]
        return "\n".join(lines)

    async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """AI-based comprehensive validation - single main function.

        Flow: summarize the documents, ask the AI, retry once with a stricter
        prompt if the first answer holds no usable JSON, then parse the JSON or
        fall back to regex extraction from the raw text.

        Returns:
            A normalized result, a fallback result, or a failed-validation
            result. Exceptions are logged and converted, never propagated.
        """
        try:
            services = getattr(self, 'services', None)
            if not services or not hasattr(services, 'ai'):
                return self._createFailedValidationResult("AI service not available")

            intent = intent or {}
            documentContents = []
            for doc in documents or []:
                name = getattr(doc, 'documentName', 'Unknown')
                documentContents.append({
                    # Stringify so an arbitrary name object cannot break json.dumps.
                    "name": 'Unknown' if name is None else str(name),
                    # Limit content for AI processing
                    "content": self._extractContent(doc)[:self._MAX_CONTENT_CHARS],
                })

            # Imported here, as in the original code, rather than at module level.
            from modules.datamodels.datamodelAi import AiCallOptions, OperationType
            requestOptions = AiCallOptions()
            requestOptions.operationType = OperationType.GENERAL

            response = await services.ai.callAi(
                prompt=self._buildValidationPrompt(intent, documentContents),
                documents=None,
                options=requestOptions
            )

            # If first attempt fails, try with more explicit prompt
            if response and not self._isValidJsonResponse(response):
                logger.debug("First AI validation attempt failed, retrying with explicit JSON-only prompt")
                response = await services.ai.callAi(
                    prompt=self._buildValidationPrompt(intent, documentContents, explicit=True),
                    documents=None,
                    options=requestOptions
                )

            if not response or not response.strip():
                logger.warning("AI validation returned empty response")
                return self._createFailedValidationResult("AI validation failed - empty response")

            text = response.strip()
            logger.debug(f"AI validation response length: {len(text)}")

            aiResult = self._parseJsonObject(text)
            if aiResult is not None:
                logger.info("AI validation JSON parsed successfully")
                return self._normalizeAiResult(aiResult)

            logger.debug(f"No parseable JSON in AI response, trying fallback extraction: {text[:200]}...")
            logger.debug(f"Full AI response: {text}")

            # Try to extract key information from the malformed response
            fallbackResult = self._extractFallbackValidationResult(text)
            if fallbackResult:
                logger.info("Using fallback text extraction for validation")
                return fallbackResult

            logger.warning("All AI validation attempts failed - no valid JSON found and fallback extraction failed")
            reason = "invalid JSON in response" if "{" in text else "no JSON in response"
            return self._createFailedValidationResult(f"AI validation failed - {reason}")

        except Exception as e:
            logger.error(f"AI validation failed: {str(e)}")
            return self._createFailedValidationResult(f"AI validation error: {str(e)}")