gateway/modules/workflows/processing/adaptive/contentValidator.py

206 lines
No EOL
8.5 KiB
Python

# contentValidator.py
# Content validation for adaptive React mode
import logging
import json
import re
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
class ContentValidator:
"""Validates delivered content against user intent"""
def __init__(self, services=None, learningEngine=None):
self.services = services
self.learningEngine = learningEngine
async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)"""
return await self._validateWithAI(documents, intent)
def _extractContent(self, doc: Any) -> str:
"""Extracts content from a document with size protection for large documents"""
try:
if hasattr(doc, 'documentData'):
data = doc.documentData
if isinstance(data, dict) and 'content' in data:
content = data['content']
# For large content, check size before converting to string
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
# For very large content, return a size indicator instead
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content)
else:
content = data
# For large content, check size before converting to string
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content)
return ""
except Exception:
return ""
# Removed schema fallback creator to keep failures explicit
def _isValidJsonResponse(self, response: str) -> bool:
"""Checks if response contains valid JSON structure"""
try:
import re
# Look for JSON with expected structure
json_match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', response, re.DOTALL)
if json_match:
json.loads(json_match.group(0))
return True
return False
except:
return False
# Removed text-based fallback extraction to avoid hiding issues
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
    """Validate delivered documents against the intent with one AI call.

    Steps: collect a bounded excerpt of every document, render the
    validation prompt, call ``self.services.ai.callAi``, extract the JSON
    payload from the reply and normalise it. There are no retries or
    correction prompts; the reply either parses or the call fails.

    Args:
        documents: Delivered documents; ``documentName`` and ``documentData``
            are read from each one.
        intent: Intent description; the keys ``primaryGoal``, ``dataType``,
            ``expectedFormat`` and ``successCriteria`` are used in the prompt.

    Returns:
        The normalised result dict (keys ``overallSuccess``, ``qualityScore``,
        ``validationDetails``, ``improvementSuggestions``, ``schemaCompliant``,
        ``originalType``, ``missingFields``), or the value of
        ``self._createFailedValidationResult(...)`` when no AI service is
        available.

    Raises:
        ValueError: The AI reply is empty or contains no JSON object.
        json.JSONDecodeError: The extracted payload is not valid JSON.
        Exception: Anything else raised on the way is logged and re-raised.
    """
    try:
        services = getattr(self, 'services', None)
        if not services or not hasattr(services, 'ai'):
            return self._createFailedValidationResult("AI service not available")

        # Extract content from all documents
        documentContents = [
            {
                "name": getattr(doc, 'documentName', 'Unknown'),
                "content": self._extractContent(doc)[:2000],  # Limit content for AI processing
            }
            for doc in documents
        ]
        validationPrompt = self._buildValidationPrompt(documentContents, intent)

        # Call AI service for validation (the import is kept function-local, as in the original)
        from modules.datamodels.datamodelAi import AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
        response = await services.ai.callAi(
            prompt=validationPrompt,
            documents=None,
            options=request_options
        )

        # No retries or correction prompts here; parse-or-fail below
        if not response or not response.strip():
            logger.warning("AI validation returned empty response")
            raise ValueError("AI validation failed - empty response")

        result = self._extractJsonText(response)
        try:
            aiResult = json.loads(result)
        except json.JSONDecodeError as json_error:
            logger.warning("AI validation invalid JSON: %s", json_error)
            logger.debug("JSON content: %s", result)
            raise
        logger.info("AI validation JSON parsed successfully")
        return self._normalizeValidationResult(aiResult)
    except Exception as e:
        logger.error(f"AI validation failed: {str(e)}")
        raise

def _buildValidationPrompt(self, documentContents: List[Dict[str, Any]], intent: Dict[str, Any]) -> str:
    """Render the structured validation prompt for the given excerpts and intent."""
    successCriteria = intent.get('successCriteria', [])
    if successCriteria is None:
        # An explicit None would otherwise crash on len().
        successCriteria = []
    criteriaCount = len(successCriteria)
    # The template is announced as JSON, so render JSON booleans ([false, ...])
    # instead of Python's repr ([False, ...]).
    criteriaTemplate = json.dumps([False] * criteriaCount)
    return f"""TASK VALIDATION
USER REQUEST: '{intent.get('primaryGoal', 'Unknown')}'
EXPECTED TYPE: {intent.get('dataType', 'unknown')}
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
SUCCESS CRITERIA ({criteriaCount} items): {successCriteria}
VALIDATION RULES:
1. Check if content matches expected data type
2. Check if content matches expected format
3. Verify each success criterion is met
4. Rate overall quality (0.0-1.0)
5. Identify specific gaps
6. Suggest next steps
OUTPUT FORMAT - JSON ONLY (no prose):
{{
"overallSuccess": false,
"qualityScore": 0.0,
"dataTypeMatch": false,
"formatMatch": false,
"successCriteriaMet": {criteriaTemplate},
"gapAnalysis": "Specific gaps found",
"improvementSuggestions": ["NEXT STEP: Action 1", "NEXT STEP: Action 2"],
"validationDetails": [
{{
"documentName": "Document Name",
"issues": ["Issue 1", "Issue 2"],
"suggestions": ["NEXT STEP: Fix 1", "NEXT STEP: Fix 2"]
}}
]
}}
DELIVERED CONTENT TO CHECK:
{json.dumps(documentContents, indent=2)}
"""

def _extractJsonText(self, response: str) -> str:
    """Pull the JSON object text out of an AI reply, or raise ValueError if none is found."""
    result = response.strip()
    logger.debug("AI validation response length: %s", len(result))

    # Strategy 1: Look for JSON in markdown code blocks
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result, re.DOTALL)
    if fenced:
        result = fenced.group(1)
        logger.debug("Extracted JSON from markdown code block: %s...", result[:200])
        return result

    # Strategy 2: Look for a flat JSON object carrying the expected key
    direct = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', result, re.DOTALL)
    if not direct:
        # Strategy 3: Look for any JSON object (first "{" to last "}")
        direct = re.search(r'\{.*\}', result, re.DOTALL)
    if not direct:
        logger.debug("No JSON found in AI response: %s...", result[:200])
        logger.debug("Full AI response: %s", result)
        raise ValueError("AI validation failed - no JSON in response")

    result = direct.group(0)
    logger.debug("Extracted JSON directly: %s...", result[:200])
    return result

def _normalizeValidationResult(self, aiResult: Dict[str, Any]) -> Dict[str, Any]:
    """Map the parsed AI reply onto the result schema, recording missing or mistyped fields."""
    overall = aiResult.get("overallSuccess")
    quality = aiResult.get("qualityScore")
    details = aiResult.get("validationDetails")
    gap = aiResult.get("gapAnalysis", "")
    criteria = aiResult.get("successCriteriaMet")
    improvements = aiResult.get("improvementSuggestions", [])

    # bool is a subclass of int; a true/false "score" is a schema violation,
    # not a quality of 1.0/0.0.
    qualityIsNumber = isinstance(quality, (int, float)) and not isinstance(quality, bool)

    # Normalize while keeping failures explicit
    normalized = {
        "overallSuccess": overall if isinstance(overall, bool) else None,
        "qualityScore": float(quality) if qualityIsNumber else None,
        "validationDetails": details if isinstance(details, list) else [{
            "documentName": "AI Validation",
            "gapAnalysis": gap,
            "successCriteriaMet": criteria if isinstance(criteria, list) else []
        }],
        "improvementSuggestions": improvements,
        "schemaCompliant": True,
        "originalType": "json",
        "missingFields": []
    }
    for field in ("overallSuccess", "qualityScore"):
        if normalized[field] is None:
            normalized["missingFields"].append(field)
    if normalized["missingFields"]:
        normalized["schemaCompliant"] = False
    return normalized