# gateway/modules/workflows/processing/adaptive/contentValidator.py
# 2025-10-15 00:36:00 +02:00
#
# 248 lines
# No EOL
# 11 KiB
# Python

# contentValidator.py
# Content validation for adaptive React mode
import logging
import json
import re
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
class ContentValidator:
    """Validates delivered content against user intent.

    The actual judgement is delegated to the AI service reachable as
    ``services.ai``. Validation results are plain dicts with the keys
    ``overallSuccess``, ``qualityScore``, ``validationDetails`` and
    ``improvementSuggestions``.
    """

    # Upper bound of characters per document embedded into the AI prompt.
    _MAX_CONTENT_CHARS = 2000

    # Patterns used to salvage single fields from a malformed AI response.
    _OVERALL_SUCCESS_RE = re.compile(r'"overallSuccess"\s*:\s*(true|false)', re.IGNORECASE)
    _QUALITY_SCORE_RE = re.compile(r'"qualityScore"\s*:\s*([0-9]+(?:\.[0-9]+)?|\.[0-9]+)')
    _GAP_ANALYSIS_RE = re.compile(r'"gapAnalysis"\s*:\s*"([^"]*)"')

    # Whole-word indicators for the last-resort keyword heuristic. Whole words
    # are required so that "incomplete" does not count as "complete" and the
    # JSON key "overallSuccess" does not count as "success".
    _NEGATIVE_WORDS_RE = re.compile(r'\b(?:failed|incomplete|missing|errors?)\b', re.IGNORECASE)
    _POSITIVE_WORDS_RE = re.compile(
        r'\b(?:success(?:ful(?:ly)?)?|complete[ds]?|fulfilled|satisfied)\b',
        re.IGNORECASE,
    )

    def __init__(self, services=None):
        """Store the service container; ``services.ai`` is used for validation."""
        self.services = services

    async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validate delivered content against user intent using AI.

        Args:
            documents: Delivered documents; ``documentName`` and
                ``documentData`` attributes are read from each one.
            intent: User intent; the keys ``primaryGoal``, ``dataType``,
                ``expectedFormat`` and ``successCriteria`` are read.

        Returns:
            The validation result dict. Never raises: any error is turned
            into a failed validation result.
        """
        try:
            return await self._validateWithAI(documents, intent)
        except Exception as e:
            logger.error("Error validating content: %s", e)
            return self._createFailedValidationResult(str(e))

    def _extractContent(self, doc: Any) -> str:
        """Return the textual content of a document, or "" when unavailable.

        A dict payload with a ``content`` key yields that value; any other
        payload is stringified. Missing or ``None`` data yields "" so that
        the literal text "None" is never presented as document content.
        """
        try:
            if not hasattr(doc, 'documentData'):
                return ""
            data = doc.documentData
            if isinstance(data, dict) and 'content' in data:
                data = data['content']
            return "" if data is None else str(data)
        except Exception:
            # Best effort: a document that cannot be read contributes no text.
            return ""

    def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
        """Create a failed validation result carrying ``error`` as next step."""
        return {
            "overallSuccess": False,
            "qualityScore": 0.0,
            "validationDetails": [],
            "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
        }

    def _findValidationJson(self, text: Any):
        """Return the first JSON object in ``text`` that has "overallSuccess".

        The text may wrap the object in prose or markdown fences and the
        object may contain nested objects/arrays. Returns ``None`` when no
        such object can be decoded.
        """
        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        start = text.find('{')
        while start != -1:
            try:
                # raw_decode parses one JSON document and ignores what follows,
                # so trailing prose or a closing code fence does not matter.
                candidate, _ = decoder.raw_decode(text[start:])
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and "overallSuccess" in candidate:
                return candidate
            start = text.find('{', start + 1)
        return None

    def _isValidJsonResponse(self, response: str) -> bool:
        """Check whether ``response`` contains a parseable validation object."""
        try:
            return self._findValidationJson(response) is not None
        except Exception:
            return False

    def _inferSuccessFromText(self, text: str) -> bool:
        """Guess success from indicator words when no explicit flag exists.

        Negative indicators win over positive ones and the default is
        ``False`` so that an unclear response is never reported as success.
        """
        if self._NEGATIVE_WORDS_RE.search(text):
            return False
        return bool(self._POSITIVE_WORDS_RE.search(text))

    def _extractFallbackValidationResult(self, response: str) -> Dict[str, Any]:
        """Extract a validation result from a malformed AI response.

        Individual fields are picked out with regular expressions; when the
        ``overallSuccess`` flag is absent it is inferred from indicator words.

        Returns:
            A validation result dict, or ``None`` if extraction itself failed
            (callers treat a falsy return as "no fallback available").
        """
        try:
            successMatch = self._OVERALL_SUCCESS_RE.search(response)
            scoreMatch = self._QUALITY_SCORE_RE.search(response)
            gapMatch = self._GAP_ANALYSIS_RE.search(response)

            if successMatch:
                overallSuccess = successMatch.group(1).lower() == 'true'
            else:
                overallSuccess = self._inferSuccessFromText(response)

            return {
                "overallSuccess": overallSuccess,
                "qualityScore": float(scoreMatch.group(1)) if scoreMatch else 0.5,
                "validationDetails": [{
                    "documentName": "AI Validation (Fallback)",
                    "gapAnalysis": gapMatch.group(1) if gapMatch else "Unable to parse detailed analysis",
                    "successCriteriaMet": [False]  # Conservative fallback
                }],
                "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"]
            }
        except Exception as e:
            logger.error("Fallback extraction failed: %s", e)
            return None

    def _buildValidationPrompt(self, documents: List[Any], intent: Dict[str, Any]) -> str:
        """Build the AI prompt from the (truncated) documents and the intent."""
        intent = intent or {}
        documentContents = [
            {
                "name": getattr(doc, 'documentName', 'Unknown'),
                # Limit content for AI processing
                "content": self._extractContent(doc)[:self._MAX_CONTENT_CHARS],
            }
            for doc in documents or []
        ]
        return f"""
You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.
USER REQUEST: {intent.get('primaryGoal', 'Unknown')}
EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
SUCCESS CRITERIA: {intent.get('successCriteria', [])}
DELIVERED CONTENT:
{json.dumps(documentContents, indent=2)}
Perform comprehensive validation:
1. Check if content matches expected data type
2. Check if content matches expected format
3. Verify success criteria are met
4. Assess overall quality and completeness
5. Identify specific gaps and issues
6. Provide actionable next steps
CRITICAL: Respond with ONLY the JSON object below. Do not include any explanatory text, analysis, or other content before or after the JSON.
IMPORTANT: Even if the content is binary files (like .docx, .pdf, etc.), you must still respond with JSON only. Do not explain that files are binary - just validate based on file names and types.
{{
"overallSuccess": true/false,
"qualityScore": 0.0-1.0,
"dataTypeMatch": true/false,
"formatMatch": true/false,
"successCriteriaMet": [true/false for each criterion],
"gapAnalysis": "Detailed analysis: what's missing/incorrect AND what specific next step to do",
"improvementSuggestions": ["NEXT STEP: specific action 1", "NEXT STEP: specific action 2"],
"validationDetails": [
{{
"documentName": "Document name",
"issues": ["specific issue 1", "specific issue 2"],
"suggestions": ["NEXT STEP: specific fix 1", "NEXT STEP: specific fix 2"]
}}
]
}}
"""

    def _buildValidationResult(self, aiResult: Dict[str, Any]) -> Dict[str, Any]:
        """Map the parsed AI answer onto the validation result structure."""
        return {
            "overallSuccess": aiResult.get("overallSuccess", False),
            "qualityScore": aiResult.get("qualityScore", 0.0),
            "validationDetails": aiResult.get("validationDetails", [{
                "documentName": "AI Validation",
                "gapAnalysis": aiResult.get("gapAnalysis", ""),
                "successCriteriaMet": aiResult.get("successCriteriaMet", [False])
            }]),
            "improvementSuggestions": aiResult.get("improvementSuggestions", [])
        }

    async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """AI-based comprehensive validation - single main function.

        Sends the validation prompt to ``services.ai.callAi``, retries once
        with a stricter prompt if the answer holds no parseable validation
        object, then parses the answer. Malformed answers go through
        regex-based fallback extraction before the validation is reported
        as failed.

        Returns:
            The validation result dict; errors are converted into a failed
            validation result instead of being raised.
        """
        try:
            if not getattr(self, 'services', None) or not hasattr(self.services, 'ai'):
                return self._createFailedValidationResult("AI service not available")

            validationPrompt = self._buildValidationPrompt(documents, intent)

            # Project-local import kept inside the method.
            # NOTE(review): presumably deferred to avoid an import cycle - confirm.
            from modules.datamodels.datamodelAi import AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            response = await self.services.ai.callAi(
                prompt=validationPrompt,
                documents=None,
                options=request_options
            )

            # If first attempt fails, try with more explicit prompt
            if response and not self._isValidJsonResponse(response):
                logger.warning("First AI validation attempt failed, retrying with explicit JSON-only prompt")
                explicitPrompt = f"""
{validationPrompt}
IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis, no text before or after. Just the JSON object.
"""
                response = await self.services.ai.callAi(
                    prompt=explicitPrompt,
                    documents=None,
                    options=request_options
                )

            if not response or not response.strip():
                logger.warning("AI validation returned empty response")
                return self._createFailedValidationResult("AI validation failed - empty response")

            result = response.strip()
            logger.debug("AI validation response length: %d", len(result))

            # Strategy 1: a decodable object with the expected structure,
            # wherever it sits in the response (prose, code fence, ...).
            aiResult = self._findValidationJson(result)
            if aiResult is not None:
                logger.info("AI validation JSON parsed successfully")
                return self._buildValidationResult(aiResult)

            # Strategy 2: any JSON-looking span from first "{" to last "}".
            jsonLike = re.search(r'\{.*\}', result, re.DOTALL)
            if not jsonLike:
                logger.debug("No JSON found in AI response, trying fallback extraction: %s...", result[:200])
                logger.debug("Full AI response: %s", result)
                # Try fallback extraction for text responses
                fallbackResult = self._extractFallbackValidationResult(result)
                if fallbackResult:
                    logger.info("Using fallback text extraction for validation")
                    return fallbackResult
                logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed")
                return self._createFailedValidationResult("AI validation failed - no JSON in response")

            candidate = jsonLike.group(0)
            logger.debug("Extracted JSON directly: %s...", candidate[:200])
            try:
                aiResult = json.loads(candidate)
            except json.JSONDecodeError as json_error:
                logger.warning("All AI validation attempts failed - invalid JSON: %s", json_error)
                logger.debug("JSON content: %s", candidate)
                # Try to extract key information from malformed response
                fallbackResult = self._extractFallbackValidationResult(candidate)
                if fallbackResult:
                    logger.info("Using fallback validation result from malformed JSON")
                    return fallbackResult
                return self._createFailedValidationResult(f"AI validation failed - invalid JSON: {str(json_error)}")

            logger.info("AI validation JSON parsed successfully")
            return self._buildValidationResult(aiResult)
        except Exception as e:
            logger.error("AI validation failed: %s", e)
            return self._createFailedValidationResult(f"AI validation error: {str(e)}")