385 lines
No EOL
16 KiB
Python
385 lines
No EOL
16 KiB
Python
# contentValidator.py
|
|
# Content validation for adaptive React mode
|
|
|
|
import re
|
|
import logging
|
|
import json
|
|
from typing import List, Dict, Any
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class ContentValidator:
    """Validates delivered content against user intent.

    Tries an AI-backed validation path first and falls back to rule-based
    checks; see validateContent() for the entry point.
    """

    def __init__(self) -> None:
        # Stateless: everything needed for validation is passed per call.
        pass
|
def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Validates delivered content against user intent using AI"""
|
|
try:
|
|
# First, try AI-based validation for intelligent gap analysis
|
|
aiValidation = self._validateWithAI(documents, intent)
|
|
if aiValidation:
|
|
return aiValidation
|
|
|
|
# Fallback to rule-based validation if AI validation fails
|
|
validationDetails = []
|
|
|
|
for doc in documents:
|
|
content = self._extractContent(doc)
|
|
detail = self._validateSingleDocument(content, doc, intent)
|
|
validationDetails.append(detail)
|
|
|
|
# Calculate overall success
|
|
overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails)
|
|
|
|
# Calculate quality score
|
|
qualityScore = self._calculateQualityScore(validationDetails)
|
|
|
|
# Generate improvement suggestions
|
|
improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)
|
|
|
|
return {
|
|
"overallSuccess": overallSuccess,
|
|
"qualityScore": qualityScore,
|
|
"validationDetails": validationDetails,
|
|
"improvementSuggestions": improvementSuggestions
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error validating content: {str(e)}")
|
|
return self._createFailedValidationResult(str(e))
|
|
|
|
def _extractContent(self, doc: Any) -> str:
|
|
"""Extracts content from a document"""
|
|
try:
|
|
if hasattr(doc, 'documentData'):
|
|
data = doc.documentData
|
|
if isinstance(data, dict) and 'content' in data:
|
|
return str(data['content'])
|
|
else:
|
|
return str(data)
|
|
return ""
|
|
except Exception:
|
|
return ""
|
|
|
|
def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Validates a single document against intent"""
|
|
# Check data type match
|
|
dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown"))
|
|
|
|
# Check format match
|
|
formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown"))
|
|
|
|
# Calculate quality score
|
|
qualityScore = self._calculateDocumentQualityScore(content, intent)
|
|
|
|
# Check success criteria
|
|
successCriteriaMet = self._checkSuccessCriteria(content, intent)
|
|
|
|
# Identify specific issues
|
|
specificIssues = self._identifySpecificIssues(content, intent)
|
|
|
|
# Generate improvement suggestions
|
|
improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent)
|
|
|
|
return {
|
|
"documentName": getattr(doc, 'documentName', 'Unknown'),
|
|
"dataTypeMatch": dataTypeMatch,
|
|
"formatMatch": formatMatch,
|
|
"qualityScore": qualityScore,
|
|
"successCriteriaMet": successCriteriaMet,
|
|
"specificIssues": specificIssues,
|
|
"improvementSuggestions": improvementSuggestions
|
|
}
|
|
|
|
def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
|
|
"""Checks if content matches the expected data type"""
|
|
if dataType == "numbers":
|
|
return self._containsNumbers(content)
|
|
elif dataType == "text":
|
|
return self._containsText(content)
|
|
elif dataType == "documents":
|
|
return self._containsDocumentContent(content)
|
|
elif dataType == "analysis":
|
|
return self._containsAnalysis(content)
|
|
elif dataType == "code":
|
|
return self._containsCode(content)
|
|
else:
|
|
return True # Unknown type, assume match
|
|
|
|
def _containsNumbers(self, content: str) -> bool:
|
|
"""Checks if content contains actual numbers (not code)"""
|
|
# Look for actual numbers in the content
|
|
numbers = re.findall(r'\b\d+\b', content)
|
|
|
|
# Check if it's code (contains function definitions, etc.)
|
|
isCode = any(keyword in content.lower() for keyword in [
|
|
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
|
|
'return', 'print(', 'console.log', 'public ', 'private '
|
|
])
|
|
|
|
# If it's code, it doesn't contain actual numbers
|
|
if isCode:
|
|
return False
|
|
|
|
# If it has numbers and it's not code, it contains actual numbers
|
|
return len(numbers) > 0
|
|
|
|
def _containsText(self, content: str) -> bool:
|
|
"""Checks if content contains readable text"""
|
|
# Remove numbers and special characters
|
|
textContent = re.sub(r'[^\w\s]', '', content)
|
|
words = textContent.split()
|
|
|
|
# Check if there are enough words to be considered text
|
|
return len(words) > 5
|
|
|
|
def _containsDocumentContent(self, content: str) -> bool:
|
|
"""Checks if content is suitable for document creation"""
|
|
# Check for structured content
|
|
hasStructure = any(indicator in content for indicator in [
|
|
'\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦'
|
|
])
|
|
|
|
# Check for meaningful content
|
|
hasMeaningfulContent = len(content.strip()) > 50
|
|
|
|
return hasStructure and hasMeaningfulContent
|
|
|
|
def _containsAnalysis(self, content: str) -> bool:
|
|
"""Checks if content contains analysis"""
|
|
analysisIndicators = [
|
|
'analysis', 'findings', 'conclusion', 'summary', 'insights',
|
|
'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
|
|
]
|
|
|
|
contentLower = content.lower()
|
|
return any(indicator in contentLower for indicator in analysisIndicators)
|
|
|
|
def _containsCode(self, content: str) -> bool:
|
|
"""Checks if content contains code"""
|
|
codeIndicators = [
|
|
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
|
|
'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
|
|
'int ', 'string ', 'var ', 'let ', 'const '
|
|
]
|
|
|
|
contentLower = content.lower()
|
|
return any(indicator in contentLower for indicator in codeIndicators)
|
|
|
|
def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
|
|
"""Checks if content matches expected format"""
|
|
if expectedFormat == "raw_data":
|
|
# Raw data should be simple, not heavily formatted
|
|
return not any(indicator in content for indicator in [
|
|
'<html>', '<div>', '<table>', '## ', '### ', '**', '__'
|
|
])
|
|
elif expectedFormat == "formatted":
|
|
# Formatted content should have structure
|
|
return any(indicator in content for indicator in [
|
|
'\n', '\t', '|', '-', '*', '1.', '2.', '•'
|
|
])
|
|
elif expectedFormat == "structured":
|
|
# Structured content should have clear organization
|
|
return any(indicator in content for indicator in [
|
|
'{', '}', '[', ']', '|', '\t', ' '
|
|
])
|
|
else:
|
|
return True # Unknown format, assume match
|
|
|
|
def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
|
|
"""Checks if content meets success criteria"""
|
|
criteriaMet = []
|
|
successCriteria = intent.get("successCriteria", [])
|
|
|
|
for criterion in successCriteria:
|
|
if 'prime numbers' in criterion.lower():
|
|
# Check if content contains actual prime numbers, not code
|
|
hasNumbers = bool(re.search(r'\b\d+\b', content))
|
|
isNotCode = not any(keyword in content.lower() for keyword in [
|
|
'def ', 'function', 'import ', 'class '
|
|
])
|
|
criteriaMet.append(hasNumbers and isNotCode)
|
|
elif 'document' in criterion.lower():
|
|
# Check if content is suitable for document creation
|
|
hasStructure = any(indicator in content for indicator in [
|
|
'\n', '\t', '|', '-', '*', '1.', '2.'
|
|
])
|
|
criteriaMet.append(hasStructure)
|
|
elif 'format' in criterion.lower():
|
|
# Check if content is properly formatted
|
|
hasFormatting = any(indicator in content for indicator in [
|
|
'\n', '\t', '|', '-', '*', '1.', '2.', '•'
|
|
])
|
|
criteriaMet.append(hasFormatting)
|
|
else:
|
|
# Generic check - content should not be empty
|
|
criteriaMet.append(len(content.strip()) > 0)
|
|
|
|
return criteriaMet
|
|
|
|
def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
|
|
"""Calculates quality score for a single document"""
|
|
score = 0.0
|
|
|
|
# Base score for having content
|
|
if len(content.strip()) > 0:
|
|
score += 0.2
|
|
|
|
# Score for data type match
|
|
if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
|
|
score += 0.3
|
|
|
|
# Score for format match
|
|
if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
|
|
score += 0.2
|
|
|
|
# Score for success criteria
|
|
successCriteriaMet = self._checkSuccessCriteria(content, intent)
|
|
if successCriteriaMet:
|
|
successRate = sum(successCriteriaMet) / len(successCriteriaMet)
|
|
score += 0.3 * successRate
|
|
|
|
return min(score, 1.0)
|
|
|
|
def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
|
|
"""Calculates overall quality score from validation details"""
|
|
if not validationDetails:
|
|
return 0.0
|
|
|
|
totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
|
|
return totalScore / len(validationDetails)
|
|
|
|
def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
|
|
"""Identifies specific issues with the content"""
|
|
issues = []
|
|
|
|
# Check for common issues
|
|
if intent.get("dataType") == "numbers" and self._containsCode(content):
|
|
issues.append("Content contains code instead of actual numbers")
|
|
|
|
if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['<html>', '## ', '**']):
|
|
issues.append("Content is formatted when raw data was requested")
|
|
|
|
if len(content.strip()) == 0:
|
|
issues.append("Content is empty")
|
|
|
|
return issues
|
|
|
|
def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
|
|
"""Generates improvement suggestions for a single document"""
|
|
suggestions = []
|
|
|
|
dataType = intent.get("dataType", "unknown")
|
|
expectedFormat = intent.get("expectedFormat", "unknown")
|
|
|
|
if dataType == "numbers" and self._containsCode(content):
|
|
suggestions.append("Deliver actual numbers, not code to generate them")
|
|
|
|
if expectedFormat == "raw_data" and any(indicator in content for indicator in ['<html>', '## ']):
|
|
suggestions.append("Provide raw data without formatting")
|
|
|
|
if len(content.strip()) == 0:
|
|
suggestions.append("Provide actual content")
|
|
|
|
return suggestions
|
|
|
|
def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
|
|
intent: Dict[str, Any]) -> List[str]:
|
|
"""Generates improvement suggestions based on validation results"""
|
|
suggestions = []
|
|
|
|
# Check for common issues
|
|
if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
|
|
dataType = intent.get("dataType", "unknown")
|
|
suggestions.append(f"Content should contain {dataType} data, not code or other formats")
|
|
|
|
if not any(detail.get("formatMatch", False) for detail in validationDetails):
|
|
expectedFormat = intent.get("expectedFormat", "unknown")
|
|
suggestions.append(f"Content should be in {expectedFormat} format")
|
|
|
|
# Add specific suggestions from validation details
|
|
for detail in validationDetails:
|
|
suggestions.extend(detail.get("improvementSuggestions", []))
|
|
|
|
return list(set(suggestions)) # Remove duplicates
|
|
|
|
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
|
|
"""Creates a failed validation result"""
|
|
return {
|
|
"overallSuccess": False,
|
|
"qualityScore": 0.0,
|
|
"validationDetails": [],
|
|
"improvementSuggestions": [f"Validation failed: {error}"]
|
|
}
|
|
|
|
    def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> "Dict[str, Any] | None":
        """Ask the project's AI service whether the delivery fulfils the intent.

        Builds a JSON prompt from the (truncated) document contents and the
        user's primary goal, then parses the model's JSON verdict into the
        same result shape as the rule-based path.

        Returns:
            The validation result dict, or None when the AI service is
            unavailable or anything fails — callers then fall back to the
            rule-based validation.
        """
        try:
            # Collect per-document name/content pairs for the prompt.
            documentContents = []
            for doc in documents:
                content = self._extractContent(doc)
                documentContents.append({
                    "name": getattr(doc, 'documentName', 'Unknown'),
                    "content": content[:2000]  # Limit content for AI processing
                })

            # Create AI validation prompt
            validationPrompt = f"""
You are a task completion validator. Analyze if the delivered content actually fulfills the user's request.

USER REQUEST: {intent.get('primaryGoal', 'Unknown')}

DELIVERED CONTENT:
{json.dumps(documentContents, indent=2)}

TASK: Determine if the user's request has been fully completed.

Analyze the gap between what was requested and what was delivered. Consider any missing elements, incorrect formats, incomplete work, or other discrepancies.

Respond with JSON only:
{{
"overallSuccess": true/false,
"qualityScore": 0.0-1.0,
"gapAnalysis": "Detailed analysis of what's missing or incorrect",
"improvementSuggestions": ["specific action 1", "specific action 2"]
}}
"""

            # Call AI service for validation
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=validationPrompt, context="", options=request_options)

            # Get AI service from the workflow context
            # NOTE(review): `self.services` is never set by this class; it is
            # presumably injected by the owning workflow — confirm, otherwise
            # this path always falls through and returns None.
            if hasattr(self, 'services') and hasattr(self.services, 'ai'):
                response = self.services.ai.aiObjects.call(request)
                if response and response.content:
                    import re
                    result = response.content.strip()
                    # Tolerate prose around the JSON: keep only the outermost
                    # {...} span before parsing.
                    json_match = re.search(r'\{.*\}', result, re.DOTALL)
                    if json_match:
                        result = json_match.group(0)

                    aiResult = json.loads(result)

                    # Normalize the AI verdict into the rule-based result shape.
                    return {
                        "overallSuccess": aiResult.get("overallSuccess", False),
                        "qualityScore": aiResult.get("qualityScore", 0.0),
                        "validationDetails": [{
                            "documentName": "AI Validation",
                            "gapAnalysis": aiResult.get("gapAnalysis", ""),
                            "successCriteriaMet": [aiResult.get("overallSuccess", False)]
                        }],
                        "improvementSuggestions": aiResult.get("improvementSuggestions", [])
                    }

            return None  # Fallback to rule-based validation

        except Exception as e:
            logger.error(f"AI validation failed: {str(e)}")
            return None  # Fallback to rule-based validation