# contentValidator.py # Content validation for adaptive React mode import re import logging from typing import List, Dict, Any logger = logging.getLogger(__name__) class ContentValidator: """Validates delivered content against user intent""" def __init__(self): pass def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]: """Validates delivered content against user intent""" try: validationDetails = [] for doc in documents: content = self._extractContent(doc) detail = self._validateSingleDocument(content, doc, intent) validationDetails.append(detail) # Calculate overall success overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails) # Calculate quality score qualityScore = self._calculateQualityScore(validationDetails) # Generate improvement suggestions improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent) return { "overallSuccess": overallSuccess, "qualityScore": qualityScore, "validationDetails": validationDetails, "improvementSuggestions": improvementSuggestions } except Exception as e: logger.error(f"Error validating content: {str(e)}") return self._createFailedValidationResult(str(e)) def _extractContent(self, doc: Any) -> str: """Extracts content from a document""" try: if hasattr(doc, 'documentData'): data = doc.documentData if isinstance(data, dict) and 'content' in data: return str(data['content']) else: return str(data) return "" except Exception: return "" def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]: """Validates a single document against intent""" # Check data type match dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown")) # Check format match formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")) # Calculate quality score qualityScore = self._calculateDocumentQualityScore(content, intent) # Check success criteria successCriteriaMet = self._checkSuccessCriteria(content, intent) # Identify specific issues specificIssues = self._identifySpecificIssues(content, intent) # Generate improvement suggestions improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent) return { "documentName": getattr(doc, 'documentName', 'Unknown'), "dataTypeMatch": dataTypeMatch, "formatMatch": formatMatch, "qualityScore": qualityScore, "successCriteriaMet": successCriteriaMet, "specificIssues": specificIssues, "improvementSuggestions": improvementSuggestions } def _checkDataTypeMatch(self, content: str, dataType: str) -> bool: """Checks if content matches the expected data type""" if dataType == "numbers": return self._containsNumbers(content) elif dataType == "text": return self._containsText(content) elif dataType == "documents": return self._containsDocumentContent(content) elif dataType == "analysis": return self._containsAnalysis(content) elif dataType == "code": return self._containsCode(content) else: return True # Unknown type, assume match def _containsNumbers(self, content: str) -> bool: """Checks if content contains actual numbers (not code)""" # Look for actual numbers in the content numbers = re.findall(r'\b\d+\b', content) # Check if it's code (contains function definitions, etc.) isCode = any(keyword in content.lower() for keyword in [ 'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ', 'return', 'print(', 'console.log', 'public ', 'private ' ]) # If it's code, it doesn't contain actual numbers if isCode: return False # If it has numbers and it's not code, it contains actual numbers return len(numbers) > 0 def _containsText(self, content: str) -> bool: """Checks if content contains readable text""" # Remove numbers and special characters textContent = re.sub(r'[^\w\s]', '', content) words = textContent.split() # Check if there are enough words to be considered text return len(words) > 5 def _containsDocumentContent(self, content: str) -> bool: """Checks if content is suitable for document creation""" # Check for structured content hasStructure = any(indicator in content for indicator in [ '\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦' ]) # Check for meaningful content hasMeaningfulContent = len(content.strip()) > 50 return hasStructure and hasMeaningfulContent def _containsAnalysis(self, content: str) -> bool: """Checks if content contains analysis""" analysisIndicators = [ 'analysis', 'findings', 'conclusion', 'summary', 'insights', 'trends', 'patterns', 'comparison', 'evaluation', 'assessment' ] contentLower = content.lower() return any(indicator in contentLower for indicator in analysisIndicators) def _containsCode(self, content: str) -> bool: """Checks if content contains code""" codeIndicators = [ 'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ', 'return', 'print(', 'console.log', 'public ', 'private ', 'void ', 'int ', 'string ', 'var ', 'let ', 'const ' ] contentLower = content.lower() return any(indicator in contentLower for indicator in codeIndicators) def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool: """Checks if content matches expected format""" if expectedFormat == "raw_data": # Raw data should be simple, not heavily formatted return not any(indicator in content for indicator in [ '', '