# contentValidator.py
# Content validation for adaptive React mode

import logging
import json
import re
from typing import List, Dict, Any

logger = logging.getLogger(__name__)

# Patterns used to locate a JSON object inside a free-form AI response.
# Compiled once at import time instead of on every validation call.
_FENCED_JSON_RE = re.compile(r'```(?:json)?\s*(\{.*?\})\s*```', re.DOTALL)
_FLAT_RESULT_RE = re.compile(r'\{[^{}]*"overallSuccess"[^{}]*\}', re.DOTALL)
_ANY_OBJECT_RE = re.compile(r'\{.*\}', re.DOTALL)


class ContentValidator:
    """Validates delivered content against user intent."""

    # String forms longer than this are replaced by a short size indicator.
    _LARGE_CONTENT_THRESHOLD = 100000
    # Maximum number of characters of each document embedded in the AI prompt.
    _PROMPT_CONTENT_LIMIT = 2000

    def __init__(self, services=None, learningEngine=None):
        """Store the collaborators.

        Args:
            services: Object expected to expose an ``ai`` attribute whose
                ``callAi`` coroutine performs the validation request.
            learningEngine: Stored on the instance; not used by the methods
                of this class.
        """
        self.services = services
        self.learningEngine = learningEngine

    async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validate delivered content against user intent using AI.

        Single attempt, parse-or-fail: any failure propagates as an exception.

        Args:
            documents: Delivered documents; each may expose ``documentName``
                and ``documentData``.
            intent: Intent description; the keys ``primaryGoal``, ``dataType``,
                ``expectedFormat`` and ``successCriteria`` are read.

        Returns:
            The normalized validation result (see ``_normalizeAiResult``).
        """
        return await self._validateWithAI(documents, intent)

    def _extractContent(self, doc: Any) -> str:
        """Return the document content as text, with size protection.

        Reads ``doc.documentData``; when that is a dict with a ``content`` key
        the value of that key is used, otherwise the data object itself.
        Sized content whose string form exceeds the threshold is replaced by
        a short size indicator. Best effort: returns ``""`` when the document
        has no ``documentData`` or extraction fails.
        """
        try:
            if not hasattr(doc, 'documentData'):
                return ""
            data = doc.documentData
            if isinstance(data, dict) and 'content' in data:
                content = data['content']
            else:
                content = data
            # Convert once; the same text serves the size check and the result.
            text = str(content)
            if hasattr(content, '__len__') and len(text) > self._LARGE_CONTENT_THRESHOLD:
                return f"[Large document content - {len(text)} characters - truncated for validation]"
            return text
        except Exception:
            # Deliberate best effort: an unreadable document yields empty content.
            logger.debug("Could not extract document content", exc_info=True)
            return ""

    # Removed schema fallback creator to keep failures explicit

    def _isValidJsonResponse(self, response: str) -> bool:
        """Return True when the response holds a flat, parseable JSON object
        that contains the ``overallSuccess`` key."""
        try:
            match = _FLAT_RESULT_RE.search(response)
            if not match:
                return False
            json.loads(match.group(0))
            return True
        except (TypeError, ValueError, RecursionError):
            # TypeError: response is not a string; ValueError: malformed JSON.
            return False

    # Removed text-based fallback extraction to avoid hiding issues

    def _buildValidationPrompt(self, documents: List[Any], intent: Dict[str, Any]) -> str:
        """Build the structured validation prompt sent to the AI service."""
        intent = intent or {}
        documentContents = [
            {
                "name": getattr(doc, 'documentName', 'Unknown'),
                # Limit content for AI processing
                "content": self._extractContent(doc)[:self._PROMPT_CONTENT_LIMIT],
            }
            for doc in (documents or [])
        ]

        successCriteria = intent.get('successCriteria') or []
        criteriaCount = len(successCriteria)
        primaryGoal = intent.get('primaryGoal', 'Unknown')
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        # json.dumps renders JSON booleans ([false, false]); interpolating the
        # Python list would put "False" into a template labelled JSON ONLY.
        criteriaTemplate = json.dumps([False] * criteriaCount)

        promptLines = [
            "TASK VALIDATION",
            "",
            f"USER REQUEST: '{primaryGoal}'",
            f"EXPECTED TYPE: {dataType}",
            f"EXPECTED FORMAT: {expectedFormat}",
            f"SUCCESS CRITERIA ({criteriaCount} items): {successCriteria}",
            "",
            "VALIDATION RULES:",
            "1. Check if content matches expected data type",
            "2. Check if content matches expected format",
            "3. Verify each success criterion is met",
            "4. Rate overall quality (0.0-1.0)",
            "5. Identify specific gaps",
            "6. Suggest next steps",
            "",
            "OUTPUT FORMAT - JSON ONLY (no prose):",
            "{",
            '    "overallSuccess": false,',
            '    "qualityScore": 0.0,',
            '    "dataTypeMatch": false,',
            '    "formatMatch": false,',
            f'    "successCriteriaMet": {criteriaTemplate},',
            '    "gapAnalysis": "Specific gaps found",',
            '    "improvementSuggestions": ["NEXT STEP: Action 1", "NEXT STEP: Action 2"],',
            '    "validationDetails": [',
            '        {',
            '            "documentName": "Document Name",',
            '            "issues": ["Issue 1", "Issue 2"],',
            '            "suggestions": ["NEXT STEP: Fix 1", "NEXT STEP: Fix 2"]',
            '        }',
            '    ]',
            "}",
            "",
            "DELIVERED CONTENT TO CHECK:",
            json.dumps(documentContents, indent=2),
            "",
        ]
        return "\n".join(promptLines)

    def _extractJsonText(self, responseText: str) -> str:
        """Locate the JSON object inside an AI response.

        Strategies, in order: a fenced markdown code block, a flat object
        containing ``overallSuccess``, then the widest ``{...}`` span.

        Raises:
            ValueError: If no JSON object can be found.
        """
        fenced = _FENCED_JSON_RE.search(responseText)
        if fenced:
            extracted = fenced.group(1)
            logger.debug("Extracted JSON from markdown code block: %s...", extracted[:200])
            return extracted

        direct = _FLAT_RESULT_RE.search(responseText) or _ANY_OBJECT_RE.search(responseText)
        if direct:
            extracted = direct.group(0)
            logger.debug("Extracted JSON directly: %s...", extracted[:200])
            return extracted

        logger.debug("No JSON found in AI response: %s...", responseText[:200])
        logger.debug("Full AI response: %s", responseText)
        raise ValueError("AI validation failed - no JSON in response")

    def _normalizeAiResult(self, aiResult: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize the parsed AI answer while keeping failures explicit.

        Fields with an unusable type are set to ``None`` and listed in
        ``missingFields``; ``schemaCompliant`` is False when any are missing.

        Raises:
            ValueError: If the parsed JSON root is not an object.
        """
        if not isinstance(aiResult, dict):
            raise ValueError("AI validation failed - JSON root is not an object")

        overall = aiResult.get("overallSuccess")
        quality = aiResult.get("qualityScore")
        details = aiResult.get("validationDetails")
        criteria = aiResult.get("successCriteriaMet")
        improvements = aiResult.get("improvementSuggestions", [])

        # bool is a subclass of int; a true/false "score" is not a score.
        qualityIsNumber = isinstance(quality, (int, float)) and not isinstance(quality, bool)

        # Always hand callers a list, whatever the model returned.
        if isinstance(improvements, str):
            improvements = [improvements] if improvements.strip() else []
        elif not isinstance(improvements, list):
            improvements = []

        if not isinstance(details, list):
            # No per-document details: fold the top-level findings into one entry.
            details = [{
                "documentName": "AI Validation",
                "gapAnalysis": aiResult.get("gapAnalysis", ""),
                "successCriteriaMet": criteria if isinstance(criteria, list) else [],
            }]

        normalized = {
            "overallSuccess": overall if isinstance(overall, bool) else None,
            "qualityScore": float(quality) if qualityIsNumber else None,
            "validationDetails": details,
            "improvementSuggestions": improvements,
            "schemaCompliant": True,
            "originalType": "json",
            "missingFields": [],
        }
        for requiredField in ("overallSuccess", "qualityScore"):
            if normalized[requiredField] is None:
                normalized["missingFields"].append(requiredField)
        if normalized["missingFields"]:
            normalized["schemaCompliant"] = False
        return normalized

    async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """AI-based comprehensive validation - single main function.

        Builds the prompt, calls ``services.ai.callAi`` once, writes prompt and
        response to the debug files, then extracts, parses and normalizes the
        JSON answer. No retries or correction prompts.

        Raises:
            RuntimeError: If no AI service is available and the class has no
                ``_createFailedValidationResult`` helper.
            ValueError: If the response is empty or contains no JSON object.
            json.JSONDecodeError: If the extracted JSON cannot be parsed.
        """
        try:
            services = getattr(self, 'services', None)
            if not services or not hasattr(services, 'ai'):
                # NOTE(review): _createFailedValidationResult is not defined in the
                # visible part of this class. Use it when it exists, otherwise fail
                # explicitly instead of with an unrelated AttributeError.
                createFailedResult = getattr(self, '_createFailedValidationResult', None)
                if callable(createFailedResult):
                    return createFailedResult("AI service not available")
                raise RuntimeError("AI validation failed - AI service not available")

            validationPrompt = self._buildValidationPrompt(documents, intent)

            # Call AI service for validation
            from modules.datamodels.datamodelAi import AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            response = await services.ai.callAi(
                prompt=validationPrompt,
                documents=None,
                options=request_options
            )

            # Write validation prompt/response to debug
            from modules.shared.debugLogger import writeDebugFile
            writeDebugFile(validationPrompt, "validation_content_prompt")
            writeDebugFile(response or '', "validation_content_response")

            # No retries or correction prompts here; parse-or-fail below
            if not response or not response.strip():
                logger.warning("AI validation returned empty response")
                raise ValueError("AI validation failed - empty response")

            responseText = response.strip()
            logger.debug("AI validation response length: %s", len(responseText))
            jsonText = self._extractJsonText(responseText)

            try:
                aiResult = json.loads(jsonText)
            except json.JSONDecodeError as json_error:
                logger.warning("AI validation invalid JSON: %s", json_error)
                logger.debug("JSON content: %s", jsonText)
                raise

            logger.info("AI validation JSON parsed successfully")
            return self._normalizeAiResult(aiResult)
        except Exception as e:
            logger.error(f"AI validation failed: {str(e)}")
            raise