diff --git a/modules/services/serviceNormalization/mainServiceNormalization.py b/modules/services/serviceNormalization/mainServiceNormalization.py index 763feaef..34805ef2 100644 --- a/modules/services/serviceNormalization/mainServiceNormalization.py +++ b/modules/services/serviceNormalization/mainServiceNormalization.py @@ -91,8 +91,20 @@ class NormalizationService: ) response = await self.services.ai.callAi(prompt=prompt) - js = response[response.find('{'):response.rfind('}') + 1] if response else '{}' - mapping = json.loads(js) + if not response: + return {"mapping": {}, "normalizationPolicy": {}} + + # Extract JSON from response more safely + start_idx = response.find('{') + end_idx = response.rfind('}') + if start_idx == -1 or end_idx == -1 or start_idx >= end_idx: + return {"mapping": {}, "normalizationPolicy": {}} + + js = response[start_idx:end_idx + 1] + try: + mapping = json.loads(js) + except json.JSONDecodeError: + return {"mapping": {}, "normalizationPolicy": {}} # Normalize key naming from AI: prefer single key "mapping" if "mapping" not in mapping and "mappings" in mapping and isinstance(mapping["mappings"], dict): mapping["mapping"] = mapping["mappings"] @@ -156,7 +168,11 @@ class NormalizationService: canonicalRow: List[str] = [] for ch in canonicalHeaders: idx = indexMap.get(ch) - value = r[idx] if (idx is not None and idx < len(r)) else "" + try: + value = r[idx] if (idx is not None and idx < len(r)) else "" + except (IndexError, KeyError) as e: + # Handle corrupted data gracefully + value = "" canonicalRow.append(self._normalizeValue(ch, value, policy)) # consider as row if at least one non-empty meaningful field if any(v.strip() for v in canonicalRow): diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 740067b7..0279df90 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -128,23 +128,23 @@ Perform comprehensive validation: 5. Identify specific gaps and issues 6. Provide actionable next steps -CRITICAL: Respond with ONLY the JSON object below. Do not include any explanatory text, analysis, or other content before or after the JSON. +CRITICAL: You MUST respond with ONLY the JSON object below. NO TEXT ANALYSIS. NO EXPLANATIONS. NO OTHER CONTENT. -IMPORTANT: Even if the content is binary files (like .docx, .pdf, etc.), you must still respond with JSON only. Do not explain that files are binary - just validate based on file names and types. +RESPOND WITH THIS EXACT JSON FORMAT: {{ - "overallSuccess": true/false, - "qualityScore": 0.0-1.0, - "dataTypeMatch": true/false, - "formatMatch": true/false, - "successCriteriaMet": [true/false for each criterion], - "gapAnalysis": "Detailed analysis: what's missing/incorrect AND what specific next step to do", - "improvementSuggestions": ["NEXT STEP: specific action 1", "NEXT STEP: specific action 2"], + "overallSuccess": false, + "qualityScore": 0.5, + "dataTypeMatch": false, + "formatMatch": false, + "successCriteriaMet": [false, false], + "gapAnalysis": "Content does not match expected format and lacks required elements", + "improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"], "validationDetails": [ {{ - "documentName": "Document name", - "issues": ["specific issue 1", "specific issue 2"], - "suggestions": ["NEXT STEP: specific fix 1", "NEXT STEP: specific fix 2"] + "documentName": "Content Validation", + "issues": ["Format mismatch", "Missing required elements"], + "suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"] }} ] }} @@ -165,9 +165,33 @@ IMPORTANT: Even if the content is binary files (like .docx, .pdf, etc.), you mus if response and not self._isValidJsonResponse(response): logger.debug("First AI validation attempt failed, retrying with explicit JSON-only prompt") explicitPrompt = f""" -{validationPrompt} +VALIDATE AND RETURN JSON ONLY - NO TEXT ANALYSIS -IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis, no text before or after. Just the JSON object. +Request: {intent.get('primaryGoal', 'Unknown')} +Data Type: {intent.get('dataType', 'unknown')} +Format: {intent.get('expectedFormat', 'unknown')} +Criteria: {intent.get('successCriteria', [])} + +Content: {json.dumps(documentContents, indent=2)} + +RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT: + +{{ + "overallSuccess": false, + "qualityScore": 0.3, + "dataTypeMatch": false, + "formatMatch": false, + "successCriteriaMet": [false, false], + "gapAnalysis": "Content does not match expected format and lacks required elements", + "improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"], + "validationDetails": [ + {{ + "documentName": "Content Validation", + "issues": ["Format mismatch", "Missing required elements"], + "suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"] + }} + ] +}} """ response = await self.services.ai.callAi( prompt=explicitPrompt, @@ -198,21 +222,21 @@ IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis, # Strategy 3: Look for any JSON object json_match = re.search(r'\{.*\}', result, re.DOTALL) - if not json_match: - logger.debug(f"No JSON found in AI response, trying fallback extraction: {result[:200]}...") - logger.debug(f"Full AI response: {result}") - - # Try fallback extraction for text responses - fallback_result = self._extractFallbackValidationResult(result) - if fallback_result: - logger.info("Using fallback text extraction for validation") - return fallback_result - - logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed") - return self._createFailedValidationResult("AI validation failed - no JSON in response") - else: - result = json_match.group(0) - logger.debug(f"Extracted JSON directly: {result[:200]}...") + if json_match: + result = json_match.group(0) + logger.debug(f"Extracted JSON directly: {result[:200]}...") + else: + logger.debug(f"No JSON found in AI response, trying fallback extraction: {result[:200]}...") + logger.debug(f"Full AI response: {result}") + + # Try fallback extraction for text responses + fallback_result = self._extractFallbackValidationResult(result) + if fallback_result: + logger.info("Using fallback text extraction for validation") + return fallback_result + + logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed") + return self._createFailedValidationResult("AI validation failed - no JSON in response") try: aiResult = json.loads(result)