From e9756bbc171b6b9adcc9f14cd93dc2b382cc7758 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Wed, 15 Oct 2025 00:59:18 +0200
Subject: [PATCH] Fixes 04: harden AI JSON extraction and validation prompts

---
 .../mainServiceNormalization.py               | 22 ++++-
 .../processing/adaptive/contentValidator.py   | 82 ++++++++++++-------
 2 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/modules/services/serviceNormalization/mainServiceNormalization.py b/modules/services/serviceNormalization/mainServiceNormalization.py
index 763feaef..34805ef2 100644
--- a/modules/services/serviceNormalization/mainServiceNormalization.py
+++ b/modules/services/serviceNormalization/mainServiceNormalization.py
@@ -91,8 +91,20 @@ class NormalizationService:
         )
 
         response = await self.services.ai.callAi(prompt=prompt)
-        js = response[response.find('{'):response.rfind('}') + 1] if response else '{}'
-        mapping = json.loads(js)
+        if not response:
+            return {"mapping": {}, "normalizationPolicy": {}}
+        
+        # Extract JSON from response more safely
+        start_idx = response.find('{')
+        end_idx = response.rfind('}')
+        if start_idx == -1 or end_idx == -1 or start_idx >= end_idx:
+            return {"mapping": {}, "normalizationPolicy": {}}
+        
+        js = response[start_idx:end_idx + 1]
+        try:
+            mapping = json.loads(js)
+        except json.JSONDecodeError:
+            return {"mapping": {}, "normalizationPolicy": {}}
         # Normalize key naming from AI: prefer single key "mapping"
         if "mapping" not in mapping and "mappings" in mapping and isinstance(mapping["mappings"], dict):
             mapping["mapping"] = mapping["mappings"]
@@ -156,7 +168,11 @@ class NormalizationService:
                 canonicalRow: List[str] = []
                 for ch in canonicalHeaders:
                     idx = indexMap.get(ch)
-                    value = r[idx] if (idx is not None and idx < len(r)) else ""
+                    try:
+                        value = r[idx] if (idx is not None and idx < len(r)) else ""
+                    except (IndexError, KeyError) as e:
+                        # Handle corrupted data gracefully
+                        value = ""
                     canonicalRow.append(self._normalizeValue(ch, value, policy))
                 # consider as row if at least one non-empty meaningful field
                 if any(v.strip() for v in canonicalRow):
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 740067b7..0279df90 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -128,23 +128,23 @@ Perform comprehensive validation:
 5. Identify specific gaps and issues
 6. Provide actionable next steps
 
-CRITICAL: Respond with ONLY the JSON object below. Do not include any explanatory text, analysis, or other content before or after the JSON.
+CRITICAL: You MUST respond with ONLY the JSON object below. NO TEXT ANALYSIS. NO EXPLANATIONS. NO OTHER CONTENT.
 
-IMPORTANT: Even if the content is binary files (like .docx, .pdf, etc.), you must still respond with JSON only. Do not explain that files are binary - just validate based on file names and types.
+RESPOND WITH THIS EXACT JSON FORMAT:
 
 {{
-    "overallSuccess": true/false,
-    "qualityScore": 0.0-1.0,
-    "dataTypeMatch": true/false,
-    "formatMatch": true/false,
-    "successCriteriaMet": [true/false for each criterion],
-    "gapAnalysis": "Detailed analysis: what's missing/incorrect AND what specific next step to do",
-    "improvementSuggestions": ["NEXT STEP: specific action 1", "NEXT STEP: specific action 2"],
+    "overallSuccess": false,
+    "qualityScore": 0.5,
+    "dataTypeMatch": false,
+    "formatMatch": false,
+    "successCriteriaMet": [false, false],
+    "gapAnalysis": "Content does not match expected format and lacks required elements",
+    "improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
     "validationDetails": [
         {{
-            "documentName": "Document name",
-            "issues": ["specific issue 1", "specific issue 2"],
-            "suggestions": ["NEXT STEP: specific fix 1", "NEXT STEP: specific fix 2"]
+            "documentName": "Content Validation",
+            "issues": ["Format mismatch", "Missing required elements"],
+            "suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
         }}
     ]
 }}
@@ -165,9 +165,33 @@ IMPORTANT: Even if the content is binary files (like .docx, .pdf, etc.), you mus
             if response and not self._isValidJsonResponse(response):
                 logger.debug("First AI validation attempt failed, retrying with explicit JSON-only prompt")
                 explicitPrompt = f"""
-{validationPrompt}
+VALIDATE AND RETURN JSON ONLY - NO TEXT ANALYSIS
 
-IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis, no text before or after. Just the JSON object.
+Request: {intent.get('primaryGoal', 'Unknown')}
+Data Type: {intent.get('dataType', 'unknown')}
+Format: {intent.get('expectedFormat', 'unknown')}
+Criteria: {intent.get('successCriteria', [])}
+
+Content: {json.dumps(documentContents, indent=2)}
+
+RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:
+
+{{
+    "overallSuccess": false,
+    "qualityScore": 0.3,
+    "dataTypeMatch": false,
+    "formatMatch": false,
+    "successCriteriaMet": [false, false],
+    "gapAnalysis": "Content does not match expected format and lacks required elements",
+    "improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
+    "validationDetails": [
+        {{
+            "documentName": "Content Validation",
+            "issues": ["Format mismatch", "Missing required elements"],
+            "suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
+        }}
+    ]
+}}
 """
                 response = await self.services.ai.callAi(
                     prompt=explicitPrompt,
@@ -198,21 +222,21 @@ IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis,
                     # Strategy 3: Look for any JSON object
                     json_match = re.search(r'\{.*\}', result, re.DOTALL)
                 
-            if not json_match:
-                logger.debug(f"No JSON found in AI response, trying fallback extraction: {result[:200]}...")
-                logger.debug(f"Full AI response: {result}")
-                
-                # Try fallback extraction for text responses
-                fallback_result = self._extractFallbackValidationResult(result)
-                if fallback_result:
-                    logger.info("Using fallback text extraction for validation")
-                    return fallback_result
-                
-                logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed")
-                return self._createFailedValidationResult("AI validation failed - no JSON in response")
-            else:
-                result = json_match.group(0)
-                logger.debug(f"Extracted JSON directly: {result[:200]}...")
+                if json_match:
+                    result = json_match.group(0)
+                    logger.debug(f"Extracted JSON directly: {result[:200]}...")
+                else:
+                    logger.debug(f"No JSON found in AI response, trying fallback extraction: {result[:200]}...")
+                    logger.debug(f"Full AI response: {result}")
+                    
+                    # Try fallback extraction for text responses
+                    fallback_result = self._extractFallbackValidationResult(result)
+                    if fallback_result:
+                        logger.info("Using fallback text extraction for validation")
+                        return fallback_result
+                    
+                    logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed")
+                    return self._createFailedValidationResult("AI validation failed - no JSON in response")
             
             try:
                 aiResult = json.loads(result)