diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 117930e0..99e509dc 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -327,11 +327,41 @@ Respond with ONLY a JSON object in this exact format: logger.info(f"Defined {len(accumulationState.kpis)} KPIs: {[kpi.get('id') for kpi in accumulationState.kpis]}") # Extract and validate KPIs (if in accumulation mode with KPIs defined) - if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis and parsedResult: - updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( - parsedResult, - accumulationState.kpis - ) + if accumulationState and accumulationState.isAccumulationMode and accumulationState.kpis: + # For KPI extraction, prefer accumulated JSON string over repaired JSON + # because repairBrokenJson may lose data (e.g., empty rows array when JSON is incomplete) + updatedKpis = [] + + # First try to extract from parsedResult (repaired JSON) + if parsedResult: + try: + updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( + parsedResult, + accumulationState.kpis + ) + # Check if we got meaningful values (non-zero) + hasValidValues = any(kpi.get("currentValue", 0) > 0 for kpi in updatedKpis) + if not hasValidValues and accumulationState.accumulatedJsonString: + # Repaired JSON has empty values, try accumulated string + logger.debug("Repaired JSON has empty KPI values, trying accumulated JSON string") + updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + accumulationState.accumulatedJsonString, + accumulationState.kpis + ) + except Exception as e: + logger.debug(f"Error extracting KPIs from parsedResult: {e}") + updatedKpis = [] + + # If no parsedResult or extraction failed, try accumulated string + if not updatedKpis and accumulationState.accumulatedJsonString: + try: + updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + accumulationState.accumulatedJsonString, + accumulationState.kpis + ) + except Exception as e: + logger.debug(f"Error extracting KPIs from accumulated JSON string: {e}") + updatedKpis = [] if updatedKpis: shouldProceed, reason = JsonResponseHandler.validateKpiProgression( @@ -518,11 +548,16 @@ Last Complete Element: Task: Define which JSON items should be tracked to measure completion progress. +IMPORTANT: Analyze the JSON structure to understand what is being tracked: +1. Identify the structure type (table with rows, list with items, etc.) +2. Determine what the jsonPath actually counts (number of rows, number of items, etc.) +3. Calculate targetValue based on what is being tracked, NOT the total quantity requested + For each trackable item, provide: - id: Unique identifier (use descriptive name) -- description: What this KPI measures -- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "sections[0].elements[0].items") -- targetValue: Target value to reach (integer) +- description: What this KPI measures (be specific about what is counted) +- jsonPath: Path to extract value from JSON (use dot notation with array indices, e.g., "documents[0].sections[0].elements") +- targetValue: Target value to reach (integer) - MUST match what jsonPath actually tracks (rows count, items count, etc.) Return ONLY valid JSON in this format: {{ diff --git a/modules/services/serviceAi/subJsonResponseHandling.py b/modules/services/serviceAi/subJsonResponseHandling.py index 489aa267..558536b1 100644 --- a/modules/services/serviceAi/subJsonResponseHandling.py +++ b/modules/services/serviceAi/subJsonResponseHandling.py @@ -1239,13 +1239,76 @@ class JsonResponseHandler: # Count items/rows/elements based on type if isinstance(value, list): updatedKpi["currentValue"] = len(value) + logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items") elif isinstance(value, (int, float)): updatedKpi["currentValue"] = int(value) + logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: numeric value {int(value)}") else: updatedKpi["currentValue"] = 0 + logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: non-list/non-numeric value, set to 0") except Exception as e: - logger.debug(f"Error extracting KPI {kpiId} from path {jsonPath}: {e}") + logger.warning(f"Error extracting KPI {kpiId} from path {jsonPath}: {e}") + updatedKpi["currentValue"] = kpi.get("currentValue", 0) + + updatedKpis.append(updatedKpi) + + return updatedKpis + + @staticmethod + def extractKpiValuesFromIncompleteJson( + jsonString: str, + kpis: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """ + Extract KPI values from incomplete JSON string. + Uses existing JSON completion function to close incomplete structures, then extracts KPIs. + + Args: + jsonString: Incomplete JSON string + kpis: List of KPI objects + + Returns: + Updated list of KPI objects with currentValue set + """ + updatedKpis = [] + + for kpi in kpis: + kpiId = kpi.get("id") + jsonPath = kpi.get("jsonPath") + + if not kpiId or not jsonPath: + continue + + updatedKpi = kpi.copy() + + try: + # Use existing JSON completion function to close incomplete structures + from modules.shared.jsonUtils import extractJsonString, closeJsonStructures + + # Extract JSON string and complete it with missing closing elements + extracted = extractJsonString(jsonString) + completed = closeJsonStructures(extracted) + + # Parse completed JSON + parsed = json.loads(completed) + + # Extract value using path + value = JsonResponseHandler._extractValueByPath(parsed, jsonPath) + + # Count items/rows/elements based on type + if isinstance(value, list): + updatedKpi["currentValue"] = len(value) + logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items") + elif isinstance(value, (int, float)): + updatedKpi["currentValue"] = int(value) + logger.debug(f"Extracted KPI {kpiId} from completed JSON: numeric value {int(value)}") + else: + updatedKpi["currentValue"] = 0 + logger.debug(f"Extracted KPI {kpiId} from completed JSON: non-list/non-numeric value, set to 0") + + except Exception as e: + logger.warning(f"Error extracting KPI {kpiId} from incomplete JSON: {e}") updatedKpi["currentValue"] = kpi.get("currentValue", 0) updatedKpis.append(updatedKpi) @@ -1313,6 +1376,8 @@ class JsonResponseHandler: # Build dict of last values for comparison lastValues = {kpi.get("id"): kpi.get("currentValue", 0) for kpi in accumulationState.kpis} + logger.debug(f"KPI validation: lastValues = {lastValues}") + logger.debug(f"KPI validation: updatedKpis = {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}") # Check if any KPI went backwards for updatedKpi in updatedKpis: @@ -1359,7 +1424,7 @@ class JsonResponseHandler: break if not atLeastOneProgressed: - logger.warning("No KPIs progressed") + logger.warning(f"No KPIs progressed. Last values: {lastValues}, Current values: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}") return False, "No progress" return True, "Progress detected" diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index 20152578..d2805ecb 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -128,7 +128,7 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: for stepSize in [100, 50, 10, 1]: for i in range(len(text), 0, -stepSize): testStr = text[:i] - closedStr = _closeJsonStructures(testStr) + closedStr = closeJsonStructures(testStr) obj, err, _ = tryParseJson(closedStr) if err is None and isinstance(obj, dict): bestResult = obj @@ -161,7 +161,7 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: return bestResult # Strategy 3: Structure closing - close incomplete structures - closedStr = _closeJsonStructures(text) + closedStr = closeJsonStructures(text) obj, err, _ = tryParseJson(closedStr) if err is None and isinstance(obj, dict): logger.info("Repaired JSON using structure closing") @@ -171,7 +171,7 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: return None -def _closeJsonStructures(text: str) -> str: +def closeJsonStructures(text: str) -> str: """ Close incomplete JSON structures by adding missing closing brackets. """ @@ -212,7 +212,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]: for step_size in [1000, 500, 100, 50, 10]: for i in range(len(text), 0, -step_size): test_str = text[:i] - closed_str = _closeJsonStructures(test_str) + closed_str = closeJsonStructures(test_str) obj, err, _ = tryParseJson(closed_str) if err is None and isinstance(obj, dict): extracted_sections = extractSectionsFromDocument(obj) diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 218e3162..a2c93288 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -456,47 +456,35 @@ EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContex === VALIDATION INSTRUCTIONS === -VALIDATION CONTEXT: -You have METADATA (filename, format, size, mimeType) and STRUCTURE SUMMARY (if available: sections, tables, captions, IDs, statistics). +IMPORTANT: Different formats can represent the same data structure. Do not reject a format just because it differs from expected - check the structure summary for actual content. -VALIDATION PRINCIPLES: -1. EVIDENCE-BASED VALIDATION (CRITICAL): Claims must match structure evidence. If structure shows different values than claimed, trust the structure evidence, not claims. -2. INDEPENDENT CRITERIA EVALUATION (CRITICAL): For criteriaMapping reason field - address ONLY the specific criterion requirement. Do not mention other criteria or other issues. -3. PRIORITY: Missing data > Formatting issues. Always prioritize data completeness over format correctness. -4. Structure validation: Use structure summary (statistics, counts, structure metadata) as PRIMARY evidence. Compare with task requirements. -5. Discrepancy detection: If task requires specific quantities/amounts but structure shows different values, classify as missing_data or incomplete_data, not success. -6. Format compatibility: Match delivered format to expected format (secondary priority after data completeness) -7. Filename appropriateness: Check if filename suggests correct content type -8. Document count: Verify number matches expectations +VALIDATION RULES: +1. Use structure summary (sections, statistics, counts) as PRIMARY evidence. Trust structure over format claims. +2. For each criterion in criteriaMapping: evaluate ONLY that criterion. Do not mention other criteria. +3. Priority: Data completeness > Format compatibility. Missing data is more critical than format mismatch. +4. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name. +5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help. -LIMITATIONS: -- Cannot validate: Content accuracy, data correctness, formatting details, or requirements requiring full content reading -- If structure summary unavailable, validate only metadata (format, filename, count, size) +VALIDATION STEPS: +- Check structure summary for quantities, counts, statistics +- Compare found values with required values from criteria +- If structure unavailable, use metadata only (format, filename, size) +- Classify gaps: missing_data (less than required), incomplete_data (partial), wrong_structure (wrong organization), wrong_format (format mismatch but data present) +- Assess if documents contain the required data: If structure shows documents lack the data, note this in findings - data must be generated or obtained elsewhere, not re-extracted from same documents -SCORING GUIDELINES: -- Data complete + format matches + structure matches requirements → qualityScore: 0.9-1.0 -- Data complete but format/structure issues → qualityScore: 0.7-0.9 -- Missing/incomplete data (even if format correct) → qualityScore: <0.7 -- Claims don't match structure evidence → qualityScore: <0.6 (trust structure, not claims) -- Format mismatch → qualityScore: <0.7 -- Only suggest improvements for CLEAR metadata/structure issues +SCORING: +- Data complete + structure matches → qualityScore: 0.9-1.0 +- Data complete but format issues → qualityScore: 0.7-0.9 +- Missing/incomplete data → qualityScore: <0.7 +- Format mismatch only (data present) → qualityScore: 0.6-0.7 -VALIDATION LOGIC: -- If structure shows fewer quantities/amounts than required → gapType: missing_data or incomplete_data -- If structure shows wrong organization but correct quantity → gapType: wrong_structure -- If structure matches requirements but format wrong → gapType: wrong_format -- If claims say "X delivered" but structure shows "Y" (Y < X) → overallSuccess: false, gapType: missing_data -- Always trust structure statistics over any claims or descriptions +SUGGESTIONS: +- ONE suggestion per UNMET criterion, ordered by criteriaMapping index +- Reference actual structure values found and required values +- Calculate quantitative gaps when numbers are available +- Be specific and actionable based on structure evidence -IMPROVEMENT SUGGESTIONS PRIORITY (CRITICAL): -- Create ONE suggestion per UNMET criterion from criteriaMapping -- Order suggestions by criteriaMapping index: [0] = first unmet criterion, [1] = second unmet criterion, etc. -- Each suggestion addresses ONLY that specific criterion requirement -- Do NOT combine multiple criteria into one suggestion -- ACTIONABLE GUIDANCE: Provide concrete, actionable steps based on the structure evidence. Avoid simply restating the requirement - instead, explain what action to perform to meet the criterion based on what was actually found -- EVIDENCE-BASED: Base suggestions on structure evidence, not assumptions. - -=== OUTPUT FORMAT (JSON TEMPLATE) === +=== OUTPUT FORMAT === {{ "overallSuccess": false, "qualityScore": 0.0, @@ -506,33 +494,28 @@ IMPROVEMENT SUGGESTIONS PRIORITY (CRITICAL): "criteriaMapping": [ {{ "index": 0, - "criterion": "exact_criterion_text_from_data_section", + "criterion": "exact_criterion_text", "met": false, - "reason": "explanation_about_this_criterion_based_on_structure_evidence" + "reason": "explanation_for_this_criterion_only" }} ], - "gapAnalysis": "Brief description of gaps based on metadata/structure only. If validation is limited, state this clearly.", + "gapAnalysis": "Brief gap summary", "gapType": "missing_data" | "wrong_structure" | "wrong_format" | "incomplete_data" | "no_gap", "structureComparison": {{ "required": {{}}, "found": {{}}, "gap": {{}} }}, - "improvementSuggestions": [], + "improvementSuggestions": ["One suggestion per unmet criterion"], "validationDetails": [ {{ - "documentName": "document.ext", - "issues": ["Issue inferred from metadata/structure only"], - "suggestions": ["Specific fix based on metadata/structure analysis"] + "documentName": "name.ext", + "issues": ["Specific issue"], + "suggestions": ["Specific fix"] }} ] }} -OUTPUT FORMAT NOTES: -- criteriaMapping reason: Address ONLY the specific criterion requirement. -- improvementSuggestions: ONE suggestion per UNMET criterion, ordered by criteriaMapping index. Do NOT combine criteria. -- improvementSuggestions: Each suggestion must reference actual structure values found, calculate quantitative gaps when structure provides numbers, and provide actionable guidance based on structure evidence. Avoid generic restatements of requirements. - === DATA === SUCCESS CRITERIA TO VALIDATE in criteriaMapping array: diff --git a/tests/functional/repaired_debug.json b/tests/functional/repaired_debug.json new file mode 100644 index 00000000..3dfe89d6 --- /dev/null +++ b/tests/functional/repaired_debug.json @@ -0,0 +1,35 @@ +{ + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": [ + { + "sections": [ + { + "id": "section_prime_numbers_table", + "content_type": "table", + "elements": [ + { + "headers": [ + "Column 1", + "Column 2", + "Column 3", + "Column 4", + "Column 5", + "Column 6", + "Column 7", + "Column 8", + "Column 9", + "Column 10" + ], + "rows": [] + } + ], + "order": 0 + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/functional/test_kpi_fix.py b/tests/functional/test_kpi_fix.py new file mode 100644 index 00000000..1e864815 --- /dev/null +++ b/tests/functional/test_kpi_fix.py @@ -0,0 +1,86 @@ +"""Test KPI extraction fix with incomplete JSON""" +import json +import sys +import os + +# Add gateway directory to path +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler +from modules.datamodels.datamodelAi import JsonAccumulationState + +# Load actual incomplete JSON response +json_file = os.path.join( + os.path.dirname(__file__), + "..", "..", "..", "local", "debug", "prompts", + "20251130-211706-078-document_generation_response.txt" +) + +with open(json_file, 'r', encoding='utf-8') as f: + incompleteJsonString = f.read() + +# KPI definition +kpiDefinitions = [{ + "id": "prime_numbers_count", + "description": "Number of prime numbers generated and organized in the table", + "jsonPath": "documents[0].sections[0].elements[0].rows", + "targetValue": 4000 +}] + +print("="*60) +print("KPI EXTRACTION FIX TEST") +print("="*60) + +# Test 1: Extract from incomplete JSON string +print(f"\nTest 1: Extracting from incomplete JSON string...") +updatedKpis = JsonResponseHandler.extractKpiValuesFromIncompleteJson( + incompleteJsonString, + [{**kpi, "currentValue": 0} for kpi in kpiDefinitions] +) + +print(f" Result: {updatedKpis[0].get('currentValue', 'N/A')} rows") +print(f" Expected: ~400 rows (incomplete JSON)") + +# Test 2: Compare with repaired JSON +print(f"\nTest 2: Comparing with repaired JSON...") +from modules.shared.jsonUtils import extractJsonString, repairBrokenJson + +extracted = extractJsonString(incompleteJsonString) +repaired = repairBrokenJson(extracted) + +if repaired: + repairedKpis = JsonResponseHandler.extractKpiValuesFromJson( + repaired, + [{**kpi, "currentValue": 0} for kpi in kpiDefinitions] + ) + print(f" Repaired JSON: {repairedKpis[0].get('currentValue', 'N/A')} rows") + print(f" Incomplete JSON string: {updatedKpis[0].get('currentValue', 'N/A')} rows") + + if updatedKpis[0].get('currentValue', 0) > repairedKpis[0].get('currentValue', 0): + print(f" ✅ Fix works! Incomplete JSON string extraction found more data") + else: + print(f" ⚠️ Both methods found same or less data") + +# Test 3: Validate progression +print(f"\nTest 3: Testing KPI validation...") +accumulationState = JsonAccumulationState( + accumulatedJsonString=incompleteJsonString, + isAccumulationMode=True, + lastParsedResult=repaired, + allSections=[], + kpis=[{**kpi, "currentValue": 0} for kpi in kpiDefinitions] +) + +shouldProceed, reason = JsonResponseHandler.validateKpiProgression( + accumulationState, + updatedKpis +) + +print(f" Result: shouldProceed={shouldProceed}, reason={reason}") +if shouldProceed: + print(f" ✅ Validation passes - KPIs will progress correctly") +else: + print(f" ❌ Validation fails - {reason}") + diff --git a/tests/functional/test_kpi_full.py b/tests/functional/test_kpi_full.py new file mode 100644 index 00000000..2d73f4be --- /dev/null +++ b/tests/functional/test_kpi_full.py @@ -0,0 +1,95 @@ +"""Test full KPI extraction and validation flow""" +import json +import sys +import os + +# Add gateway directory to path +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler +from modules.datamodels.datamodelAi import JsonAccumulationState + +# Load actual JSON response +json_file = os.path.join( + os.path.dirname(__file__), + "..", "..", "..", "local", "debug", "prompts", + "20251130-211706-078-document_generation_response.txt" +) + +if not os.path.exists(json_file): + print(f"File not found: {json_file}") + sys.exit(1) + +with open(json_file, 'r', encoding='utf-8') as f: + content = f.read() + +# Extract JSON +from modules.shared.jsonUtils import extractJsonString +extracted = extractJsonString(content) +parsedJson = json.loads(extracted) + +# KPI definition from the response +kpiDefinitions = [{ + "id": "prime_numbers_count", + "description": "Number of prime numbers generated and organized in the table", + "jsonPath": "documents[0].sections[0].elements[0].rows", + "targetValue": 4000 +}] + +print("="*60) +print("KPI EXTRACTION AND VALIDATION TEST") +print("="*60) + +# Step 1: Initialize accumulation state with KPIs +accumulationState = JsonAccumulationState( + accumulatedJsonString="", + isAccumulationMode=True, + lastParsedResult=None, + allSections=[], + kpis=[{**kpi, "currentValue": 0} for kpi in kpiDefinitions] +) + +print(f"\nStep 1: Initialized KPIs") +for kpi in accumulationState.kpis: + print(f" KPI {kpi['id']}: currentValue={kpi.get('currentValue', 'N/A')}, targetValue={kpi.get('targetValue', 'N/A')}") + +# Step 2: Extract KPI values from parsed JSON +print(f"\nStep 2: Extracting KPI values from JSON...") +updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( + parsedJson, + accumulationState.kpis +) + +print(f" Extracted {len(updatedKpis)} KPIs") +for kpi in updatedKpis: + print(f" KPI {kpi['id']}: currentValue={kpi.get('currentValue', 'N/A')}, targetValue={kpi.get('targetValue', 'N/A')}") + +# Step 3: Validate progression +print(f"\nStep 3: Validating KPI progression...") +shouldProceed, reason = JsonResponseHandler.validateKpiProgression( + accumulationState, + updatedKpis +) + +print(f" Result: shouldProceed={shouldProceed}, reason={reason}") + +# Step 4: Check what's in accumulationState.kpis vs updatedKpis +print(f"\nStep 4: Comparing state...") +print(f" accumulationState.kpis[0].currentValue = {accumulationState.kpis[0].get('currentValue', 'N/A')}") +print(f" updatedKpis[0].currentValue = {updatedKpis[0].get('currentValue', 'N/A')}") + +# Step 5: Check if we need to update accumulationState.kpis +print(f"\nStep 5: Updating accumulationState.kpis...") +accumulationState.kpis = updatedKpis +print(f" Updated accumulationState.kpis[0].currentValue = {accumulationState.kpis[0].get('currentValue', 'N/A')}") + +# Step 6: Validate again (should show progress) +print(f"\nStep 6: Validating again after update...") +shouldProceed2, reason2 = JsonResponseHandler.validateKpiProgression( + accumulationState, + updatedKpis +) +print(f" Result: shouldProceed={shouldProceed2}, reason={reason2}") + diff --git a/tests/functional/test_kpi_incomplete.py b/tests/functional/test_kpi_incomplete.py new file mode 100644 index 00000000..e308246f --- /dev/null +++ b/tests/functional/test_kpi_incomplete.py @@ -0,0 +1,133 @@ +"""Test KPI extraction with incomplete JSON""" +import json +import sys +import os + +# Add gateway directory to path +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler +from modules.datamodels.datamodelAi import JsonAccumulationState +from modules.shared.jsonUtils import extractJsonString, repairBrokenJson + +# Load actual incomplete JSON response +json_file = os.path.join( + os.path.dirname(__file__), + "..", "..", "..", "local", "debug", "prompts", + "20251130-211706-078-document_generation_response.txt" +) + +if not os.path.exists(json_file): + print(f"File not found: {json_file}") + sys.exit(1) + +with open(json_file, 'r', encoding='utf-8') as f: + content = f.read() + +print("="*60) +print("KPI EXTRACTION WITH INCOMPLETE JSON TEST") +print("="*60) + +# Step 1: Try to extract and parse JSON +print(f"\nStep 1: Extracting JSON string...") +extracted = extractJsonString(content) +print(f" Extracted length: {len(extracted)} chars") + +# Step 2: Try to parse +print(f"\nStep 2: Attempting to parse...") +parsedJson = None +try: + parsedJson = json.loads(extracted) + print(f" ✅ JSON parsed successfully") +except json.JSONDecodeError as e: + print(f" ❌ JSON parsing failed: {e}") + print(f" Attempting repair...") + try: + parsedJson = repairBrokenJson(extracted) + if parsedJson: + print(f" ✅ JSON repaired successfully") + else: + print(f" ❌ JSON repair failed") + except Exception as e2: + print(f" ❌ Repair error: {e2}") + +if not parsedJson: + print("\n❌ Cannot proceed - JSON cannot be parsed or repaired") + sys.exit(1) + +# Step 3: Check if path exists +print(f"\nStep 3: Checking if KPI path exists...") +path = "documents[0].sections[0].elements[0].rows" +try: + value = JsonResponseHandler._extractValueByPath(parsedJson, path) + print(f" ✅ Path exists: {type(value)}") + if isinstance(value, list): + print(f" ✅ Value is list with {len(value)} items") + if len(value) > 0: + print(f" ✅ First item: {value[0]}") + else: + print(f" ⚠️ Value is not a list: {value}") +except Exception as e: + print(f" ❌ Path extraction failed: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + +# Step 4: Test KPI extraction +print(f"\nStep 4: Testing KPI extraction...") +kpiDefinitions = [{ + "id": "prime_numbers_count", + "description": "Number of prime numbers generated and organized in the table", + "jsonPath": "documents[0].sections[0].elements[0].rows", + "targetValue": 4000 +}] + +accumulationState = JsonAccumulationState( + accumulatedJsonString="", + isAccumulationMode=True, + lastParsedResult=parsedJson, + allSections=[], + kpis=[{**kpi, "currentValue": 0} for kpi in kpiDefinitions] +) + +print(f" Initial KPI currentValue: {accumulationState.kpis[0].get('currentValue', 'N/A')}") + +updatedKpis = JsonResponseHandler.extractKpiValuesFromJson( + parsedJson, + accumulationState.kpis +) + +print(f" Updated KPI currentValue: {updatedKpis[0].get('currentValue', 'N/A')}") + +# Step 5: Test validation +print(f"\nStep 5: Testing KPI validation...") +shouldProceed, reason = JsonResponseHandler.validateKpiProgression( + accumulationState, + updatedKpis +) + +print(f" Result: shouldProceed={shouldProceed}, reason={reason}") + +if not shouldProceed: + print(f"\n❌ VALIDATION FAILED - This is the problem!") + print(f" Let's debug why...") + + # Check what's being compared + lastValues = {kpi.get("id"): kpi.get("currentValue", 0) for kpi in accumulationState.kpis} + print(f" Last values from accumulationState: {lastValues}") + + for updatedKpi in updatedKpis: + kpiId = updatedKpi.get("id") + currentValue = updatedKpi.get("currentValue", 0) + print(f" Updated KPI {kpiId}: currentValue={currentValue}") + + if kpiId in lastValues: + lastValue = lastValues[kpiId] + print(f" Comparing: {lastValue} vs {currentValue}") + if currentValue > lastValue: + print(f" ✅ Should detect progress!") + else: + print(f" ❌ No progress detected (currentValue <= lastValue)") + diff --git a/tests/functional/test_kpi_path.py b/tests/functional/test_kpi_path.py new file mode 100644 index 00000000..7be6dba8 --- /dev/null +++ b/tests/functional/test_kpi_path.py @@ -0,0 +1,66 @@ +"""Test KPI path extraction""" +import json +import sys +import os + +# Add gateway directory to path +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler + +# Test JSON matching the actual response +test_json = { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": [ + { + "id": "doc_1", + "title": "Prime Numbers Table", + "filename": "prime_numbers.json", + "sections": [ + { + "id": "section_prime_numbers_table", + "content_type": "table", + "elements": [ + { + "headers": ["Column 1", "Column 2"], + "rows": [ + [2, 3, 5, 7, 11], + [13, 17, 19, 23, 29] + ] + } + ] + } + ] + } + ] +} + +# Test path from KPI definition +path = "documents[0].sections[0].elements[0].rows" + +print(f"Testing path: {path}") +print(f"JSON structure: documents[0].sections[0].elements[0].rows") +print() + +try: + value = JsonResponseHandler._extractValueByPath(test_json, path) + print(f"✅ Extracted value: {type(value)}") + print(f" Value: {value}") + + if isinstance(value, list): + count = len(value) + print(f" Count: {count}") + else: + print(f" Not a list!") + +except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + diff --git a/tests/functional/test_repair_debug.py b/tests/functional/test_repair_debug.py new file mode 100644 index 00000000..1e60d725 --- /dev/null +++ b/tests/functional/test_repair_debug.py @@ -0,0 +1,58 @@ +"""Debug what repairBrokenJson returns""" +import json +import sys +import os + +# Add gateway directory to path +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +from modules.shared.jsonUtils import extractJsonString, repairBrokenJson + +# Load actual incomplete JSON response +json_file = os.path.join( + os.path.dirname(__file__), + "..", "..", "..", "local", "debug", "prompts", + "20251130-211706-078-document_generation_response.txt" +) + +with open(json_file, 'r', encoding='utf-8') as f: + content = f.read() + +extracted = extractJsonString(content) +print(f"Extracted JSON length: {len(extracted)} chars") +print(f"Last 200 chars: {extracted[-200:]}") + +repaired = repairBrokenJson(extracted) +if repaired: + print(f"\nRepaired JSON structure:") + print(f" Has 'documents': {'documents' in repaired}") + if 'documents' in repaired and isinstance(repaired['documents'], list) and len(repaired['documents']) > 0: + doc = repaired['documents'][0] + print(f" Has 'sections': {'sections' in doc}") + if 'sections' in doc and isinstance(doc['sections'], list) and len(doc['sections']) > 0: + section = doc['sections'][0] + print(f" Has 'elements': {'elements' in section}") + if 'elements' in section and isinstance(section['elements'], list) and len(section['elements']) > 0: + element = section['elements'][0] + print(f" Has 'rows': {'rows' in element}") + if 'rows' in element: + rows = element['rows'] + print(f" Rows type: {type(rows)}") + if isinstance(rows, list): + print(f" Rows count: {len(rows)}") + if len(rows) > 0: + print(f" First row: {rows[0]}") + print(f" Last row: {rows[-1]}") + else: + print(f" Rows value: {rows}") + + # Save to file for inspection + output_file = os.path.join(os.path.dirname(__file__), "repaired_debug.json") + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(repaired, f, indent=2, ensure_ascii=False) + print(f"\nSaved repaired JSON to: {output_file}") +else: + print("Repair failed") +