gateway/tests/test_overlap_context.py

216 lines
9.2 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Test function to verify structure hierarchy and overlap context generation.
Tests the functions used to generate continuation prompts for incomplete JSON.
"""
import json
import os
from pathlib import Path
def _findBreakPosition(content):
    """
    Return the index just past the last decimal digit in *content*.

    The truncated response is expected to end inside a numeric array, e.g.
    ``[37963, 37967, ..., 38039``, so the cut point is right after the last
    number. If *content* contains no digit at all, the length of the
    right-stripped content is returned as a fallback.
    """
    # Scan from the end: the last digit is normally within the final few
    # characters, so this avoids materialising a match for every number in
    # a potentially large file.
    for index in range(len(content) - 1, -1, -1):
        # str.isdecimal() covers the same Unicode category (Nd) as regex \d.
        if content[index].isdecimal():
            return index + 1
    return len(content.rstrip())


def _safeLen(value):
    """Return ``len(value)``, treating ``None`` and other falsy values as length 0."""
    return len(value) if value else 0


def testOverlapContext():
    """
    Load one truncated JSON response and run the continuation helpers on it.

    The function reads a debug response file below ``local/debug/prompts``,
    determines the break position (right after the last number in the file)
    and then prints the results of:
    1. findStructureHierarchy
    2. extractCutPiece
    3. buildIncompleteContext
    4. CodeGenerationPath._extractOverlapContext

    Returns:
        dict with the keys "hierarchy", "cutPiece", "incompleteContext",
        "overlapContext", "breakPosition", "json1Length" and "json1Content".

    Raises:
        OSError: propagated from open() when the debug response file cannot
            be read (e.g. FileNotFoundError if it does not exist).
    """
    # Load the JSON file (incomplete/cut JSON)
    basePath = Path(__file__).parent.parent.parent / "local" / "debug" / "prompts"
    file1Path = basePath / "20260104-220716-032-chapter_2_section_section_2_response.txt"

    # Read JSON (incomplete)
    with open(file1Path, 'r', encoding='utf-8') as f:
        json1Content = f.read().strip()

    # The last line in json1 is incomplete:
    #   [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039
    # The break position is right after the last number (where the closing ]
    # should be).
    breakPosition = _findBreakPosition(json1Content)

    print(f"Break position determined: {breakPosition}")
    print(f"Content at break position: '{json1Content[max(0, breakPosition-50):breakPosition+10]}'")

    # Import the functions we need to test. The gateway directory is put on
    # sys.path first so the project-local "modules" package resolves.
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent))
    from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece, buildIncompleteContext
    from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath

    # Test 1: Find structure hierarchy
    print("=" * 80)
    print("TEST 1: Structure Hierarchy")
    print("=" * 80)
    print(f"Break position: {breakPosition}")
    print(f"JSON length: {len(json1Content)}")
    print(f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'")

    hierarchy = findStructureHierarchy(json1Content, breakPosition)
    print(f"\nHierarchy levels found: {_safeLen(hierarchy)}")
    if not hierarchy:
        print("WARNING: No hierarchy found! This suggests the function isn't working correctly.")
    else:
        print("\nHierarchy details (from root to cut level):")
        for i, level in enumerate(hierarchy):
            levelType = level['type']
            levelKey = level.get('key', 'N/A')
            levelDepth = level['depth']
            levelStart = level['start_pos']
            levelEnd = level['end_pos']
            print(f"  Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}")
            # Show a snippet of content at this level
            if levelStart < len(json1Content):
                snippet = json1Content[levelStart:min(levelStart + 50, levelEnd, len(json1Content))]
                print(f"    Content: {repr(snippet)}")

    # Test 2: Extract cut piece
    print("\n" + "=" * 80)
    print("TEST 2: Extract Cut Piece")
    print("=" * 80)
    cutPiece = extractCutPiece(json1Content, breakPosition)
    # _safeLen keeps this line from raising before the emptiness check below
    # gets a chance to report a missing result.
    print(f"\nCut piece extracted (length: {_safeLen(cutPiece)}):")
    if cutPiece:
        # Only the first 500 characters are shown.
        print(cutPiece[:500])
    else:
        print("WARNING: Cut piece is empty! This suggests the function isn't working correctly.")
        # Try to manually find the cut piece:
        # look backwards from break position for the start of the incomplete array
        i = breakPosition - 1
        while i >= 0 and json1Content[i] not in ('[', ',', '\n'):
            i -= 1
        if i >= 0 and json1Content[i] == '[':
            manualCutPiece = json1Content[i:breakPosition]
            print(f"\nManually found cut piece: {manualCutPiece[:200]}")

    # Test 3: Build incomplete context (structure hierarchy with cut point)
    print("\n" + "=" * 80)
    print("TEST 3: Build Incomplete Context (Structure Hierarchy with Cut Point)")
    print("=" * 80)
    print("Expected: Should show complete hierarchy from root to cut point")
    print("          with complete elements before cut and cut piece marked")
    incompleteContext = buildIncompleteContext(json1Content, breakPosition)
    print(f"\nIncomplete context (length: {_safeLen(incompleteContext)} chars):")
    print("-" * 80)
    print(incompleteContext)
    print("-" * 80)

    # Validate the output
    if incompleteContext:
        # Check if it shows hierarchy (should have multiple levels of indentation)
        indentLevels = {
            len(line) - len(line.lstrip())
            for line in incompleteContext.split('\n')
            if line.strip()
        }
        print(f"\nValidation: Found {len(indentLevels)} different indent levels (should be > 1 for hierarchy)")

        # Check if cut point is marked
        if "<-- CUT POINT" in incompleteContext:
            print("Validation: Cut point marker found ✓")
        else:
            print("Validation: WARNING - Cut point marker NOT found!")

        # Check if root structure is shown
        if incompleteContext.strip().startswith(('{', '[')):
            print("Validation: Root structure opening found ✓")
        else:
            print("Validation: WARNING - Root structure opening NOT found!")
    else:
        print("WARNING: Incomplete context is empty!")

    # Test 4: Extract overlap context (cut part and full part before same level)
    print("\n" + "=" * 80)
    print("TEST 4: Extract Overlap Context (Cut Part + Full Part Before Same Level)")
    print("=" * 80)
    overlapContext = CodeGenerationPath._extractOverlapContext(json1Content, breakPosition)
    print("\nOverlap context:")
    print(overlapContext)

    # Return results as dictionary
    results = {
        "hierarchy": hierarchy,
        "cutPiece": cutPiece,
        "incompleteContext": incompleteContext,
        "overlapContext": overlapContext,
        "breakPosition": breakPosition,
        "json1Length": len(json1Content),
        "json1Content": json1Content,
    }
    return results
def _writeResultsReport(results, outputPath):
    """
    Write the dictionary returned by testOverlapContext() to *outputPath*.

    The report contains the break position, the full truncated JSON, the
    structure hierarchy, the incomplete context and the overlap context.
    A missing (None) incomplete or overlap context is written as an empty
    section instead of aborting the report.
    """
    rule = "=" * 80
    dashes = "-" * 80
    breakPosition = results['breakPosition']
    json1Content = results['json1Content']
    hierarchy = results['hierarchy']

    parts = [
        rule + "\n",
        "OVERLAP CONTEXT TEST RESULTS\n",
        rule + "\n\n",
        "FIRST JSON (CUT/INCOMPLETE):\n",
        dashes + "\n",
        f"Break position: {breakPosition}\n",
        f"JSON length: {results['json1Length']}\n",
        f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'\n\n",
        "Full JSON1 content:\n",
        json1Content,
        "\n\n" + rule + "\n",
        "STRUCTURE HIERARCHY:\n",
        dashes + "\n",
    ]

    if hierarchy:
        parts.append(f"Hierarchy levels found: {len(hierarchy)}\n\n")
        parts.append("Hierarchy details (from root to cut level):\n")
        for i, level in enumerate(hierarchy):
            levelType = level['type']
            levelKey = level.get('key', 'N/A')
            levelDepth = level['depth']
            levelStart = level['start_pos']
            levelEnd = level['end_pos']
            parts.append(f"  Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}\n")
    else:
        parts.append("No hierarchy found\n")

    parts.extend([
        "\n\n" + rule + "\n",
        "INCOMPLETE CONTEXT (Structure Hierarchy with Cut Point):\n",
        dashes + "\n",
        # "or ''" keeps a None result from raising TypeError on write.
        results['incompleteContext'] or "",
        "\n\n" + rule + "\n",
        "OVERLAP CONTEXT (Object containing the cut element):\n",
        dashes + "\n",
        results['overlapContext'] or "",
    ])

    outputPath.parent.mkdir(parents=True, exist_ok=True)
    # Assemble first, write once: one buffered write instead of many tiny ones.
    with open(outputPath, 'w', encoding='utf-8') as f:
        f.write("".join(parts))


def main():
    """Run testOverlapContext(), print a short summary and save the full results to a file."""
    print("Testing Overlap Context Generation")
    print("=" * 80)
    results = testOverlapContext()

    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print(f"\nBreak position: {results['breakPosition']}")
    print(f"JSON1 length: {results['json1Length']}")
    # Every length is guarded the same way so that an empty/None result is
    # reported as 0 instead of raising.
    print(f"Hierarchy levels: {len(results['hierarchy']) if results['hierarchy'] else 0}")
    print(f"Cut piece length: {len(results['cutPiece']) if results['cutPiece'] else 0}")
    print(f"Incomplete context length: {len(results['incompleteContext']) if results['incompleteContext'] else 0}")
    print(f"Overlap context length: {len(results['overlapContext']) if results['overlapContext'] else 0}")

    # Save results to file for inspection
    outputPath = Path(__file__).parent.parent.parent / "local" / "debug" / "test_overlap_results.txt"
    _writeResultsReport(results, outputPath)
    print(f"\n\nFull results saved to: {outputPath}")


if __name__ == "__main__":
    main()