216 lines
9.2 KiB
Python
216 lines
9.2 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Test script that verifies structure-hierarchy and overlap-context generation.

Exercises the functions used to generate continuation prompts for incomplete
(truncated) JSON.
"""
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
|
|
def _findBreakPosition(content):
    """Return the index right after the last run of digits in *content*.

    Falls back to the length of the right-stripped content when it contains
    no digits at all.
    """
    import re

    # Keep only the final match instead of materialising every number in a
    # potentially large response file.
    lastMatch = None
    for lastMatch in re.finditer(r'\d+', content):
        pass
    if lastMatch is not None:
        return lastMatch.end()
    return len(content.rstrip())


def _findOpenArrayStart(content, breakPosition):
    """Return the index of the '[' that is still open at *breakPosition*.

    Scans backwards and skips over arrays that were already closed before
    the break. Returns -1 when no unclosed '[' exists.

    NOTE: brackets inside JSON string values are not distinguished from
    structural brackets; this is a diagnostic heuristic only.
    """
    closedArrays = 0
    for index in range(min(breakPosition, len(content)) - 1, -1, -1):
        char = content[index]
        if char == ']':
            closedArrays += 1
        elif char == '[':
            if closedArrays == 0:
                return index
            closedArrays -= 1
    return -1


def _printBanner(title, leadingNewline=False):
    """Print *title* framed by two 80-character '=' rules."""
    rule = "=" * 80
    print("\n" + rule if leadingNewline else rule)
    print(title)
    print(rule)


def testOverlapContext():
    """
    Load an incomplete (cut) JSON response and run the continuation helpers on it.

    The break position is taken to be the end of the last number in the file.
    The function then prints diagnostics for four steps:
    1. Structure hierarchy (findStructureHierarchy)
    2. Cut piece (extractCutPiece)
    3. Incomplete context (buildIncompleteContext)
    4. Overlap context (CodeGenerationPath._extractOverlapContext)

    Returns:
        dict with the keys "hierarchy", "cutPiece", "incompleteContext",
        "overlapContext", "breakPosition", "json1Length" and "json1Content".
        The first four hold whatever the project helpers returned, unmodified.

    Raises:
        OSError: if the hard-coded debug response file cannot be read.
        ImportError: if the project modules cannot be imported.
    """
    import sys

    # Load the JSON file (incomplete/cut JSON)
    basePath = Path(__file__).parent.parent.parent / "local" / "debug" / "prompts"
    file1Path = basePath / "20260104-220716-032-chapter_2_section_section_2_response.txt"

    with open(file1Path, 'r', encoding='utf-8') as f:
        json1Content = f.read().strip()

    # The last line of the file is an incomplete array such as
    # [37963, 37967, ..., 38039 - the content ends right after the last
    # number, which is where the closing ] should have been.
    breakPosition = _findBreakPosition(json1Content)

    print(f"Break position determined: {breakPosition}")
    print(f"Content at break position: '{json1Content[max(0, breakPosition-50):breakPosition+10]}'")

    # The project packages live one directory above this file; make them
    # importable before importing the functions under test.
    sys.path.insert(0, str(Path(__file__).parent.parent))

    from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece, buildIncompleteContext
    from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath

    # Test 1: Find structure hierarchy
    _printBanner("TEST 1: Structure Hierarchy")
    print(f"Break position: {breakPosition}")
    print(f"JSON length: {len(json1Content)}")
    print(f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'")
    hierarchy = findStructureHierarchy(json1Content, breakPosition)
    print(f"\nHierarchy levels found: {len(hierarchy) if hierarchy else 0}")
    if not hierarchy:
        print("WARNING: No hierarchy found! This suggests the function isn't working correctly.")
    else:
        print("\nHierarchy details (from root to cut level):")
        for i, level in enumerate(hierarchy):
            levelType = level['type']
            levelKey = level.get('key', 'N/A')
            levelDepth = level['depth']
            levelStart = level['start_pos']
            levelEnd = level['end_pos']
            print(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}")
            # Show a snippet of content at this level
            if levelStart < len(json1Content):
                snippet = json1Content[levelStart:min(levelStart + 50, levelEnd, len(json1Content))]
                print(f" Content: {repr(snippet)}")

    # Test 2: Extract cut piece
    _printBanner("TEST 2: Extract Cut Piece", leadingNewline=True)
    cutPiece = extractCutPiece(json1Content, breakPosition)
    # Guard the length like the hierarchy above, so an empty/None result
    # reaches the warning branch instead of raising TypeError here.
    print(f"\nCut piece extracted (length: {len(cutPiece) if cutPiece else 0}):")
    if cutPiece:
        print(cutPiece[:500])
    else:
        print("WARNING: Cut piece is empty! This suggests the function isn't working correctly.")
        # Try to manually find the cut piece: walk back to the '[' that
        # opens the incomplete array. Stopping at the first ',' (as an
        # element-wise scan would) never reaches it for multi-element arrays.
        arrayStart = _findOpenArrayStart(json1Content, breakPosition)
        if arrayStart >= 0:
            manualCutPiece = json1Content[arrayStart:breakPosition]
            print(f"\nManually found cut piece: {manualCutPiece[:200]}")

    # Test 3: Build incomplete context (structure hierarchy with cut point)
    _printBanner("TEST 3: Build Incomplete Context (Structure Hierarchy with Cut Point)", leadingNewline=True)
    print("Expected: Should show complete hierarchy from root to cut point")
    print(" with complete elements before cut and cut piece marked")
    incompleteContext = buildIncompleteContext(json1Content, breakPosition)
    print(f"\nIncomplete context (length: {len(incompleteContext) if incompleteContext else 0} chars):")
    print("-" * 80)
    print(incompleteContext)
    print("-" * 80)

    # Validate the output
    if incompleteContext:
        # Check if it shows hierarchy (should have multiple levels of indentation)
        indentLevels = {
            len(line) - len(line.lstrip())
            for line in incompleteContext.split('\n')
            if line.strip()
        }
        print(f"\nValidation: Found {len(indentLevels)} different indent levels (should be > 1 for hierarchy)")

        # Check if cut point is marked
        if "<-- CUT POINT" in incompleteContext:
            print("Validation: Cut point marker found ✓")
        else:
            print("Validation: WARNING - Cut point marker NOT found!")

        # Check if root structure is shown
        if incompleteContext.strip().startswith(('{', '[')):
            print("Validation: Root structure opening found ✓")
        else:
            print("Validation: WARNING - Root structure opening NOT found!")
    else:
        print("WARNING: Incomplete context is empty!")

    # Test 4: Extract overlap context (cut part and full part before same level)
    _printBanner("TEST 4: Extract Overlap Context (Cut Part + Full Part Before Same Level)", leadingNewline=True)
    overlapContext = CodeGenerationPath._extractOverlapContext(json1Content, breakPosition)
    print("\nOverlap context:")
    print(overlapContext)

    # Return results as dictionary
    return {
        "hierarchy": hierarchy,
        "cutPiece": cutPiece,
        "incompleteContext": incompleteContext,
        "overlapContext": overlapContext,
        "breakPosition": breakPosition,
        "json1Length": len(json1Content),
        "json1Content": json1Content,
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the test, print a summary, and save a full
    # report to local/debug/test_overlap_results.txt for inspection.
    separator = "=" * 80
    divider = "-" * 80

    print("Testing Overlap Context Generation")
    print(separator)
    results = testOverlapContext()

    # Normalise possibly-empty/None results once, so the summary and the
    # report treat them the same way the hierarchy is already treated
    # (len(None) and f.write(None) would raise TypeError).
    hierarchy = results['hierarchy'] or []
    cutPiece = results['cutPiece'] or ""
    incompleteContext = results['incompleteContext'] or ""
    overlapContext = results['overlapContext'] or ""
    breakPosition = results['breakPosition']
    json1Content = results['json1Content']

    print("\n" + separator)
    print("SUMMARY")
    print(separator)
    print(f"\nBreak position: {breakPosition}")
    print(f"JSON1 length: {results['json1Length']}")
    print(f"Hierarchy levels: {len(hierarchy)}")
    print(f"Cut piece length: {len(cutPiece)}")
    print(f"Incomplete context length: {len(incompleteContext)}")
    print(f"Overlap context length: {len(overlapContext)}")

    # Assemble the whole report before touching the file, so an error while
    # formatting a section cannot leave a half-written results file behind.
    reportParts = [
        separator + "\n",
        "OVERLAP CONTEXT TEST RESULTS\n",
        separator + "\n\n",
        "FIRST JSON (CUT/INCOMPLETE):\n",
        divider + "\n",
        f"Break position: {breakPosition}\n",
        f"JSON length: {results['json1Length']}\n",
        f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'\n\n",
        "Full JSON1 content:\n",
        json1Content,
        "\n\n" + separator + "\n",
        "STRUCTURE HIERARCHY:\n",
        divider + "\n",
    ]

    if hierarchy:
        reportParts.append(f"Hierarchy levels found: {len(hierarchy)}\n\n")
        reportParts.append("Hierarchy details (from root to cut level):\n")
        for i, level in enumerate(hierarchy):
            levelType = level['type']
            levelKey = level.get('key', 'N/A')
            levelDepth = level['depth']
            levelStart = level['start_pos']
            levelEnd = level['end_pos']
            reportParts.append(
                f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}\n"
            )
    else:
        reportParts.append("No hierarchy found\n")

    reportParts.extend([
        "\n\n" + separator + "\n",
        "INCOMPLETE CONTEXT (Structure Hierarchy with Cut Point):\n",
        divider + "\n",
        incompleteContext,
        "\n\n" + separator + "\n",
        "OVERLAP CONTEXT (Object containing the cut element):\n",
        divider + "\n",
        overlapContext,
    ])

    # Save results to file for inspection
    outputPath = Path(__file__).parent.parent.parent / "local" / "debug" / "test_overlap_results.txt"
    outputPath.parent.mkdir(parents=True, exist_ok=True)

    with open(outputPath, 'w', encoding='utf-8') as f:
        f.writelines(reportParts)

    print(f"\n\nFull results saved to: {outputPath}")
|