gateway/tests/functional/test07_json_merge.py
2026-01-22 21:11:25 +01:00

910 lines
36 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Test JSON string accumulation for broken JSON iterations - String accumulation approach"""
import json
import sys
import os
# Make the gateway root importable: this file sits in tests/functional/, so the
# root is two directory levels above it. Only prepend it when it is missing so
# repeated imports of this module do not pile up duplicate sys.path entries.
_gateway_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "..")
)
if _gateway_path not in sys.path:
    sys.path.insert(0, _gateway_path)
# Import after path setup
from modules.aichat.serviceAi.subJsonResponseHandling import JsonResponseHandler # type: ignore
from modules.shared.jsonUtils import extractSectionsFromDocument # type: ignore
def createBigJsonStructure():
    """Build the small reference document used by the basic slicing tests.

    Returns:
        A dict with a single entry under "documents". That document holds
        three sections, in order: a bullet list with 20 items, a table with
        4 headers and 8 rows, and one Python code block.
    """
    bulletItems = [f"item_{number}" for number in range(1, 21)]  # 20 items

    people = [
        ("Alice", "25", "New York"),
        ("Bob", "30", "London"),
        ("Charlie", "35", "Paris"),
        ("Diana", "28", "Berlin"),
        ("Eve", "32", "Tokyo"),
        ("Frank", "27", "Sydney"),
        ("Grace", "29", "Toronto"),
        ("Henry", "31", "Madrid"),
    ]
    # Row IDs are 1-based strings, matching the other all-string cells.
    tableRows = [
        [str(rowId), name, age, city]
        for rowId, (name, age, city) in enumerate(people, start=1)
    ]

    codeText = "def calculate_sum(numbers):\n result = 0\n for num in numbers:\n result += num\n return result\n\ndef calculate_product(numbers):\n result = 1\n for num in numbers:\n result *= num\n return result"

    bulletSection = {
        "id": "section_bullet_list",
        "content_type": "bullet_list",
        "order": 0,
        "elements": [{"items": bulletItems}],
    }
    tableSection = {
        "id": "section_table",
        "content_type": "table",
        "order": 1,
        "elements": [{
            "headers": ["ID", "Name", "Age", "City"],
            "rows": tableRows,
        }],
    }
    codeSection = {
        "id": "section_code_block",
        "content_type": "code_block",
        "order": 2,
        "elements": [{"code": codeText, "language": "python"}],
    }

    document = {
        "documentName": "test_document.json",
        "sections": [bulletSection, tableSection, codeSection],
    }
    return {"documents": [document]}
def createComplexJsonStructure():
    """Build the larger reference document used by the advanced slicing tests.

    Returns:
        A dict with a single entry under "documents". That document holds
        five sections, in order: a 100-item bullet list, a two-level nested
        category list, a 50-row table, two code blocks (Python and
        JavaScript) and a mixed-content section with paragraphs, highlights
        and metadata.
    """
    def makeSubcategory(label, itemPrefix, lastNumber):
        # Items are numbered 1..lastNumber inclusive.
        return {
            "name": label,
            "items": [f"{itemPrefix}_{n}" for n in range(1, lastNumber + 1)],
        }

    largeListSection = {
        "id": "section_large_list",
        "content_type": "bullet_list",
        "order": 0,
        "elements": [{
            "items": [f"product_{n:04d}" for n in range(1, 101)],  # 100 items
        }],
    }

    nestedSection = {
        "id": "section_nested_structure",
        "content_type": "nested_list",
        "order": 1,
        "elements": [{
            "categories": [
                {
                    "name": "Category A",
                    "subcategories": [
                        makeSubcategory("Sub A1", "item_a1", 20),
                        makeSubcategory("Sub A2", "item_a2", 15),
                    ],
                },
                {
                    "name": "Category B",
                    "subcategories": [
                        makeSubcategory("Sub B1", "item_b1", 24),
                        makeSubcategory("Sub B2", "item_b2", 17),
                    ],
                },
            ],
        }],
    }

    employeeRows = []
    for n in range(1, 51):  # 50 rows
        employeeRows.append([
            str(n),
            f"Employee_{n:03d}",
            f"emp{n}@company.com",
            f"Dept{(n % 5) + 1}",
            str(50000 + n * 1000),
            f"2024-{(n % 12) + 1:02d}-15",
        ])
    largeTableSection = {
        "id": "section_large_table",
        "content_type": "table",
        "order": 2,
        "elements": [{
            "headers": ["ID", "Name", "Email", "Department", "Salary", "StartDate"],
            "rows": employeeRows,
        }],
    }

    pythonSnippet = "class DataProcessor:\n def __init__(self, config):\n self.config = config\n self.cache = {}\n \n def process(self, data):\n result = []\n for item in data:\n processed = self.transform(item)\n result.append(processed)\n return result\n \n def transform(self, item):\n return item.upper() if isinstance(item, str) else item"
    javascriptSnippet = "function calculateStatistics(data) {\n const stats = {\n mean: 0,\n median: 0,\n mode: null,\n stdDev: 0\n };\n \n if (data.length === 0) return stats;\n \n const sum = data.reduce((a, b) => a + b, 0);\n stats.mean = sum / data.length;\n \n const sorted = [...data].sort((a, b) => a - b);\n const mid = Math.floor(sorted.length / 2);\n stats.median = sorted.length % 2 === 0\n ? (sorted[mid - 1] + sorted[mid]) / 2\n : sorted[mid];\n \n return stats;\n}"
    codeBlocksSection = {
        "id": "section_code_blocks",
        "content_type": "code_block",
        "order": 3,
        "elements": [
            {"code": pythonSnippet, "language": "python"},
            {"code": javascriptSnippet, "language": "javascript"},
        ],
    }

    # Each paragraph is one sentence repeated to produce long string values.
    paragraphs = [
        "This is a long paragraph that contains multiple sentences. " * 5,
        "Another paragraph with different content. " * 8,
        "Yet another paragraph for testing purposes. " * 10,
    ]
    mixedSection = {
        "id": "section_mixed_content",
        "content_type": "mixed",
        "order": 4,
        "elements": [{
            "paragraphs": paragraphs,
            "highlights": [f"Highlight {n}" for n in range(1, 31)],  # 30 highlights
            "metadata": {
                "author": "Test Author",
                "version": "1.0.0",
                "tags": [f"tag_{n}" for n in range(1, 21)],  # 20 tags
                "references": [f"ref_{n:03d}" for n in range(1, 16)],  # 15 references
            },
        }],
    }

    document = {
        "documentName": "complex_test_document.json",
        "sections": [
            largeListSection,
            nestedSection,
            largeTableSection,
            codeBlocksSection,
            mixedSection,
        ],
    }
    return {"documents": [document]}
def testPattern1_ArraySliced():
    """Test Pattern 1: Slice JSON string containing array into multiple pieces - String accumulation.

    Serializes the full document from ``createBigJsonStructure``, cuts it
    right after ``"item_8"`` and ``"item_15"`` into three pieces (each later
    piece starts by repeating the token the previous piece ended with), feeds
    pieces 2 and 3 through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` and checks that
    the bullet-list section ends up with all 20 items.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the bullet-list section is
            missing, or the item count is not 20.
    """
    print("\n" + "="*60)
    print("PATTERN 1: Array Sliced into Multiple Pieces (String Accumulation)")
    print("="*60)

    # Serialize the FULL document (not just one section).
    bigJson = createBigJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the slice anchors inside the items array.
    firstAnchor = '"item_8"'
    secondAnchor = '"item_15"'
    itemsArrayStart = jsonStr.find('"items": [')
    item8Pos = jsonStr.find(firstAnchor, itemsArrayStart)
    item15Pos = jsonStr.find(secondAnchor, itemsArrayStart)
    # str.find returns -1 on a miss; slicing with that would silently test
    # the wrong pieces, so fail loudly instead.
    if -1 in (itemsArrayStart, item8Pos, item15Pos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    # Piece 1 ends right after item_8 (array left open).
    cut1 = item8Pos + len(firstAnchor)
    piece1 = jsonStr[:cut1]
    # Piece 2 repeats item_8 as overlap and ends right after item_15.
    cut2 = item15Pos + len(secondAnchor)
    piece2 = jsonStr[item8Pos:cut2]
    # Piece 3 repeats item_15 as overlap and runs to the end of the document.
    piece3 = jsonStr[item15Pos:]
    print(f"Piece 1 length: {len(piece1)} chars (cut at: {cut1})")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    bulletSection = next(
        (section for section in allSections if section.get('id') == 'section_bullet_list'),
        None,
    )
    if not bulletSection:
        print("❌ Bullet list section not found")
        raise AssertionError("Bullet list section should exist")

    elements = bulletSection.get('elements', [])
    if isinstance(elements, list) and elements:
        items = elements[0].get('items', [])
    else:
        items = []
    print(f"✅ Final result: {len(items)} items")
    assert len(items) == 20, f"Expected 20 items, got {len(items)}"
def testPattern2_TableSliced():
    """Test Pattern 2: Slice JSON string containing table into multiple pieces - String accumulation.

    Serializes the full document from ``createBigJsonStructure``, cuts it in
    the middle of table rows 4 and 7 (right after the row's first two cells)
    into three overlapping pieces, feeds pieces 2 and 3 through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` and checks that
    the table section ends up with all 8 rows.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the table section is missing, or
            the row count is not 8.
    """
    print("\n" + "="*60)
    print("PATTERN 2: Table Sliced into Multiple Pieces (String Accumulation)")
    print("="*60)

    # Serialize the FULL document.
    bigJson = createBigJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the slice anchors inside the rows array.
    firstAnchor = '["4", "Diana"'
    secondAnchor = '["7", "Grace"'
    rowsArrayStart = jsonStr.find('"rows": [')
    row4Pos = jsonStr.find(firstAnchor, rowsArrayStart)
    row7Pos = jsonStr.find(secondAnchor, rowsArrayStart)
    # str.find returns -1 on a miss; slicing with that would silently test
    # the wrong pieces, so fail loudly instead.
    if -1 in (rowsArrayStart, row4Pos, row7Pos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    # Piece 1 ends inside row 4 (rows 1-3 complete, row 4 incomplete).
    cut1 = row4Pos + len(firstAnchor)
    piece1 = jsonStr[:cut1]
    # Piece 2 repeats the start of row 4 as overlap and ends inside row 7.
    cut2 = row7Pos + len(secondAnchor)
    piece2 = jsonStr[row4Pos:cut2]
    # Piece 3 repeats the start of row 7 as overlap and runs to the end.
    piece3 = jsonStr[row7Pos:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    tableSection = next(
        (section for section in allSections if section.get('id') == 'section_table'),
        None,
    )
    if not tableSection:
        print("❌ Table section not found")
        raise AssertionError("Table section should exist")

    elements = tableSection.get('elements', [])
    if isinstance(elements, list) and elements:
        rows = elements[0].get('rows', [])
    else:
        rows = []
    print(f"✅ Final result: {len(rows)} rows")
    assert len(rows) == 8, f"Expected 8 rows, got {len(rows)}"
def testPattern3_CodeBlockSliced():
    """Test Pattern 3: Slice JSON string containing code block into multiple pieces - String accumulation.

    Serializes the full document from ``createBigJsonStructure`` and cuts it
    inside the code string, right after the first ``return result``. The
    second piece starts 10 characters before the cut (overlap) and runs to
    the end. It is fed through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` and the merged
    code must contain both function names.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the code-block section is
            missing, or the merged code lacks either function.
    """
    print("\n" + "="*60)
    print("PATTERN 3: Code Block Sliced into Multiple Pieces (String Accumulation)")
    print("="*60)

    # Serialize the FULL document.
    bigJson = createBigJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the cut point inside the code string.
    cutMarker = "return result"
    overlapChars = 10
    codeStart = jsonStr.find('"code": "')
    markerPos = jsonStr.find(cutMarker, codeStart)
    # str.find returns -1 on a miss; adding the marker length to that would
    # give a plausible-looking but wrong cut, so fail loudly instead.
    if -1 in (codeStart, markerPos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    codeCutPos = markerPos + len(cutMarker)
    piece1 = jsonStr[:codeCutPos]
    # Piece 2: continue from the cut point to the end (small overlap).
    piece2 = jsonStr[codeCutPos - overlapChars:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")

    # Iteration 1 is simply the first piece; piece 2 goes through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    accumulatedJsonString, sections, isComplete, _ = (
        JsonResponseHandler.accumulateAndParseJsonFragments(
            accumulatedJsonString,
            piece2,
            allSections,
            2
        )
    )
    if sections:
        allSections = sections
    print(f"Iteration 2: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    codeSection = next(
        (section for section in allSections if section.get('id') == 'section_code_block'),
        None,
    )
    if not codeSection:
        print("❌ Code block section not found")
        raise AssertionError("Code block section should exist")

    elements = codeSection.get('elements', [])
    if isinstance(elements, list) and elements:
        mergedCode = elements[0].get('code', '')
    else:
        mergedCode = ''
    print(f"✅ Final result: {len(mergedCode)} chars")
    assert "calculate_sum" in mergedCode and "calculate_product" in mergedCode, \
        "Merged code should contain both calculate_sum and calculate_product"
def testPattern4_LargeListSliced():
    """Test Pattern 4: Slice large list (100 items) into multiple pieces.

    Serializes the document from ``createComplexJsonStructure``, cuts it
    right after products 30, 60 and 90 into four overlapping pieces, feeds
    pieces 2-4 through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` and checks that
    the large-list section ends up with all 100 items.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the large-list section is
            missing, or the item count is not 100.
    """
    print("\n" + "="*60)
    print("PATTERN 4: Large List Sliced into Multiple Pieces (String Accumulation)")
    print("="*60)

    bigJson = createComplexJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the slice anchors inside the first items array (the large list).
    anchors = ['"product_0030"', '"product_0060"', '"product_0090"']
    itemsArrayStart = jsonStr.find('"items": [')
    product30Pos, product60Pos, product90Pos = (
        jsonStr.find(anchor, itemsArrayStart) for anchor in anchors
    )
    # str.find returns -1 on a miss; slicing with that would silently test
    # the wrong pieces, so fail loudly instead.
    if -1 in (itemsArrayStart, product30Pos, product60Pos, product90Pos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    # Each piece ends right after its anchor; the next piece starts by
    # repeating that same anchor as overlap.
    cut1 = product30Pos + len(anchors[0])
    cut2 = product60Pos + len(anchors[1])
    cut3 = product90Pos + len(anchors[2])
    piece1 = jsonStr[:cut1]
    piece2 = jsonStr[product30Pos:cut2]
    piece3 = jsonStr[product60Pos:cut3]
    piece4 = jsonStr[product90Pos:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")
    print(f"Piece 4 length: {len(piece4)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3, piece4], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    largeListSection = next(
        (section for section in allSections if section.get('id') == 'section_large_list'),
        None,
    )
    if not largeListSection:
        print("❌ Large list section not found")
        raise AssertionError("Large list section should exist")

    elements = largeListSection.get('elements', [])
    if isinstance(elements, list) and elements:
        items = elements[0].get('items', [])
    else:
        items = []
    print(f"✅ Final result: {len(items)} items")
    assert len(items) == 100, f"Expected 100 items, got {len(items)}"
def testPattern5_NestedStructureSliced():
    """Test Pattern 5: Slice nested structure in the middle of nested arrays.

    Serializes the document from ``createComplexJsonStructure``, cuts it
    right after ``"item_a1_10"``, ``"item_a2_8"`` and ``"item_b1_12"`` into
    four overlapping pieces, feeds pieces 2-4 through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` and counts the
    items across all subcategories of the nested section.

    The expected total is derived from the fixture itself
    (20 + 15 + 24 + 17 = 76) rather than hard-coded. A merged count of at
    least 75 is accepted; anything short of the full total is reported as a
    note.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the nested section is missing,
            or fewer than 75 items survive the merge.
    """
    def countNestedItems(section):
        # Sum the item counts of every subcategory of the section's first element.
        elements = section.get('elements', [])
        if not (isinstance(elements, list) and elements):
            return 0
        return sum(
            len(subcat.get('items', []))
            for category in elements[0].get('categories', [])
            for subcat in category.get('subcategories', [])
        )

    print("\n" + "="*60)
    print("PATTERN 5: Nested Structure Sliced (String Accumulation)")
    print("="*60)

    bigJson = createComplexJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Ground truth: count the items in the unsliced fixture.
    sourceSection = next(
        section
        for section in bigJson["documents"][0]["sections"]
        if section.get('id') == 'section_nested_structure'
    )
    expectedItems = countNestedItems(sourceSection)
    minimumItems = 75  # tolerance floor kept from the original test

    # Locate the slice anchors at actual item positions in the nested structure.
    anchors = ['"item_a1_10"', '"item_a2_8"', '"item_b1_12"']
    nestedStart = jsonStr.find('"categories": [')
    itemA1_10Pos, itemA2_8Pos, itemB1_12Pos = (
        jsonStr.find(anchor, nestedStart) for anchor in anchors
    )
    # str.find returns -1 on a miss; slicing with that would silently test
    # the wrong pieces, so fail loudly instead.
    if -1 in (nestedStart, itemA1_10Pos, itemA2_8Pos, itemB1_12Pos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    # Each piece ends right after its anchor; the next piece starts by
    # repeating that same anchor as overlap.
    cut1 = itemA1_10Pos + len(anchors[0])
    cut2 = itemA2_8Pos + len(anchors[1])
    cut3 = itemB1_12Pos + len(anchors[2])
    piece1 = jsonStr[:cut1]
    piece2 = jsonStr[itemA1_10Pos:cut2]
    piece3 = jsonStr[itemA2_8Pos:cut3]
    piece4 = jsonStr[itemB1_12Pos:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")
    print(f"Piece 4 length: {len(piece4)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3, piece4], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    nestedSection = next(
        (section for section in allSections if section.get('id') == 'section_nested_structure'),
        None,
    )
    if not nestedSection:
        print("❌ Nested structure section not found")
        raise AssertionError("Nested structure section should exist")

    totalItems = countNestedItems(nestedSection)
    print(f"✅ Final result: {totalItems} items across nested structure")
    # Allow some tolerance due to slicing complexity in nested structures.
    # Fixture total: 20 (Sub A1) + 15 (Sub A2) + 24 (Sub B1) + 17 (Sub B2) = 76
    assert totalItems >= minimumItems, f"Expected at least {minimumItems} items, got {totalItems}"
    if totalItems != expectedItems:
        print(f"⚠️ Note: Got {totalItems} instead of {expectedItems} (acceptable due to nested structure slicing)")
def testPattern6_LargeTableSliced():
    """Test Pattern 6: Slice large table (50 rows) into multiple pieces.

    Serializes the document from ``createComplexJsonStructure``, cuts it
    inside rows 15, 30 and 45 (right after the row's first two cells) into
    four overlapping pieces, feeds pieces 2-4 through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` and checks that
    the large-table section ends up with all 50 rows.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the large-table section is
            missing, or the row count is not 50.
    """
    print("\n" + "="*60)
    print("PATTERN 6: Large Table Sliced into Multiple Pieces (String Accumulation)")
    print("="*60)

    bigJson = createComplexJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the slice anchors inside the rows array of the large table.
    anchors = [
        '"15", "Employee_015"',
        '"30", "Employee_030"',
        '"45", "Employee_045"',
    ]
    rowsArrayStart = jsonStr.find('"rows": [')
    row15Pos, row30Pos, row45Pos = (
        jsonStr.find(anchor, rowsArrayStart) for anchor in anchors
    )
    # str.find returns -1 on a miss; slicing with that would silently test
    # the wrong pieces, so fail loudly instead.
    if -1 in (rowsArrayStart, row15Pos, row30Pos, row45Pos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    # Each piece ends right after its anchor (mid-row); the next piece starts
    # by repeating that same anchor as overlap.
    cut1 = row15Pos + len(anchors[0])
    cut2 = row30Pos + len(anchors[1])
    cut3 = row45Pos + len(anchors[2])
    piece1 = jsonStr[:cut1]
    piece2 = jsonStr[row15Pos:cut2]
    piece3 = jsonStr[row30Pos:cut3]
    piece4 = jsonStr[row45Pos:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")
    print(f"Piece 4 length: {len(piece4)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3, piece4], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    tableSection = next(
        (section for section in allSections if section.get('id') == 'section_large_table'),
        None,
    )
    if not tableSection:
        print("❌ Large table section not found")
        raise AssertionError("Large table section should exist")

    elements = tableSection.get('elements', [])
    if isinstance(elements, list) and elements:
        rows = elements[0].get('rows', [])
    else:
        rows = []
    print(f"✅ Final result: {len(rows)} rows")
    assert len(rows) == 50, f"Expected 50 rows, got {len(rows)}"
def testPattern7_MixedContentSliced():
    """Test Pattern 7: Slice mixed content section with various data types.

    Serializes the document from ``createComplexJsonStructure``, cuts it
    right after ``"Highlight 15"`` and ``"Highlight 25"`` inside the
    mixed-content section into three overlapping pieces, feeds pieces 2 and 3
    through ``JsonResponseHandler.accumulateAndParseJsonFragments`` and
    checks that the section ends up with 30 highlights and 20 metadata tags.

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the mixed-content section is
            missing, or the highlight/tag counts are wrong.
    """
    print("\n" + "="*60)
    print("PATTERN 7: Mixed Content Sliced (String Accumulation)")
    print("="*60)

    bigJson = createComplexJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the slice anchors inside the highlights array of the mixed section.
    firstAnchor = '"Highlight 15"'
    secondAnchor = '"Highlight 25"'
    mixedStart = jsonStr.find('"section_mixed_content"')
    highlightsStart = jsonStr.find('"highlights": [', mixedStart)
    highlight15Pos = jsonStr.find(firstAnchor, highlightsStart)
    highlight25Pos = jsonStr.find(secondAnchor, highlightsStart)
    # str.find returns -1 on a miss; slicing with that would silently test
    # the wrong pieces, so fail loudly instead.
    if -1 in (mixedStart, highlightsStart, highlight15Pos, highlight25Pos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    # Each piece ends right after its anchor; the next piece starts by
    # repeating that same anchor as overlap.
    cut1 = highlight15Pos + len(firstAnchor)
    cut2 = highlight25Pos + len(secondAnchor)
    piece1 = jsonStr[:cut1]
    piece2 = jsonStr[highlight15Pos:cut2]
    piece3 = jsonStr[highlight25Pos:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    mixedSection = next(
        (section for section in allSections if section.get('id') == 'section_mixed_content'),
        None,
    )
    if not mixedSection:
        print("❌ Mixed content section not found")
        raise AssertionError("Mixed content section should exist")

    elements = mixedSection.get('elements', [])
    if isinstance(elements, list) and elements:
        element = elements[0]
        highlights = element.get('highlights', [])
        tags = element.get('metadata', {}).get('tags', [])
    else:
        highlights = []
        tags = []
    print(f"✅ Final result: {len(highlights)} highlights, {len(tags)} tags")
    assert len(highlights) == 30, f"Expected 30 highlights, got {len(highlights)}"
    assert len(tags) == 20, f"Expected 20 tags, got {len(tags)}"
def testPattern9_RealWorldPrimeNumbersTable():
    """Test Pattern 9: Real-world example - Prime numbers table from debug files.

    Builds a document with a 10-row prime-number table, serializes it and
    cuts it inside row 8, right after the ``"401"`` cell. The second piece
    repeats ``"401"`` as overlap and runs to the end. It is fed through
    ``JsonResponseHandler.accumulateAndParseJsonFragments``; the merged table
    must have 10 rows, end with row "10", and have a complete row 8 ending
    in "409".

    Raises:
        AssertionError: If a slice anchor is missing from the serialized
            JSON, no sections are produced, the table section is missing, or
            any row check fails.
    """
    print("\n" + "="*60)
    print("PATTERN 9: Real-World Prime Numbers Table (String Accumulation)")
    print("="*60)

    # Simplified but realistic fixture: rows 1-10, to be sliced mid-row at row 8.
    complete_json = {
        "metadata": {
            "split_strategy": "single_document",
            "source_documents": [],
            "extraction_method": "ai_generation"
        },
        "documents": [{
            "id": "doc_1",
            "title": "Prime Numbers Table",
            "filename": "prime_numbers_table.json",
            "sections": [{
                "id": "section_prime_numbers_table",
                "content_type": "table",
                "elements": [{
                    "headers": ["Index", "Prime 1", "Prime 2", "Prime 3", "Prime 4", "Prime 5", "Prime 6", "Prime 7", "Prime 8", "Prime 9", "Prime 10"],
                    "rows": [
                        ["1", "2", "3", "5", "7", "11", "13", "17", "19", "23", "29"],
                        ["2", "31", "37", "41", "43", "47", "53", "59", "61", "67", "71"],
                        ["3", "73", "79", "83", "89", "97", "101", "103", "107", "109", "113"],
                        ["4", "127", "131", "137", "139", "149", "151", "157", "163", "167", "173"],
                        ["5", "179", "181", "191", "193", "197", "199", "211", "223", "227", "229"],
                        ["6", "233", "239", "241", "251", "257", "263", "269", "271", "277", "281"],
                        ["7", "283", "293", "307", "311", "313", "317", "331", "337", "347", "349"],
                        ["8", "353", "359", "367", "373", "379", "383", "389", "397", "401", "409"],
                        ["9", "419", "421", "431", "433", "439", "443", "449", "457", "461", "463"],
                        ["10", "467", "479", "487", "491", "499", "503", "509", "521", "523", "541"]
                    ]
                }]
            }]
        }]
    }
    jsonStr = json.dumps(complete_json, ensure_ascii=False)

    # Locate the cut point: row 8, right after "401" (row left incomplete).
    cutAnchor = '"401"'
    row8Start = jsonStr.find('["8", "353"')
    anchorPos = jsonStr.find(cutAnchor, row8Start)
    # str.find returns -1 on a miss; adding the anchor length to that would
    # give a plausible-looking but wrong cut, so fail loudly instead.
    if -1 in (row8Start, anchorPos):
        raise AssertionError("Slice anchors not found in serialized JSON")

    cutPos = anchorPos + len(cutAnchor)
    piece1 = jsonStr[:cutPos]
    # Piece 2: continue from "401" to the end (overlaps with "401").
    piece2 = jsonStr[anchorPos:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")

    # Iteration 1 is simply the first piece; piece 2 goes through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    accumulatedJsonString, sections, isComplete, _ = (
        JsonResponseHandler.accumulateAndParseJsonFragments(
            accumulatedJsonString,
            piece2,
            allSections,
            2
        )
    )
    if sections:
        allSections = sections
    print(f"Iteration 2: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result. Failures raise explicitly: `assert False` would be
    # stripped when Python runs with -O.
    if not allSections:
        print("❌ No sections after accumulation")
        raise AssertionError("Accumulation should produce sections")

    tableSection = next(
        (section for section in allSections if section.get('id') == 'section_prime_numbers_table'),
        None,
    )
    if not tableSection:
        print("❌ Prime numbers table section not found")
        raise AssertionError("Prime numbers table section should exist")

    elements = tableSection.get('elements', [])
    if isinstance(elements, list) and elements:
        rows = elements[0].get('rows', [])
    else:
        rows = []
    print(f"✅ Final result: {len(rows)} rows")
    # Should have all 10 rows from the complete JSON.
    assert len(rows) == 10, f"Expected 10 rows, got {len(rows)}"
    # The length check above guarantees the indexing below is in range.
    lastRow = rows[-1]
    assert lastRow[0] == "10", f"Expected last row index to be 10, got {lastRow[0]}"
    # Row 8 was the one cut in half; it must be complete and end with "409".
    row8 = rows[7]  # Index 7 = row 8
    assert row8[0] == "8", f"Expected row 8, got row {row8[0]}"
    assert row8[-1] == "409", f"Expected row 8 to end with 409, got {row8[-1]}"
def testPattern8_CrossSectionSlice():
    """Test Pattern 8: Slice across multiple sections (boundary crossing).

    Serializes the document from ``createComplexJsonStructure`` and cuts it
    at fixed character offsets before two section ids: 50 characters before
    ``"section_nested_structure"`` and 30 characters before
    ``"section_large_table"``. Pieces 2 and 3 each start 20 characters before
    the preceding cut (overlap). After feeding them through
    ``JsonResponseHandler.accumulateAndParseJsonFragments`` at least 4
    sections must be present.

    Raises:
        AssertionError: If a section-id anchor is missing from the serialized
            JSON or fewer than 4 sections result from the merge.
    """
    print("\n" + "="*60)
    print("PATTERN 8: Cross-Section Slice (String Accumulation)")
    print("="*60)

    bigJson = createComplexJsonStructure()
    jsonStr = json.dumps(bigJson, ensure_ascii=False)
    print(f"Full JSON string length: {len(jsonStr)} chars")

    # Locate the section ids the cuts are measured from.
    largeListEnd = jsonStr.find('"section_nested_structure"')
    nestedEnd = jsonStr.find('"section_large_table"')
    # str.find returns -1 on a miss; subtracting from that gives a negative
    # index that slices from the END of the string, so fail loudly instead.
    if -1 in (largeListEnd, nestedEnd):
        raise AssertionError("Slice anchors not found in serialized JSON")

    overlapChars = 20
    # Piece 1: everything up to 50 chars before the nested-structure id.
    cut1 = largeListEnd - 50
    piece1 = jsonStr[:cut1]
    # Piece 2: from just before cut 1 up to 30 chars before the large-table id.
    cut2 = nestedEnd - 30
    piece2 = jsonStr[cut1 - overlapChars:cut2]
    # Piece 3: from just before cut 2 to the end of the document.
    piece3 = jsonStr[cut2 - overlapChars:]
    print(f"Piece 1 length: {len(piece1)} chars")
    print(f"Piece 2 length: {len(piece2)} chars")
    print(f"Piece 3 length: {len(piece3)} chars")

    # Iteration 1 is simply the first piece; later pieces go through the handler.
    accumulatedJsonString = piece1
    allSections = []
    print(f"Iteration 1: Starting accumulation with {len(accumulatedJsonString)} chars")
    for iteration, piece in enumerate([piece2, piece3], start=2):
        accumulatedJsonString, sections, isComplete, _ = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulatedJsonString,
                piece,
                allSections,
                iteration
            )
        )
        # Keep the previous sections when an iteration returns none.
        if sections:
            allSections = sections
        print(f"Iteration {iteration}: Accumulated, {len(allSections)} sections, complete={isComplete}")

    # Verify final result - should have (nearly) all sections.
    print(f"✅ Final result: {len(allSections)} sections")
    assert len(allSections) >= 4, f"Expected at least 4 sections, got {len(allSections)}"
if __name__ == "__main__":
    # Script entry point: run every pattern in order and exit non-zero on the
    # first failure.
    separator = "="*60
    print("\n" + separator)
    print("JSON STRING ACCUMULATION TEST SUITE")
    print(separator)
    print("Testing by slicing JSON string into pieces and accumulating")
    print(separator)
    try:
        # The tuple is built inside the try block so that name-resolution
        # errors are reported through the handlers below as well.
        testSequence = (
            # Basic tests
            testPattern1_ArraySliced,
            testPattern2_TableSliced,
            testPattern3_CodeBlockSliced,
            # Complex tests with larger structures
            testPattern4_LargeListSliced,
            testPattern5_NestedStructureSliced,
            testPattern6_LargeTableSliced,
            testPattern7_MixedContentSliced,
            testPattern8_CrossSectionSlice,
            # Real-world test with actual JSON from debug files
            testPattern9_RealWorldPrimeNumbersTable,
        )
        for runPattern in testSequence:
            runPattern()
        print("\n" + separator)
        print("✅ ALL TESTS COMPLETED")
        print(separator)
    except AssertionError as failure:
        # A failed check: report the message only.
        print(f"\n❌ TEST FAILED: {failure}")
        sys.exit(1)
    except Exception as error:
        # Anything else is unexpected: include the traceback for diagnosis.
        print(f"\n❌ ERROR: {error}")
        import traceback
        traceback.print_exc()
        sys.exit(1)