From d7470549769b5ca315029934da4f61723f0d313b Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Mon, 5 Jan 2026 21:16:10 +0100
Subject: [PATCH] full refactory of core json cut handling system
---
modules/datamodels/datamodelAi.py | 19 +-
modules/services/serviceAi/merge_1.txt | 947 +-------
modules/services/serviceAi/merge_2.txt | 121 +
.../services/serviceAi/subAiCallLooping.py | 14 +
.../services/serviceAi/subStructureFilling.py | 80 +-
.../serviceAi/subStructureGeneration.py | 176 +-
.../services/serviceAi/test_json_merger.py | 594 -----
.../serviceGeneration/paths/codePath.py | 60 +-
.../subPromptBuilderGeneration.py | 22 +-
modules/shared/jsonContinuation.md | 164 ++
modules/shared/jsonContinuation.py | 1232 ++++++++++
modules/shared/jsonUtils.py | 2135 +----------------
tests/functional/test12_json_split_merge.py | 694 ++++++
tests/test_overlap_context.py | 216 --
14 files changed, 2387 insertions(+), 4087 deletions(-)
create mode 100644 modules/services/serviceAi/merge_2.txt
delete mode 100644 modules/services/serviceAi/test_json_merger.py
create mode 100644 modules/shared/jsonContinuation.md
create mode 100644 modules/shared/jsonContinuation.py
create mode 100644 tests/functional/test12_json_split_merge.py
delete mode 100644 tests/test_overlap_context.py
diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py
index b4ce76b7..5ca26951 100644
--- a/modules/datamodels/datamodelAi.py
+++ b/modules/datamodels/datamodelAi.py
@@ -261,13 +261,26 @@ class ContinuationContext(BaseModel):
"""Pydantic model for continuation context information."""
section_count: int
delivered_summary: str
- cut_off_element: Optional[str] = None
- element_before_cutoff: Optional[str] = None
template_structure: Optional[str] = None
last_complete_part: Optional[str] = None
incomplete_part: Optional[str] = None
- structure_context: Optional[str] = None
last_raw_json: Optional[str] = None
+ overlap_context: Optional[str] = None # From jsonContinuation.getContexts() - innermost element containing cut
+ hierarchy_context: Optional[str] = None # From jsonContinuation.getContexts() - full structure from root to cut
+
+
+class JsonContinuationContexts(BaseModel):
+ """
+ Pydantic model for JSON continuation contexts.
+
+ Contains three contexts for truncated JSON strings:
+ - overlapContext: The innermost object/array element containing the cut point (for merging)
+ - hierarchyContext: Full structure from root to cut with budget-limited values
+ - completePart: Valid JSON with all structures properly closed
+ """
+ overlapContext: str = Field(description="The innermost object/array element containing the cut point (for merging)")
+ hierarchyContext: str = Field(description="Full structure from root to cut with budget-limited values")
+ completePart: str = Field(description="Valid JSON with all structures properly closed")
class SectionPromptArgs(BaseModel):
diff --git a/modules/services/serviceAi/merge_1.txt b/modules/services/serviceAi/merge_1.txt
index 0a9a9895..1b08b35b 100644
--- a/modules/services/serviceAi/merge_1.txt
+++ b/modules/services/serviceAi/merge_1.txt
@@ -1,64 +1,57 @@
================================================================================
JSON MERGE OPERATION #1
================================================================================
-Timestamp: 2026-01-04T23:08:13.252204
+Timestamp: 2026-01-05T08:30:55.469646
INPUT:
- Accumulated length: 31737 chars
- New Fragment length: 10178 chars
- Accumulated: 409 lines (showing first 5 and last 5)
+ Accumulated length: 419 chars
+ New Fragment length: 120 chars
+ Accumulated: 20 lines (showing first 5 and last 5)
{
- "elements": [
- {
- "type": "table",
- "content": {
- ... (399 lines omitted) ...
- [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579],
- [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691],
- [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831],
- [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957],
- [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039
- New Fragment: 135 lines (showing first 5 and last 5)
- ```json
- {
- "elements": [
- {
- "type": "table",
- ... (125 lines omitted) ...
- }
- }
- ]
- }
- ```
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ ... (10 lines omitted) ...
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction",
+
+ New Fragment: 8 lines (showing first 5 and last 5)
+ "level": 1
+ }
+ }
+ ]
+ },
+ {
+ "id": "sec2",
+ "conten
- Normalized Accumulated (31737 chars)
- (showing first 5 and last 5 of 409 lines)
+ Normalized Accumulated (407 chars)
+ (showing first 5 and last 5 of 19 lines)
{
- "elements": [
- {
- "type": "table",
- "content": {
- ... (399 lines omitted) ...
- [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579],
- [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691],
- [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831],
- [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957],
- [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ ... (9 lines omitted) ...
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction",
- Normalized New Fragment (10166 chars)
- (showing first 5 and last 5 of 133 lines)
- {
- "elements": [
- {
- "type": "table",
- "content": {
- ... (123 lines omitted) ...
- ]
- }
- }
- ]
- }
+ Normalized New Fragment (115 chars)
+ "level": 1
+ }
+ }
+ ]
+ },
+ {
+ "id": "sec2",
+ "conten
STEP: PHASE 1
Description: Finding overlap between JSON strings
⏳ In progress...
@@ -70,832 +63,52 @@ STEP: PHASE 1
⚠️ NO OVERLAP FOUND - This indicates iterations should stop
Closing JSON and returning final result
- Closed JSON (31743 chars):
+ Closed JSON (414 chars):
==============================================================================
{
- "elements": [
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ },
+ "documents": [
{
- "type": "table",
- "content": {
- "headers": ["Spalte1", "Spalte2", "Spalte3", "Spalte4", "Spalte5", "Spalte6", "Spalte7", "Spalte8", "Spalte9", "Spalte10"],
- "rows": [
- [2, 3, 5, 7, 11, 13, 17, 19, 23, 29],
- [31, 37, 41, 43, 47, 53, 59, 61, 67, 71],
- [73, 79, 83, 89, 97, 101, 103, 107, 109, 113],
- [127, 131, 137, 139, 149, 151, 157, 163, 167, 173],
- [179, 181, 191, 193, 197, 199, 211, 223, 227, 229],
- [233, 239, 241, 251, 257, 263, 269, 271, 277, 281],
- [283, 293, 307, 311, 313, 317, 331, 337, 347, 349],
- [353, 359, 367, 373, 379, 383, 389, 397, 401, 409],
- [419, 421, 431, 433, 439, 443, 449, 457, 461, 463],
- [467, 479, 487, 491, 499, 503, 509, 521, 523, 541],
- [547, 557, 563, 569, 571, 577, 587, 593, 599, 601],
- [607, 613, 617, 619, 631, 641, 643, 647, 653, 659],
- [661, 673, 677, 683, 691, 701, 709, 719, 727, 733],
- [739, 743, 751, 757, 761, 769, 773, 787, 797, 809],
- [811, 821, 823, 827, 829, 839, 853, 857, 859, 863],
- [877, 881, 883, 887, 907, 911, 919, 929, 937, 941],
- [947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013],
- [1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069],
- [1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151],
- [1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223],
- [1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291],
- [1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373],
- [1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451],
- [1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511],
- [1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583],
- [1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657],
- [1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733],
- [1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811],
- [1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889],
- [1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987],
- [1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053],
- [2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129],
- [2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213],
- [2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287],
- [2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357],
- [2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423],
- [2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531],
- [2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617],
- [2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687],
- [2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741],
- [2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819],
- [2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903],
- [2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999],
- [3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079],
- [3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181],
- [3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257],
- [3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331],
- [3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413],
- [3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511],
- [3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571],
- [3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643],
- [3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727],
- [3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821],
- [3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907],
- [3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989],
- [4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057],
- [4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139],
- [4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231],
- [4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297],
- [4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409],
- [4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493],
- [4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583],
- [4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657],
- [4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751],
- [4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831],
- [4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937],
- [4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003],
- [5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087],
- [5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179],
- [5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279],
- [5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387],
- [5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443],
- [5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521],
- [5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639],
- [5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693],
- [5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791],
- [5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857],
- [5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939],
- [5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053],
- [6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133],
- [6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221],
- [6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301],
- [6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367],
- [6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473],
- [6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571],
- [6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673],
- [6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761],
- [6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833],
- [6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917],
- [6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997],
- [7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103],
- [7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207],
- [7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297],
- [7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411],
- [7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499],
- [7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561],
- [7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643],
- [7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723],
- [7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829],
- [7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919],
- [7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017],
- [8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111],
- [8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 8209, 8219],
- [8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291],
- [8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 8377, 8387],
- [8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 8467, 8501],
- [8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597],
- [8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677],
- [8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741],
- [8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831],
- [8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929],
- [8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011],
- [9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109],
- [9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 9187, 9199],
- [9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283],
- [9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377],
- [9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 9437, 9439],
- [9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533],
- [9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, 9631],
- [9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733],
- [9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811],
- [9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887],
- [9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007],
- [10009, 10037, 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099],
- [10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, 10169, 10177],
- [10181, 10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271],
- [10273, 10289, 10301, 10303, 10313, 10321, 10331, 10333, 10337, 10343],
- [10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459],
- [10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567],
- [10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, 10657],
- [10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739],
- [10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859],
- [10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949],
- [10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059],
- [11069, 11071, 11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149],
- [11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251],
- [11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329],
- [11351, 11353, 11369, 11383, 11393, 11399, 11411, 11423, 11437, 11443],
- [11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527],
- [11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657],
- [11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, 11777],
- [11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833],
- [11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933],
- [11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011],
- [12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109],
- [12113, 12119, 12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211],
- [12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289],
- [12301, 12323, 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401],
- [12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 12479, 12487],
- [12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553],
- [12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, 12637, 12641],
- [12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739],
- [12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829],
- [12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923],
- [12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007],
- [13009, 13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109],
- [13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187],
- [13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309],
- [13313, 13327, 13331, 13337, 13339, 13367, 13381, 13397, 13399, 13411],
- [13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499],
- [13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 13613, 13619],
- [13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, 13693, 13697],
- [13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781],
- [13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879],
- [13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967],
- [13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081],
- [14083, 14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197],
- [14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323],
- [14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419],
- [14423, 14431, 14437, 14447, 14449, 14461, 14479, 14489, 14503, 14519],
- [14533, 14537, 14543, 14549, 14551, 14557, 14561, 14563, 14591, 14593],
- [14621, 14627, 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699],
- [14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 14759, 14767],
- [14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851],
- [14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947],
- [14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073],
- [15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149],
- [15161, 15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259],
- [15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319],
- [15329, 15331, 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401],
- [15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, 15493, 15497],
- [15511, 15527, 15541, 15551, 15559, 15569, 15581, 15583, 15601, 15607],
- [15619, 15629, 15641, 15643, 15647, 15649, 15661, 15667, 15671, 15679],
- [15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773],
- [15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881],
- [15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971],
- [15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069],
- [16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183],
- [16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267],
- [16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381],
- [16411, 16417, 16421, 16427, 16433, 16447, 16451, 16453, 16477, 16481],
- [16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603],
- [16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691],
- [16693, 16699, 16703, 16729, 16741, 16747, 16759, 16763, 16787, 16811],
- [16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903],
- [16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993],
- [17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093],
- [17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191],
- [17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317],
- [17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389],
- [17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477],
- [17483, 17489, 17491, 17497, 17509, 17519, 17539, 17551, 17569, 17573],
- [17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669],
- [17681, 17683, 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783],
- [17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 17881, 17891],
- [17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971],
- [17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, 18059],
- [18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143],
- [18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233],
- [18251, 18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313],
- [18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427],
- [18433, 18439, 18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517],
- [18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593, 18617, 18637],
- [18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749],
- [18757, 18773, 18787, 18793, 18797, 18803, 18839, 18859, 18869, 18899],
- [18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009],
- [19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121],
- [19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, 19219],
- [19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319],
- [19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423],
- [19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477],
- [19483, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577],
- [19583, 19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, 19709],
- [19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793, 19801],
- [19813, 19819, 19841, 19843, 19853, 19861, 19867, 19889, 19891, 19913],
- [19919, 19927, 19937, 19949, 19961, 19963, 19973, 19979, 19991, 19993],
- [19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, 20089],
- [20101, 20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, 20161],
- [20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269],
- [20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359],
- [20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477],
- [20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563],
- [20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707],
- [20717, 20719, 20731, 20743, 20747, 20749, 20753, 20759, 20771, 20773],
- [20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, 20899],
- [20903, 20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, 21001],
- [21011, 21013, 21017, 21019, 21023, 21031, 21059, 21061, 21067, 21089],
- [21101, 21107, 21121, 21139, 21143, 21149, 21157, 21163, 21169, 21179],
- [21187, 21191, 21193, 21211, 21221, 21227, 21247, 21269, 21277, 21283],
- [21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383, 21391],
- [21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, 21493],
- [21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 21563, 21569],
- [21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, 21649],
- [21661, 21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, 21757],
- [21767, 21773, 21787, 21799, 21803, 21817, 21821, 21839, 21841, 21851],
- [21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, 21961],
- [21977, 21991, 21997, 22003, 22013, 22027, 22031, 22037, 22039, 22051],
- [22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123, 22129],
- [22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, 22247],
- [22259, 22271, 22273, 22277, 22279, 22283, 22291, 22303, 22307, 22343],
- [22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, 22447],
- [22453, 22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, 22549],
- [22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, 22651],
- [22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, 22739],
- [22741, 22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, 22853],
- [22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943, 22961],
- [22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, 23039],
- [23041, 23053, 23057, 23059, 23063, 23071, 23081, 23087, 23099, 23117],
- [23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, 23209],
- [23227, 23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, 23327],
- [23333, 23339, 23357, 23369, 23371, 23399, 23417, 23431, 23447, 23459],
- [23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, 23563],
- [23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, 23633],
- [23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, 23747],
- [23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, 23831],
- [23833, 23857, 23869, 23873, 23879, 23887, 23893, 23899, 23909, 23911],
- [23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, 24019],
- [24023, 24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, 24097],
- [24103, 24107, 24109, 24113, 24121, 24133, 24137, 24151, 24169, 24179],
- [24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, 24317],
- [24329, 24337, 24359, 24371, 24373, 24379, 24391, 24407, 24413, 24419],
- [24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517, 24527],
- [24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, 24671],
- [24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, 24781],
- [24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, 24889],
- [24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, 24979],
- [24989, 25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, 25111],
- [25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, 25189],
- [25219, 25229, 25237, 25243, 25247, 25253, 25261, 25301, 25303, 25307],
- [25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391, 25409],
- [25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, 25523],
- [25537, 25541, 25561, 25577, 25579, 25583, 25589, 25601, 25603, 25609],
- [25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, 25703],
- [25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, 25801],
- [25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, 25919],
- [25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, 26003],
- [26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, 26113],
- [26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, 26209],
- [26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, 26309],
- [26317, 26321, 26339, 26347, 26357, 26371, 26387, 26393, 26399, 26407],
- [26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, 26501],
- [26513, 26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, 26641],
- [26647, 26669, 26681, 26683, 26687, 26693, 26699, 26701, 26711, 26713],
- [26717, 26723, 26729, 26731, 26737, 26759, 26777, 26783, 26801, 26813],
- [26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, 26891, 26893],
- [26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987, 26993],
- [27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, 27091],
- [27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, 27239],
- [27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, 27337],
- [27361, 27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, 27457],
- [27479, 27481, 27487, 27509, 27527, 27529, 27539, 27541, 27551, 27581],
- [27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, 27697],
- [27701, 27733, 27737, 27739, 27743, 27749, 27751, 27763, 27767, 27773],
- [27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827, 27847],
- [27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, 27953],
- [27961, 27967, 27983, 27997, 28001, 28019, 28027, 28031, 28051, 28057],
- [28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, 28163],
- [28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, 28289],
- [28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, 28409],
- [28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, 28513],
- [28517, 28537, 28541, 28547, 28549, 28559, 28571, 28573, 28579, 28591],
- [28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649, 28657],
- [28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, 28751],
- [28753, 28759, 28771, 28789, 28793, 28807, 28813, 28817, 28837, 28843],
- [28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, 28949],
- [28961, 28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, 29063],
- [29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, 29167, 29173],
- [29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, 29269],
- [29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, 29383],
- [29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, 29453],
- [29473, 29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, 29581],
- [29587, 29599, 29611, 29629, 29633, 29641, 29663, 29669, 29671, 29683],
- [29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 29819, 29833],
- [29837, 29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, 29927],
- [29947, 29959, 29983, 29989, 30011, 30013, 30029, 30047, 30059, 30071],
- [30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, 30139],
- [30161, 30169, 30181, 30187, 30197, 30203, 30211, 30223, 30241, 30253],
- [30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341, 30347],
- [30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, 30491],
- [30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, 30577],
- [30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, 30697],
- [30703, 30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, 30809],
- [30817, 30829, 30839, 30841, 30851, 30853, 30859, 30869, 30871, 30881],
- [30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, 31013],
- [31019, 31033, 31039, 31051, 31063, 31069, 31079, 31081, 31091, 31121],
- [31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183, 31189],
- [31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, 31267],
- [31271, 31277, 31307, 31319, 31321, 31327, 31333, 31337, 31357, 31379],
- [31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, 31513],
- [31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, 31607],
- [31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, 31723],
- [31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, 31847],
- [31849, 31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, 31981],
- [31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063, 32069],
- [32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, 32173],
- [32183, 32189, 32191, 32203, 32213, 32233, 32237, 32251, 32257, 32261],
- [32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, 32359],
- [32363, 32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, 32429],
- [32441, 32443, 32467, 32479, 32491, 32497, 32503, 32507, 32531, 32533],
- [32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 32609, 32611],
- [32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, 32719],
- [32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, 32833],
- [32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, 32941],
- [32957, 32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, 33029],
- [33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, 33119],
- [33149, 33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, 33223],
- [33247, 33287, 33289, 33301, 33311, 33317, 33329, 33331, 33343, 33347],
- [33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, 33457],
- [33461, 33469, 33479, 33487, 33493, 33503, 33521, 33529, 33533, 33547],
- [33563, 33569, 33577, 33581, 33587, 33589, 33599, 33601, 33613, 33617],
- [33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, 33721],
- [33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, 33809],
- [33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, 33911],
- [33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, 34033],
- [34039, 34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, 34159],
- [34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, 34267],
- [34273, 34283, 34297, 34301, 34303, 34313, 34319, 34327, 34337, 34351],
- [34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457, 34469],
- [34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, 34543],
- [34549, 34583, 34589, 34591, 34603, 34607, 34613, 34631, 34649, 34651],
- [34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, 34747],
- [34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, 34849],
- [34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, 34963],
- [34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, 35089],
- [35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, 35171],
- [35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, 35311],
- [35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 35401, 35407],
- [35419, 35423, 35437, 35447, 35449, 35461, 35491, 35507, 35509, 35521],
- [35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, 35597],
- [35603, 35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, 35771],
- [35797, 35801, 35803, 35809, 35831, 35837, 35839, 35851, 35863, 35869],
- [35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, 35977],
- [35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, 36061, 36067],
- [36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, 36187],
- [36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, 36293],
- [36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, 36389],
- [36433, 36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, 36523],
- [36527, 36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, 36599],
- [36607, 36629, 36637, 36643, 36653, 36671, 36677, 36683, 36691, 36697],
- [36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, 36787],
- [36791, 36793, 36809, 36821, 36833, 36847, 36857, 36871, 36877, 36887],
- [36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947, 36973],
- [36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, 37061],
- [37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, 37189, 37199],
- [37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, 37313],
- [37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, 37409],
- [37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, 37511],
- [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579],
- [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691],
- [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831],
- [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957],
- [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039]]}}]}
+ "id": "doc1",
+ "title": "Document 1",
+ "sections": [
+ {
+ "id": "sec1",
+ "content_type": "heading",
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction"}}]}]}]}
==============================================================================
================================================================================
MERGE RESULT: ✅ SUCCESS
================================================================================
-Final result length: 31743 chars
+Final result length: 414 chars
Final result (COMPLETE):
================================================================================
{
- "elements": [
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ },
+ "documents": [
{
- "type": "table",
- "content": {
- "headers": ["Spalte1", "Spalte2", "Spalte3", "Spalte4", "Spalte5", "Spalte6", "Spalte7", "Spalte8", "Spalte9", "Spalte10"],
- "rows": [
- [2, 3, 5, 7, 11, 13, 17, 19, 23, 29],
- [31, 37, 41, 43, 47, 53, 59, 61, 67, 71],
- [73, 79, 83, 89, 97, 101, 103, 107, 109, 113],
- [127, 131, 137, 139, 149, 151, 157, 163, 167, 173],
- [179, 181, 191, 193, 197, 199, 211, 223, 227, 229],
- [233, 239, 241, 251, 257, 263, 269, 271, 277, 281],
- [283, 293, 307, 311, 313, 317, 331, 337, 347, 349],
- [353, 359, 367, 373, 379, 383, 389, 397, 401, 409],
- [419, 421, 431, 433, 439, 443, 449, 457, 461, 463],
- [467, 479, 487, 491, 499, 503, 509, 521, 523, 541],
- [547, 557, 563, 569, 571, 577, 587, 593, 599, 601],
- [607, 613, 617, 619, 631, 641, 643, 647, 653, 659],
- [661, 673, 677, 683, 691, 701, 709, 719, 727, 733],
- [739, 743, 751, 757, 761, 769, 773, 787, 797, 809],
- [811, 821, 823, 827, 829, 839, 853, 857, 859, 863],
- [877, 881, 883, 887, 907, 911, 919, 929, 937, 941],
- [947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013],
- [1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069],
- [1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151],
- [1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223],
- [1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291],
- [1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373],
- [1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451],
- [1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511],
- [1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583],
- [1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657],
- [1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733],
- [1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811],
- [1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889],
- [1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987],
- [1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053],
- [2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129],
- [2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213],
- [2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287],
- [2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357],
- [2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423],
- [2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531],
- [2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617],
- [2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687],
- [2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741],
- [2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819],
- [2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903],
- [2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999],
- [3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079],
- [3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181],
- [3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257],
- [3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331],
- [3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413],
- [3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511],
- [3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571],
- [3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643],
- [3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727],
- [3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821],
- [3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907],
- [3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989],
- [4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057],
- [4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139],
- [4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231],
- [4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297],
- [4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409],
- [4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493],
- [4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583],
- [4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657],
- [4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751],
- [4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831],
- [4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937],
- [4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003],
- [5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087],
- [5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179],
- [5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279],
- [5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387],
- [5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443],
- [5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521],
- [5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639],
- [5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693],
- [5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791],
- [5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857],
- [5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939],
- [5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053],
- [6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133],
- [6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221],
- [6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301],
- [6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367],
- [6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473],
- [6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571],
- [6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673],
- [6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761],
- [6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833],
- [6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917],
- [6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997],
- [7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103],
- [7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207],
- [7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297],
- [7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411],
- [7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499],
- [7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561],
- [7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643],
- [7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723],
- [7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829],
- [7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919],
- [7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017],
- [8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111],
- [8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 8209, 8219],
- [8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291],
- [8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 8377, 8387],
- [8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 8467, 8501],
- [8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597],
- [8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677],
- [8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741],
- [8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831],
- [8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929],
- [8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011],
- [9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109],
- [9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 9187, 9199],
- [9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283],
- [9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377],
- [9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 9437, 9439],
- [9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533],
- [9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, 9631],
- [9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733],
- [9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811],
- [9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887],
- [9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007],
- [10009, 10037, 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099],
- [10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, 10169, 10177],
- [10181, 10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271],
- [10273, 10289, 10301, 10303, 10313, 10321, 10331, 10333, 10337, 10343],
- [10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459],
- [10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567],
- [10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, 10657],
- [10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739],
- [10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859],
- [10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949],
- [10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059],
- [11069, 11071, 11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149],
- [11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251],
- [11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329],
- [11351, 11353, 11369, 11383, 11393, 11399, 11411, 11423, 11437, 11443],
- [11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527],
- [11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657],
- [11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, 11777],
- [11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833],
- [11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933],
- [11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011],
- [12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109],
- [12113, 12119, 12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211],
- [12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289],
- [12301, 12323, 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401],
- [12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 12479, 12487],
- [12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553],
- [12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, 12637, 12641],
- [12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739],
- [12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829],
- [12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923],
- [12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007],
- [13009, 13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109],
- [13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187],
- [13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309],
- [13313, 13327, 13331, 13337, 13339, 13367, 13381, 13397, 13399, 13411],
- [13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499],
- [13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 13613, 13619],
- [13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, 13693, 13697],
- [13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781],
- [13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879],
- [13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967],
- [13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081],
- [14083, 14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197],
- [14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323],
- [14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419],
- [14423, 14431, 14437, 14447, 14449, 14461, 14479, 14489, 14503, 14519],
- [14533, 14537, 14543, 14549, 14551, 14557, 14561, 14563, 14591, 14593],
- [14621, 14627, 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699],
- [14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 14759, 14767],
- [14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851],
- [14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947],
- [14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073],
- [15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149],
- [15161, 15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259],
- [15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319],
- [15329, 15331, 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401],
- [15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, 15493, 15497],
- [15511, 15527, 15541, 15551, 15559, 15569, 15581, 15583, 15601, 15607],
- [15619, 15629, 15641, 15643, 15647, 15649, 15661, 15667, 15671, 15679],
- [15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773],
- [15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881],
- [15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971],
- [15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069],
- [16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183],
- [16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267],
- [16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381],
- [16411, 16417, 16421, 16427, 16433, 16447, 16451, 16453, 16477, 16481],
- [16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603],
- [16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691],
- [16693, 16699, 16703, 16729, 16741, 16747, 16759, 16763, 16787, 16811],
- [16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903],
- [16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993],
- [17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093],
- [17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191],
- [17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317],
- [17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389],
- [17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477],
- [17483, 17489, 17491, 17497, 17509, 17519, 17539, 17551, 17569, 17573],
- [17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669],
- [17681, 17683, 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783],
- [17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 17881, 17891],
- [17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971],
- [17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, 18059],
- [18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143],
- [18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233],
- [18251, 18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313],
- [18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427],
- [18433, 18439, 18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517],
- [18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593, 18617, 18637],
- [18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749],
- [18757, 18773, 18787, 18793, 18797, 18803, 18839, 18859, 18869, 18899],
- [18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009],
- [19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121],
- [19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, 19219],
- [19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319],
- [19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423],
- [19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477],
- [19483, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577],
- [19583, 19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, 19709],
- [19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793, 19801],
- [19813, 19819, 19841, 19843, 19853, 19861, 19867, 19889, 19891, 19913],
- [19919, 19927, 19937, 19949, 19961, 19963, 19973, 19979, 19991, 19993],
- [19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, 20089],
- [20101, 20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, 20161],
- [20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269],
- [20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359],
- [20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477],
- [20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563],
- [20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707],
- [20717, 20719, 20731, 20743, 20747, 20749, 20753, 20759, 20771, 20773],
- [20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, 20899],
- [20903, 20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, 21001],
- [21011, 21013, 21017, 21019, 21023, 21031, 21059, 21061, 21067, 21089],
- [21101, 21107, 21121, 21139, 21143, 21149, 21157, 21163, 21169, 21179],
- [21187, 21191, 21193, 21211, 21221, 21227, 21247, 21269, 21277, 21283],
- [21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383, 21391],
- [21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, 21493],
- [21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 21563, 21569],
- [21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, 21649],
- [21661, 21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, 21757],
- [21767, 21773, 21787, 21799, 21803, 21817, 21821, 21839, 21841, 21851],
- [21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, 21961],
- [21977, 21991, 21997, 22003, 22013, 22027, 22031, 22037, 22039, 22051],
- [22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123, 22129],
- [22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, 22247],
- [22259, 22271, 22273, 22277, 22279, 22283, 22291, 22303, 22307, 22343],
- [22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, 22447],
- [22453, 22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, 22549],
- [22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, 22651],
- [22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, 22739],
- [22741, 22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, 22853],
- [22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943, 22961],
- [22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, 23039],
- [23041, 23053, 23057, 23059, 23063, 23071, 23081, 23087, 23099, 23117],
- [23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, 23209],
- [23227, 23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, 23327],
- [23333, 23339, 23357, 23369, 23371, 23399, 23417, 23431, 23447, 23459],
- [23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, 23563],
- [23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, 23633],
- [23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, 23747],
- [23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, 23831],
- [23833, 23857, 23869, 23873, 23879, 23887, 23893, 23899, 23909, 23911],
- [23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, 24019],
- [24023, 24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, 24097],
- [24103, 24107, 24109, 24113, 24121, 24133, 24137, 24151, 24169, 24179],
- [24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, 24317],
- [24329, 24337, 24359, 24371, 24373, 24379, 24391, 24407, 24413, 24419],
- [24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517, 24527],
- [24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, 24671],
- [24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, 24781],
- [24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, 24889],
- [24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, 24979],
- [24989, 25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, 25111],
- [25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, 25189],
- [25219, 25229, 25237, 25243, 25247, 25253, 25261, 25301, 25303, 25307],
- [25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391, 25409],
- [25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, 25523],
- [25537, 25541, 25561, 25577, 25579, 25583, 25589, 25601, 25603, 25609],
- [25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, 25703],
- [25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, 25801],
- [25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, 25919],
- [25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, 26003],
- [26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, 26113],
- [26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, 26209],
- [26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, 26309],
- [26317, 26321, 26339, 26347, 26357, 26371, 26387, 26393, 26399, 26407],
- [26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, 26501],
- [26513, 26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, 26641],
- [26647, 26669, 26681, 26683, 26687, 26693, 26699, 26701, 26711, 26713],
- [26717, 26723, 26729, 26731, 26737, 26759, 26777, 26783, 26801, 26813],
- [26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, 26891, 26893],
- [26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987, 26993],
- [27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, 27091],
- [27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, 27239],
- [27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, 27337],
- [27361, 27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, 27457],
- [27479, 27481, 27487, 27509, 27527, 27529, 27539, 27541, 27551, 27581],
- [27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, 27697],
- [27701, 27733, 27737, 27739, 27743, 27749, 27751, 27763, 27767, 27773],
- [27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827, 27847],
- [27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, 27953],
- [27961, 27967, 27983, 27997, 28001, 28019, 28027, 28031, 28051, 28057],
- [28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, 28163],
- [28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, 28289],
- [28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, 28409],
- [28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, 28513],
- [28517, 28537, 28541, 28547, 28549, 28559, 28571, 28573, 28579, 28591],
- [28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649, 28657],
- [28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, 28751],
- [28753, 28759, 28771, 28789, 28793, 28807, 28813, 28817, 28837, 28843],
- [28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, 28949],
- [28961, 28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, 29063],
- [29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, 29167, 29173],
- [29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, 29269],
- [29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, 29383],
- [29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, 29453],
- [29473, 29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, 29581],
- [29587, 29599, 29611, 29629, 29633, 29641, 29663, 29669, 29671, 29683],
- [29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 29819, 29833],
- [29837, 29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, 29927],
- [29947, 29959, 29983, 29989, 30011, 30013, 30029, 30047, 30059, 30071],
- [30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, 30139],
- [30161, 30169, 30181, 30187, 30197, 30203, 30211, 30223, 30241, 30253],
- [30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341, 30347],
- [30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, 30491],
- [30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, 30577],
- [30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, 30697],
- [30703, 30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, 30809],
- [30817, 30829, 30839, 30841, 30851, 30853, 30859, 30869, 30871, 30881],
- [30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, 31013],
- [31019, 31033, 31039, 31051, 31063, 31069, 31079, 31081, 31091, 31121],
- [31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183, 31189],
- [31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, 31267],
- [31271, 31277, 31307, 31319, 31321, 31327, 31333, 31337, 31357, 31379],
- [31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, 31513],
- [31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, 31607],
- [31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, 31723],
- [31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, 31847],
- [31849, 31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, 31981],
- [31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063, 32069],
- [32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, 32173],
- [32183, 32189, 32191, 32203, 32213, 32233, 32237, 32251, 32257, 32261],
- [32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, 32359],
- [32363, 32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, 32429],
- [32441, 32443, 32467, 32479, 32491, 32497, 32503, 32507, 32531, 32533],
- [32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 32609, 32611],
- [32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, 32719],
- [32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, 32833],
- [32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, 32941],
- [32957, 32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, 33029],
- [33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, 33119],
- [33149, 33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, 33223],
- [33247, 33287, 33289, 33301, 33311, 33317, 33329, 33331, 33343, 33347],
- [33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, 33457],
- [33461, 33469, 33479, 33487, 33493, 33503, 33521, 33529, 33533, 33547],
- [33563, 33569, 33577, 33581, 33587, 33589, 33599, 33601, 33613, 33617],
- [33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, 33721],
- [33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, 33809],
- [33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, 33911],
- [33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, 34033],
- [34039, 34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, 34159],
- [34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, 34267],
- [34273, 34283, 34297, 34301, 34303, 34313, 34319, 34327, 34337, 34351],
- [34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457, 34469],
- [34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, 34543],
- [34549, 34583, 34589, 34591, 34603, 34607, 34613, 34631, 34649, 34651],
- [34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, 34747],
- [34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, 34849],
- [34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, 34963],
- [34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, 35089],
- [35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, 35171],
- [35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, 35311],
- [35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 35401, 35407],
- [35419, 35423, 35437, 35447, 35449, 35461, 35491, 35507, 35509, 35521],
- [35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, 35597],
- [35603, 35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, 35771],
- [35797, 35801, 35803, 35809, 35831, 35837, 35839, 35851, 35863, 35869],
- [35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, 35977],
- [35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, 36061, 36067],
- [36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, 36187],
- [36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, 36293],
- [36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, 36389],
- [36433, 36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, 36523],
- [36527, 36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, 36599],
- [36607, 36629, 36637, 36643, 36653, 36671, 36677, 36683, 36691, 36697],
- [36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, 36787],
- [36791, 36793, 36809, 36821, 36833, 36847, 36857, 36871, 36877, 36887],
- [36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947, 36973],
- [36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, 37061],
- [37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, 37189, 37199],
- [37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, 37313],
- [37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, 37409],
- [37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, 37511],
- [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579],
- [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691],
- [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831],
- [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957],
- [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039]]}}]}
+ "id": "doc1",
+ "title": "Document 1",
+ "sections": [
+ {
+ "id": "sec1",
+ "content_type": "heading",
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction"}}]}]}]}
================================================================================
diff --git a/modules/services/serviceAi/merge_2.txt b/modules/services/serviceAi/merge_2.txt
new file mode 100644
index 00000000..83374f93
--- /dev/null
+++ b/modules/services/serviceAi/merge_2.txt
@@ -0,0 +1,121 @@
+================================================================================
+JSON MERGE OPERATION #2
+================================================================================
+Timestamp: 2026-01-05T08:30:55.472639
+
+INPUT:
+ Accumulated length: 414 chars
+ New Fragment length: 245 chars
+ Accumulated: 19 lines (showing first 5 and last 5)
+ {
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ ... (9 lines omitted) ...
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction"}}]}]}]}
+ New Fragment: 14 lines (showing first 5 and last 5)
+ t_type": "paragraph",
+ "elements": [
+ {
+ "type": "paragraph",
+ "content": {
+ ... (4 lines omitted) ...
+ }
+ ]
+ }
+ ]
+ }
+
+
+ Normalized Accumulated (414 chars)
+ (showing first 5 and last 5 of 19 lines)
+ {
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ ... (9 lines omitted) ...
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction"}}]}]}]}
+
+ Normalized New Fragment (245 chars)
+ (showing first 5 and last 5 of 14 lines)
+ t_type": "paragraph",
+ "elements": [
+ {
+ "type": "paragraph",
+ "content": {
+ ... (4 lines omitted) ...
+ }
+ ]
+ }
+ ]
+ }
+STEP: PHASE 1
+ Description: Finding overlap between JSON strings
+ ⏳ In progress...
+
+ Overlap Detection (string):
+ Overlap length: 0
+ ⚠️ No overlap detected - appending all
+
+ ⚠️ NO OVERLAP FOUND - This indicates iterations should stop
+ Closing JSON and returning final result
+
+ Closed JSON (414 chars):
+ ==============================================================================
+ {
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ },
+ "documents": [
+ {
+ "id": "doc1",
+ "title": "Document 1",
+ "sections": [
+ {
+ "id": "sec1",
+ "content_type": "heading",
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction"}}]}]}]}
+ ==============================================================================
+
+================================================================================
+MERGE RESULT: ✅ SUCCESS
+================================================================================
+Final result length: 414 chars
+Final result (COMPLETE):
+================================================================================
+{
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ },
+ "documents": [
+ {
+ "id": "doc1",
+ "title": "Document 1",
+ "sections": [
+ {
+ "id": "sec1",
+ "content_type": "heading",
+ "elements": [
+ {
+ "type": "heading",
+ "content": {
+ "text": "Introduction"}}]}]}]}
+================================================================================
diff --git a/modules/services/serviceAi/subAiCallLooping.py b/modules/services/serviceAi/subAiCallLooping.py
index 021b1f95..63051d8b 100644
--- a/modules/services/serviceAi/subAiCallLooping.py
+++ b/modules/services/serviceAi/subAiCallLooping.py
@@ -324,6 +324,8 @@ class AiCallLooper:
# JSON is already closed by mergeJsonStringsWithOverlap when no overlap
# Use the merged (closed) JSON string directly
result = mergedJsonString
+ # CRITICAL: Update lastRawResponse with merged result for next iteration
+ lastRawResponse = mergedJsonString
# Try to parse it to get parsedJsonForUseCase
try:
extracted = extractJsonString(mergedJsonString)
@@ -333,6 +335,8 @@ class AiCallLooper:
normalized = self._normalizeJsonStructure(parsed, useCase)
parsedJsonForUseCase = normalized
result = json.dumps(normalized, indent=2, ensure_ascii=False)
+ # CRITICAL: Update lastRawResponse with final result
+ lastRawResponse = result
else:
# Parsing failed - try to repair JSON
from modules.shared.jsonUtils import repairBrokenJson
@@ -346,6 +350,8 @@ class AiCallLooper:
normalized = self._normalizeJsonStructure(repairedJson, useCase)
parsedJsonForUseCase = normalized
result = json.dumps(normalized, indent=2, ensure_ascii=False)
+ # CRITICAL: Update lastRawResponse with final result
+ lastRawResponse = result
logger.info(f"Iteration {iteration}: Successfully repaired JSON after no-overlap merge")
except Exception as e:
# Last resort: try repair on the original merged string
@@ -379,6 +385,8 @@ class AiCallLooper:
normalized = self._normalizeJsonStructure(parsed, useCase)
parsedJsonForUseCase = normalized
result = json.dumps(normalized, indent=2, ensure_ascii=False)
+ # CRITICAL: Update lastRawResponse with merged result
+ lastRawResponse = result
else:
# Parsing failed - try to extract partial data using Deep-Structure-Merging
# This fallback works for all use cases: parse what we can from each part
@@ -404,9 +412,13 @@ class AiCallLooper:
parsedJsonForUseCase = mergedJsonObj
result = json.dumps(mergedJsonObj, indent=2, ensure_ascii=False)
+ # CRITICAL: Update lastRawResponse with merged result
+ lastRawResponse = result
else:
# All parsing failed - use string merge result
result = mergedJsonString
+ # CRITICAL: Update lastRawResponse with merged result
+ lastRawResponse = mergedJsonString
except Exception as e:
logger.warning(f"Failed data-based merge, falling back to string merging: {e}")
# Fallback to string merging
@@ -424,6 +436,8 @@ class AiCallLooper:
hasOverlap = False
logger.info(f"Iteration {iteration}: No overlap found in final fallback merge - stopping iterations")
result = mergedJsonString
+ # CRITICAL: Update lastRawResponse with merged result
+ lastRawResponse = mergedJsonString
# If no overlap was found, mark as complete and use closed JSON
if not hasOverlap:
diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py
index f6f3032c..5918d641 100644
--- a/modules/services/serviceAi/subStructureFilling.py
+++ b/modules/services/serviceAi/subStructureFilling.py
@@ -2198,75 +2198,15 @@ Output requirements:
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
- # Build overlap context: extract cut part and full part before (same level) for overlap
+ # Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
- if lastRawJson:
- # Find break position in raw JSON
- lastCompletePart = continuationContext.last_complete_part
- breakPos = len(lastRawJson.rstrip())
-
- if lastCompletePart:
- from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
- normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip()
- normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip()
-
- # Find where normalizedComplete ends in normalizedRaw
- pos = normalizedRaw.find(normalizedComplete)
- if pos >= 0:
- breakPos = pos + len(normalizedComplete)
- else:
- pos = lastRawJson.find(lastCompletePart)
- if pos >= 0:
- breakPos = pos + len(lastCompletePart)
- elif incompletePart:
- pos = lastRawJson.find(incompletePart)
- if pos >= 0:
- breakPos = pos
-
- # Extract cut part and full part before (same level)
- overlapContext = self._extractOverlapContext(lastRawJson, breakPos)
-
- # Build unified context showing structure hierarchy with cut point
unifiedContext = ""
if lastRawJson:
- # Find break position in raw JSON
- # Use last_complete_part length to find where complete part ends
- lastCompletePart = continuationContext.last_complete_part
- if lastCompletePart:
- # Break position is where the complete part ends
- # Normalize lastRawJson to match the normalized lastCompletePart
- from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
- normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip()
- normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip()
-
- # Find where normalizedComplete ends in normalizedRaw
- breakPos = normalizedRaw.find(normalizedComplete)
- if breakPos >= 0:
- breakPos = breakPos + len(normalizedComplete)
- else:
- # Fallback: use length of lastCompletePart in original string
- breakPos = lastRawJson.find(lastCompletePart)
- if breakPos >= 0:
- breakPos = breakPos + len(lastCompletePart)
- else:
- # Last resort: use incompletePart position
- if incompletePart:
- breakPos = lastRawJson.find(incompletePart)
- if breakPos == -1:
- breakPos = len(lastRawJson.rstrip())
- else:
- breakPos = len(lastRawJson.rstrip())
- elif incompletePart:
- # If no complete part, find where incomplete part starts
- breakPos = lastRawJson.find(incompletePart)
- if breakPos == -1:
- breakPos = len(lastRawJson.rstrip())
- else:
- breakPos = len(lastRawJson.rstrip())
-
- # Build intelligent context showing hierarchy
- from modules.shared.jsonUtils import buildIncompleteContext
- unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
+ # Get contexts directly from jsonContinuation
+ from modules.shared.jsonContinuation import getContexts
+ contexts = getContexts(lastRawJson)
+ overlapContext = contexts.overlapContext
+ unifiedContext = contexts.hierarchyContext
elif incompletePart:
unifiedContext = incompletePart
else:
@@ -2308,14 +2248,6 @@ CRITICAL:
- Complete the incomplete element and continue with remaining elements"""
return continuationPrompt
- def _extractOverlapContext(self, jsonContent: str, breakPosition: int) -> str:
- """
- Extract overlap context: cut part and full part before (same level).
- Delegates to shared function in jsonUtils for consistency.
- """
- from modules.shared.jsonUtils import extractOverlapContext
- return extractOverlapContext(jsonContent, breakPosition)
-
def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
"""
Extract multiple JSON blocks from response and merge them appropriately.
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py
index 44b4a76d..fca65197 100644
--- a/modules/services/serviceAi/subStructureGeneration.py
+++ b/modules/services/serviceAi/subStructureGeneration.py
@@ -128,33 +128,15 @@ class StructureGenerator:
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
- # Build overlap context: extract cut part and full part before (same level) for overlap
+ # Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
- if lastRawJson:
- # Find break position
- breakPos = len(lastRawJson.rstrip())
- if incompletePart:
- pos = lastRawJson.find(incompletePart)
- if pos >= 0:
- breakPos = pos
-
- # Extract cut part and full part before (same level)
- overlapContext = StructureGenerator._extractOverlapContext(lastRawJson, breakPos)
-
- # Build unified context showing structure hierarchy with cut point
unifiedContext = ""
if lastRawJson:
- # Find break position in raw JSON
- if incompletePart:
- breakPos = lastRawJson.find(incompletePart)
- if breakPos == -1:
- breakPos = len(lastRawJson.rstrip())
- else:
- breakPos = len(lastRawJson.rstrip())
-
- # Build intelligent context showing hierarchy
- from modules.shared.jsonUtils import buildIncompleteContext
- unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
+ # Get contexts directly from jsonContinuation
+ from modules.shared.jsonContinuation import getContexts
+ contexts = getContexts(lastRawJson)
+ overlapContext = contexts.overlapContext
+ unifiedContext = contexts.hierarchyContext
elif incompletePart:
unifiedContext = incompletePart
else:
@@ -195,144 +177,6 @@ CRITICAL:
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
- Complete the incomplete element and continue with remaining elements"""
return continuationPrompt
- """
- Extract overlap context: cut part and full part before (same level).
-
- Returns a string showing:
- 1. The last complete element at the same level before the cut point
- 2. The cut part (incomplete element at the cut point)
- """
- if not jsonContent or breakPosition <= 0:
- return jsonContent[-200:].strip() if jsonContent else ""
-
- from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece
-
- # Find structure hierarchy
- hierarchy = findStructureHierarchy(jsonContent, breakPosition)
- if not hierarchy:
- # Fallback: show last 200 chars before break
- start = max(0, breakPosition - 200)
- return jsonContent[start:breakPosition + 100].strip()
-
- # Get cut level (the array/object containing the cut piece)
- cutLevel = hierarchy[-1]
- cutLevelStart = cutLevel['start_pos']
- cutLevelType = cutLevel['type']
-
- # Extract cut piece (incomplete element)
- cutPiece = extractCutPiece(jsonContent, breakPosition)
-
- # Find the last complete element at the same level before the cut point
- overlapParts = []
-
- if cutLevelType == 'array':
- # Find the last complete array element before breakPosition
- i = breakPosition - 1
- depth = 0
- inString = False
- escapeNext = False
- elementStart = breakPosition
-
- # Find the start of the incomplete element (or last complete element)
- while i >= cutLevelStart:
- char = jsonContent[i]
-
- if escapeNext:
- escapeNext = False
- i -= 1
- continue
-
- if char == '\\':
- escapeNext = True
- i -= 1
- continue
-
- if char == '"':
- inString = not inString
- i -= 1
- continue
-
- if not inString:
- if char == ']':
- depth += 1
- elif char == '[':
- depth -= 1
- if depth < 0:
- elementStart = i + 1
- break
- elif char == ',' and depth == 0:
- elementStart = i + 1
- break
-
- i -= 1
-
- # Extract the last complete element (if exists) and the cut part
- if elementStart < breakPosition:
- contentBeforeBreak = jsonContent[max(cutLevelStart, elementStart - 500):breakPosition].strip()
-
- # Find the last complete element by looking for balanced brackets/braces
- lastCompleteEnd = breakPosition
- braceCount = 0
- bracketCount = 0
- inString = False
- escapeNext = False
-
- # Go backwards from breakPosition to find where last complete element ends
- for j in range(breakPosition - 1, max(cutLevelStart, breakPosition - 1000), -1):
- char = jsonContent[j]
-
- if escapeNext:
- escapeNext = False
- continue
-
- if char == '\\':
- escapeNext = True
- continue
-
- if char == '"':
- inString = not inString
- continue
-
- if not inString:
- if char == '}':
- braceCount += 1
- elif char == '{':
- braceCount -= 1
- if braceCount == 0 and bracketCount == 0:
- lastCompleteEnd = j
- break
- elif char == ']':
- bracketCount += 1
- elif char == '[':
- bracketCount -= 1
- if bracketCount == 0 and braceCount == 0:
- lastCompleteEnd = j + 1
- break
- elif char == ',' and braceCount == 0 and bracketCount == 0:
- lastCompleteEnd = j + 1
- break
-
- # Extract last complete element and cut part
- if lastCompleteEnd < breakPosition:
- lastCompleteElement = jsonContent[max(cutLevelStart, lastCompleteEnd - 300):lastCompleteEnd].strip()
- cutPart = jsonContent[lastCompleteEnd:breakPosition + len(cutPiece)].strip()
-
- if lastCompleteElement:
- overlapParts.append(f"Last complete element at same level:\n{lastCompleteElement}")
- if cutPart:
- overlapParts.append(f"Cut part (incomplete):\n{cutPart}")
- else:
- contextStart = max(cutLevelStart, breakPosition - 300)
- overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
- else:
- contextStart = max(cutLevelStart, breakPosition - 300)
- overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
- else:
- # For objects or other types, show context around break point
- contextStart = max(cutLevelStart, breakPosition - 300)
- overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
-
- return "\n\n".join(overlapParts) if overlapParts else jsonContent[max(0, breakPosition - 200):breakPosition + 100].strip()
# Call AI with looping support
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
@@ -457,14 +301,6 @@ CRITICAL:
raise
@staticmethod
- def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
- """
- Extract overlap context: cut part and full part before (same level).
- Delegates to shared function in jsonUtils for consistency.
- """
- from modules.shared.jsonUtils import extractOverlapContext
- return extractOverlapContext(jsonContent, breakPosition)
-
def _buildChapterStructurePrompt(
self,
userPrompt: str,
diff --git a/modules/services/serviceAi/test_json_merger.py b/modules/services/serviceAi/test_json_merger.py
deleted file mode 100644
index 13fa780c..00000000
--- a/modules/services/serviceAi/test_json_merger.py
+++ /dev/null
@@ -1,594 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-Test cases for JSON merger with different use cases and random cuts.
-
-Tests the robustness of the JSON merger by:
-1. Creating test JSON for different use cases
-2. Cutting it randomly at various points
-3. Running the merger for each piece
-4. Checking completeness against original
-"""
-
-import json
-import random
-import logging
-import sys
-import os
-from typing import Dict, Any, List, Tuple
-
-# Add project root to Python path
-# Find project root by looking for gateway/modules structure
-currentFile = os.path.abspath(__file__)
-currentDir = os.path.dirname(currentFile)
-
-# Navigate up from: gateway/modules/services/serviceAi/test_json_merger.py
-# To project root: D:\Athi\Local\Web\poweron
-# Try different levels up
-candidates = [
- os.path.abspath(os.path.join(currentDir, '../../../../')), # From gateway/modules/services/serviceAi
- os.path.abspath(os.path.join(currentDir, '../../..')), # Alternative
- os.path.abspath(os.path.join(currentDir, '../..')), # Another alternative
-]
-
-projectRoot = None
-for candidate in candidates:
- gatewayModulesPath = os.path.join(candidate, 'gateway', 'modules')
- if os.path.exists(gatewayModulesPath):
- projectRoot = candidate
- break
-
-# If still not found, try to find by looking for gateway directory
-if projectRoot is None:
- searchDir = currentDir
- for _ in range(10): # Max 10 levels up
- gatewayPath = os.path.join(searchDir, 'gateway')
- if os.path.exists(gatewayPath) and os.path.exists(os.path.join(gatewayPath, 'modules')):
- projectRoot = searchDir
- break
- parent = os.path.dirname(searchDir)
- if parent == searchDir: # Reached root
- break
- searchDir = parent
-
-if projectRoot is None:
- raise RuntimeError(f"Could not find project root. Current file: {currentFile}")
-
-# Add gateway directory to Python path (not project root)
-gatewayPath = os.path.join(projectRoot, 'gateway')
-if gatewayPath not in sys.path:
- sys.path.insert(0, gatewayPath)
-
-# Verify the path is correct
-modulesPath = os.path.join(projectRoot, 'gateway', 'modules')
-if not os.path.exists(modulesPath):
- raise RuntimeError(f"Project root verification failed. Expected gateway/modules at: {modulesPath}")
-
-try:
- from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
- from modules.services.serviceAi.subJsonMerger import JsonMergeLogger
- from modules.shared.jsonUtils import (
- normalizeJsonText, stripCodeFences, closeJsonStructures, tryParseJson,
- extractJsonStructureContext
- )
-except ImportError as e:
- # Try to help debug
- print(f"Import error: {e}")
- print(f"Project root: {projectRoot}")
- print(f"Gateway path: {gatewayPath}")
- print(f"Python path (first 3): {sys.path[:3]}")
- print(f"Looking for modules at: {modulesPath}")
- print(f"Exists: {os.path.exists(modulesPath)}")
- if os.path.exists(modulesPath):
- print(f"Contents: {os.listdir(modulesPath)[:5]}")
- raise
-
-logger = logging.getLogger(__name__)
-
-
-def createTestJsonForUseCase(useCaseId: str, size: int = 100) -> Dict[str, Any]:
- """
- Create test JSON for a specific use case.
-
- Args:
- useCaseId: Use case ID (section_content, chapter_structure, etc.)
- size: Size of test data (number of elements/rows/items)
-
- Returns:
- Test JSON dictionary
- """
- if useCaseId == "section_content":
- # Create table with rows
- elements = [{
- "type": "table",
- "content": {
- "headers": ["Year", "Value"],
- "rows": [[str(1947 + i), str(10000 + i * 100)] for i in range(size)]
- }
- }]
- return {"elements": elements}
-
- elif useCaseId == "chapter_structure":
- chapters = [{
- "id": f"chapter_{i}",
- "title": f"Chapter {i}",
- "level": 1
- } for i in range(size)]
- return {"documents": [{"chapters": chapters}]}
-
- elif useCaseId == "code_structure":
- files = [{
- "id": f"file_{i}",
- "filename": f"file_{i}.py",
- "fileType": "python",
- "functions": [f"function_{i}_{j}" for j in range(5)]
- } for i in range(size)]
- return {"files": files}
-
- elif useCaseId == "code_content":
- files = [{
- "id": f"file_{i}",
- "content": f"# File {i}\ndef function_{i}():\n pass\n" * 10,
- "functions": [{"name": f"function_{i}_{j}", "line": j * 3} for j in range(5)]
- } for i in range(size)]
- return {"files": files}
-
- else:
- raise ValueError(f"Unknown use case: {useCaseId}")
-
-
-def cutJsonRandomly(jsonString: str, numCuts: int = 5, overlapSize: int = 100) -> List[str]:
- """
- Cut JSON string RANDOMLY at different points WITH OVERLAP between fragments.
- Each fragment overlaps with the previous one to help merging.
-
- Args:
- jsonString: JSON string to cut
- numCuts: Number of cuts to make
- overlapSize: Size of overlap between fragments (in characters)
-
- Returns:
- List of JSON fragments with overlap
- """
- fragments = []
- currentPos = 0
- totalLength = len(jsonString)
-
- if totalLength == 0:
- return []
-
- # First fragment: from start to first cut point
- if numCuts > 0:
- # First cut point (between 20% and 40% of total)
- firstCutPoint = random.randint(int(totalLength * 0.2), int(totalLength * 0.4))
- fragment = jsonString[:firstCutPoint]
- fragments.append(fragment)
- currentPos = firstCutPoint
- else:
- # No cuts - return whole string
- return [jsonString]
-
- # Subsequent fragments: each starts with overlap from previous, then continues
- for i in range(numCuts - 1):
- if currentPos >= totalLength:
- break
-
- # Calculate overlap start (go back overlapSize from current position)
- overlapStart = max(0, currentPos - overlapSize)
-
- # Calculate next cut point (between 20% and 40% of remaining)
- remaining = totalLength - currentPos
- if remaining < overlapSize * 2:
- # Not enough remaining - add rest as last fragment
- fragment = jsonString[overlapStart:]
- fragments.append(fragment)
- break
-
- # Next cut point from current position
- nextCutPoint = currentPos + random.randint(int(remaining * 0.2), int(remaining * 0.4))
- nextCutPoint = min(nextCutPoint, totalLength)
-
- # Fragment: from overlap start to next cut point
- fragment = jsonString[overlapStart:nextCutPoint]
- fragments.append(fragment)
-
- currentPos = nextCutPoint
-
- # Add remaining as last fragment (with overlap)
- if currentPos < totalLength:
- overlapStart = max(0, currentPos - overlapSize)
- fragment = jsonString[overlapStart:]
- fragments.append(fragment)
-
- return fragments
-
-
-def testMergerWithFragments(
- originalJson: Dict[str, Any],
- fragments: List[str],
- useCaseId: str
-) -> Tuple[bool, Dict[str, Any], str]:
- """
- Test merger by merging fragments sequentially.
-
- Args:
- originalJson: Original complete JSON
- fragments: List of JSON fragments to merge
- useCaseId: Use case ID
-
- Returns:
- Tuple of (success, merged_json, error_message)
- """
- if not fragments:
- return False, {}, "No fragments provided"
-
- # Log structure context for each fragment (especially incomplete ones)
- print(f"\n{'='*60}")
- print(f"FRAGMENT ANALYSIS (use case: {useCaseId})")
- print(f"{'='*60}")
-
- for fragIdx, fragment in enumerate(fragments):
- print(f"\nFragment {fragIdx + 1}/{len(fragments)}:")
- print(f" Length: {len(fragment)} chars")
-
- # Extract structure context for this fragment
- try:
- structureContext = extractJsonStructureContext(fragment, useCaseId)
-
- templateStructure = structureContext.get("template_structure", "")
- lastCompletePart = structureContext.get("last_complete_part", "")
- incompletePart = structureContext.get("incomplete_part", "")
- structureContextJson = structureContext.get("structure_context", "")
-
- # Check if fragment is incomplete
- normalized = stripCodeFences(normalizeJsonText(fragment)).strip()
- parsed, parseErr, _ = tryParseJson(normalized)
- isIncomplete = parseErr is not None or (parsed is None)
-
- if isIncomplete:
- print(f" Status: INCOMPLETE (cut off)")
- print(f" Template Structure:")
- if templateStructure:
- # Show first few lines of template
- templateLines = templateStructure.split('\n')
- templateLinesToShow = templateLines[:5]
- for line in templateLinesToShow:
- print(f" {line}")
- if len(templateLines) > 5:
- remainingLines = len(templateLines) - 5
- print(f" ... ({remainingLines} more lines)")
- else:
- print(f" (not available)")
-
- print(f" Structure Context:")
- if structureContextJson:
- # Show structure context
- contextLines = structureContextJson.split('\n')
- contextLinesToShow = contextLines[:5]
- for line in contextLinesToShow:
- print(f" {line}")
- if len(contextLines) > 5:
- remainingContextLines = len(contextLines) - 5
- print(f" ... ({remainingContextLines} more lines)")
- else:
- print(f" (not available)")
-
- print(f" Last Complete Part:")
- if lastCompletePart:
- # Show last complete part (truncated if too long)
- if len(lastCompletePart) > 200:
- print(f" {lastCompletePart[:200]}... ({len(lastCompletePart)} chars total)")
- else:
- print(f" {lastCompletePart}")
- else:
- print(f" (not available)")
-
- print(f" Incomplete Part:")
- if incompletePart:
- # Show incomplete part (truncated if too long)
- if len(incompletePart) > 200:
- print(f" {incompletePart[:200]}... ({len(incompletePart)} chars total)")
- else:
- print(f" {incompletePart}")
- else:
- print(f" (not available)")
- else:
- print(f" Status: COMPLETE")
- if structureContextJson:
- print(f" Structure Context:")
- contextLines = structureContextJson.split('\n')
- contextLinesToShow = contextLines[:3]
- for line in contextLinesToShow:
- print(f" {line}")
- if len(contextLines) > 3:
- remainingContextLines = len(contextLines) - 3
- print(f" ... ({remainingContextLines} more lines)")
- except Exception as e:
- print(f" Error extracting structure context: {e}")
-
- print(f"\n{'='*60}\n")
-
- # Start with first fragment
- accumulated = fragments[0]
-
- # Merge each subsequent fragment
- for i, fragment in enumerate(fragments[1:], 1):
- try:
- accumulated, hasOverlap = JsonResponseHandler.mergeJsonStringsWithOverlap(
- accumulated, fragment
- )
- # Log if no overlap was found (iterations would stop in real scenario)
- if not hasOverlap:
- print(f" ⚠️ Fragment {i}: No overlap found - iterations would stop here")
-
- # Check if result is empty (should never happen)
- if not accumulated or accumulated.strip() in ['{"elements": []}', '{}', '']:
- return False, {}, f"Merge {i} returned empty JSON"
-
- except Exception as e:
- return False, {}, f"Merge {i} failed with error: {str(e)}"
-
- # Parse merged result
- try:
- # Normalize and try to parse
- normalized = stripCodeFences(normalizeJsonText(accumulated)).strip()
-
- # Try to parse directly
- parsed, parseErr, _ = tryParseJson(normalized)
-
- if parseErr is not None:
- # Try closing structures if incomplete
- try:
- closed = closeJsonStructures(normalized)
- parsed, parseErr2, _ = tryParseJson(closed)
- if parseErr2 is not None:
- # Try to extract valid JSON prefix
- # JsonResponseHandler is already imported at module level
- validPrefix = JsonResponseHandler._extractValidJsonPrefix(normalized)
- if validPrefix:
- parsed, parseErr3, _ = tryParseJson(validPrefix)
- if parseErr3 is not None:
- return False, {}, f"Final parse error: {str(parseErr3)}"
- else:
- return False, {}, f"Final parse error: {str(parseErr2)}"
- except Exception as parseErr:
- return False, {}, f"Final parse error: {str(parseErr)}"
-
- if not parsed:
- return False, {}, "Final parse returned None"
-
- # CRITICAL: Ensure parsed is a dict, not a list
- # If it's a list, wrap it in the expected structure based on use case
- if isinstance(parsed, list):
- # Try to normalize list to expected structure
- if useCaseId == "section_content":
- # List of elements - wrap in elements structure
- parsed = {"elements": parsed}
- elif useCaseId == "chapter_structure":
- # List of chapters - wrap in documents structure
- parsed = {"documents": [{"chapters": parsed}]}
- elif useCaseId == "code_structure":
- # List of files - wrap in files structure
- parsed = {"files": parsed}
- elif useCaseId == "code_content":
- # List of files - wrap in files structure
- parsed = {"files": parsed}
- else:
- # Unknown use case - try to wrap as elements
- parsed = {"elements": parsed}
-
- # Ensure it's a dict now
- if not isinstance(parsed, dict):
- return False, {}, f"Final parse returned unexpected type: {type(parsed).__name__}"
-
- return True, parsed, ""
-
- except Exception as e:
- return False, {}, f"Final parse failed: {str(e)}"
-
-
-def compareJsonCompleteness(
- original: Dict[str, Any],
- merged: Dict[str, Any],
- useCaseId: str
-) -> Tuple[bool, str]:
- """
- Compare merged JSON with original to check completeness.
-
- Args:
- original: Original JSON
- merged: Merged JSON (must be a dict)
- useCaseId: Use case ID
-
- Returns:
- Tuple of (is_complete, message)
- """
- # CRITICAL: Ensure merged is a dict
- if not isinstance(merged, dict):
- return False, f"Merged JSON is not a dict, got {type(merged).__name__}"
-
- if useCaseId == "section_content":
- origElements = original.get("elements", [])
- mergedElements = merged.get("elements", [])
-
- if not isinstance(origElements, list):
- return False, f"Original elements is not a list: {type(origElements).__name__}"
- if not isinstance(mergedElements, list):
- return False, f"Merged elements is not a list: {type(mergedElements).__name__}"
-
- if len(mergedElements) < len(origElements):
- return False, f"Missing elements: {len(origElements)} expected, {len(mergedElements)} found"
-
- # Check table rows
- if origElements and mergedElements:
- origTable = origElements[0] if isinstance(origElements[0], dict) else {}
- mergedTable = mergedElements[0] if isinstance(mergedElements[0], dict) else {}
-
- if not origTable or not mergedTable:
- return False, f"Table structure missing: origTable={bool(origTable)}, mergedTable={bool(mergedTable)}"
-
- origRows = origTable.get("content", {}).get("rows", []) if isinstance(origTable.get("content"), dict) else origTable.get("rows", [])
- mergedRows = mergedTable.get("content", {}).get("rows", []) if isinstance(mergedTable.get("content"), dict) else mergedTable.get("rows", [])
-
- if not isinstance(origRows, list):
- return False, f"Original rows is not a list: {type(origRows).__name__}"
- if not isinstance(mergedRows, list):
- return False, f"Merged rows is not a list: {type(mergedRows).__name__}"
-
- if len(mergedRows) < len(origRows):
- return False, f"Missing rows: {len(origRows)} expected, {len(mergedRows)} found"
-
- return True, "Complete"
-
- elif useCaseId == "chapter_structure":
- origChapters = original.get("documents", [{}])[0].get("chapters", [])
- mergedChapters = merged.get("documents", [{}])[0].get("chapters", [])
-
- if len(mergedChapters) < len(origChapters):
- return False, f"Missing chapters: {len(origChapters)} expected, {len(mergedChapters)} found"
-
- return True, "Complete"
-
- elif useCaseId == "code_structure":
- origFiles = original.get("files", [])
- mergedFiles = merged.get("files", [])
-
- if len(mergedFiles) < len(origFiles):
- return False, f"Missing files: {len(origFiles)} expected, {len(mergedFiles)} found"
-
- return True, "Complete"
-
- elif useCaseId == "code_content":
- origFiles = original.get("files", [])
- mergedFiles = merged.get("files", [])
-
- if len(mergedFiles) < len(origFiles):
- return False, f"Missing files: {len(origFiles)} expected, {len(mergedFiles)} found"
-
- return True, "Complete"
-
- else:
- return False, f"Unknown use case: {useCaseId}"
-
-
-def runTestForUseCase(useCaseId: str, size: int = 50, numTests: int = 10) -> Dict[str, Any]:
- """
- Run multiple tests for a use case with random cuts.
-
- Args:
- useCaseId: Use case ID
- size: Size of test data
- numTests: Number of test runs
-
- Returns:
- Test results dictionary
- """
- results = {
- "useCaseId": useCaseId,
- "size": size,
- "numTests": numTests,
- "passed": 0,
- "failed": 0,
- "errors": []
- }
-
- for testNum in range(numTests):
- try:
- # Create test JSON
- originalJson = createTestJsonForUseCase(useCaseId, size)
- originalString = json.dumps(originalJson, indent=2, ensure_ascii=False)
-
- # Cut randomly
- fragments = cutJsonRandomly(originalString, numCuts=random.randint(3, 7))
-
- # Test merger
- success, mergedJson, errorMsg = testMergerWithFragments(
- originalJson, fragments, useCaseId
- )
-
- if not success:
- results["failed"] += 1
- results["errors"].append(f"Test {testNum + 1}: {errorMsg}")
- continue
-
- # Check completeness
- isComplete, completenessMsg = compareJsonCompleteness(
- originalJson, mergedJson, useCaseId
- )
-
- if isComplete:
- results["passed"] += 1
- else:
- results["failed"] += 1
- results["errors"].append(f"Test {testNum + 1}: {completenessMsg}")
-
- except Exception as e:
- results["failed"] += 1
- results["errors"].append(f"Test {testNum + 1}: Exception - {str(e)}")
-
- return results
-
-
-def runAllTests():
- """Run tests for all use cases."""
- useCases = [
- "section_content",
- "chapter_structure",
- "code_structure",
- "code_content"
- ]
-
- allResults = []
-
- for useCaseId in useCases:
- print(f"\n{'='*60}")
- print(f"Testing use case: {useCaseId}")
- print(f"{'='*60}")
-
- # Initialize log file for this use case
- # Initialize log file (overwrite on each test run)
- logFileName = f"json_merger_{useCaseId}.txt"
- JsonMergeLogger.initializeLogFile(logFileName)
- print(f"Log file: {logFileName}")
-
- results = runTestForUseCase(useCaseId, size=50, numTests=10)
- allResults.append(results)
-
- print(f"Passed: {results['passed']}/{results['numTests']}")
- print(f"Failed: {results['failed']}/{results['numTests']}")
-
- if results["errors"]:
- print("\nErrors:")
- for error in results["errors"][:5]: # Show first 5 errors
- print(f" - {error}")
-
- # Summary
- print(f"\n{'='*60}")
- print("SUMMARY")
- print(f"{'='*60}")
-
- totalPassed = sum(r["passed"] for r in allResults)
- totalFailed = sum(r["failed"] for r in allResults)
- totalTests = sum(r["numTests"] for r in allResults)
-
- print(f"Total tests: {totalTests}")
- print(f"Passed: {totalPassed}")
- print(f"Failed: {totalFailed}")
- print(f"Success rate: {totalPassed / totalTests * 100:.1f}%")
-
- return allResults
-
-
-if __name__ == "__main__":
- # Set up logging - use WARNING level to reduce noise from jsonUtils
- logging.basicConfig(level=logging.WARNING)
-
- # Run tests
- results = runAllTests()
-
- # Save results to file (in project root)
- resultsFile = os.path.join(projectRoot, "test_json_merger_results.json")
- with open(resultsFile, "w", encoding="utf-8") as f:
- json.dump(results, f, indent=2, ensure_ascii=False)
-
- print(f"\nResults saved to {resultsFile}")
diff --git a/modules/services/serviceGeneration/paths/codePath.py b/modules/services/serviceGeneration/paths/codePath.py
index b385c192..273d6229 100644
--- a/modules/services/serviceGeneration/paths/codePath.py
+++ b/modules/services/serviceGeneration/paths/codePath.py
@@ -26,14 +26,6 @@ class CodeGenerationPath:
def __init__(self, services):
self.services = services
- @staticmethod
- def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
- """
- Extract overlap context: cut part and full part before (same level).
- Delegates to shared function in jsonUtils for consistency.
- """
- from modules.shared.jsonUtils import extractOverlapContext
- return extractOverlapContext(jsonContent, breakPosition)
async def generateCode(
self,
@@ -346,25 +338,15 @@ Return ONLY valid JSON matching the request above.
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
- # Build overlap context: extract last ~100 characters from the response for overlap
+ # Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
- if lastRawJson:
- overlapContext = lastRawJson[-100:].strip()
-
- # Build unified context showing structure hierarchy with cut point
unifiedContext = ""
if lastRawJson:
- # Find break position in raw JSON
- if incompletePart:
- breakPos = lastRawJson.find(incompletePart)
- if breakPos == -1:
- breakPos = len(lastRawJson.rstrip())
- else:
- breakPos = len(lastRawJson.rstrip())
-
- # Build intelligent context showing hierarchy
- from modules.shared.jsonUtils import buildIncompleteContext
- unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
+ # Get contexts directly from jsonContinuation
+ from modules.shared.jsonContinuation import getContexts
+ contexts = getContexts(lastRawJson)
+ overlapContext = contexts.overlapContext
+ unifiedContext = contexts.hierarchyContext
elif incompletePart:
unifiedContext = incompletePart
else:
@@ -808,33 +790,15 @@ Return ONLY valid JSON in this format:
incompletePart = continuationContext.incomplete_part
lastRawJson = continuationContext.last_raw_json
- # Build overlap context: extract cut part and full part before (same level) for overlap
+ # Generate both overlap context and hierarchy context using jsonContinuation
overlapContext = ""
- if lastRawJson:
- # Find break position
- breakPos = len(lastRawJson.rstrip())
- if incompletePart:
- pos = lastRawJson.find(incompletePart)
- if pos >= 0:
- breakPos = pos
-
- # Extract cut part and full part before (same level)
- overlapContext = CodeGenerationPath._extractOverlapContext(lastRawJson, breakPos)
-
- # Build unified context showing structure hierarchy with cut point
unifiedContext = ""
if lastRawJson:
- # Find break position in raw JSON
- if incompletePart:
- breakPos = lastRawJson.find(incompletePart)
- if breakPos == -1:
- breakPos = len(lastRawJson.rstrip())
- else:
- breakPos = len(lastRawJson.rstrip())
-
- # Build intelligent context showing hierarchy
- from modules.shared.jsonUtils import buildIncompleteContext
- unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
+ # Get contexts directly from jsonContinuation
+ from modules.shared.jsonContinuation import getContexts
+ contexts = getContexts(lastRawJson)
+ overlapContext = contexts.overlapContext
+ unifiedContext = contexts.hierarchyContext
elif incompletePart:
unifiedContext = incompletePart
else:
diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
index 0ee6fa5e..f0222dce 100644
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@@ -64,25 +64,27 @@ async def buildGenerationPrompt(
)
if hasContinuation:
- # CONTINUATION PROMPT - use new summary format from buildContinuationContext
+ # CONTINUATION PROMPT - use centralized jsonContinuation system
delivered_summary = continuationContext.get("delivered_summary", "")
- element_before_cutoff = continuationContext.get("element_before_cutoff")
- cut_off_element = continuationContext.get("cut_off_element")
+
+ # Use centralized system: overlap_context and hierarchy_context from jsonContinuation.getContexts()
+ overlap_context = continuationContext.get("overlap_context")
+ hierarchy_context = continuationContext.get("hierarchy_context")
# Build continuation text with delivered summary and cut-off information
# CRITICAL: Always include cut-off information if available (per loop_plan.md)
continuationText = f"{delivered_summary}\n\n"
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
- # Add cut-off point information (per loop_plan.md: always add if available)
+ # Add cut-off point information using centralized jsonContinuation contexts
# These are shown ONLY as REFERENCE to know where generation stopped
- if element_before_cutoff:
- continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
- continuationText += f"{element_before_cutoff}\n\n"
+ if hierarchy_context:
+ continuationText += "# REFERENCE: Structure context (already delivered - DO NOT repeat):\n"
+ continuationText += f"{hierarchy_context}\n\n"
- if cut_off_element:
- continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
- continuationText += f"{cut_off_element}\n\n"
+ if overlap_context:
+ continuationText += "# REFERENCE: Overlap context - incomplete element at cut point (DO NOT repeat):\n"
+ continuationText += f"{overlap_context}\n\n"
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
diff --git a/modules/shared/jsonContinuation.md b/modules/shared/jsonContinuation.md
new file mode 100644
index 00000000..b7e93cb4
--- /dev/null
+++ b/modules/shared/jsonContinuation.md
@@ -0,0 +1,164 @@
+# JSON Continuation Context Module
+
+Ein Python-Modul zur Generierung von Kontextinformationen für abgeschnittene JSON-Strings, um AI-Modellen die Fortsetzung zu ermöglichen.
+
+## Problem
+
+Wenn eine AI-Antwort als JSON abgeschnitten wird (z.B. Token-Limit erreicht), muss die nächste Iteration wissen:
+- **Wo** der JSON abgeschnitten wurde
+- **Was** bereits generiert wurde
+- **Was** als nächstes geliefert werden soll
+
+## Lösung: Drei Kontexte
+
+### 1. Overlap Context
+- Zeigt das **innerste Objekt/Array-Element**, das den Cut-Punkt enthält
+- Wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu **mergen**
+- Exakt so wie im Original-String (für String-Matching beim Merge)
+
+### 2. Hierarchy Context
+- Zeigt die **hierarchische Struktur** vom Root bis zum Cut-Punkt
+- Mit **Budget-Logik**: Näher am Cut = vollständige Werte, weiter weg = `"..."` Platzhalter
+- Gibt der AI den Kontext der gesamten JSON-Struktur
+
+### 3. Complete Part (NEU)
+- Der **vollständige, valide JSON** bis zum Cut-Punkt
+- Alle offenen Strukturen werden geschlossen (`}`, `]`, `"`)
+- Unvollständige Keys werden entfernt
+- Kann direkt als valides JSON geparst werden
+
+## Installation
+
+```bash
+# Keine externen Abhängigkeiten erforderlich
+# Das Modul liegt im Projekt unter modules/shared/jsonContinuation.py
+```
+
+## Modulkonstanten
+
+```python
+# Diese Konstanten können vor dem Import angepasst werden
+BUDGET_LIMIT: int = 500 # Zeichen-Budget für Datenwerte
+OVERLAP_MAX_CHARS: int = 1000 # Max Zeichen für Overlap Context
+```
+
+## Verwendung
+
+### Grundlegende Verwendung
+
+```python
+from modules.shared.jsonContinuation import extractContinuationContexts
+
+truncated_json = '''{"customers": [
+ {"id": 1, "name": "John"},
+ {"id": 2, "name": "Jane", "email": "jane@exa'''
+
+overlap, hierarchy, complete = extractContinuationContexts(truncated_json)
+
+print("Overlap Context:")
+print(overlap)
+# {"id": 2, "name": "Jane", "email": "jane@exa
+
+print("Hierarchy Context:")
+print(hierarchy)
+# {"customers": [...structure with budget logic...]
+
+print("Complete Part (valid JSON):")
+print(complete)
+# {"customers": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane", "email": "jane@exa"}]}
+
+import json
+parsed = json.loads(complete) # ✓ Funktioniert!
+```
+
+### Mit Pydantic-Modell
+
+```python
+from modules.shared.jsonContinuation import getContexts
+
+contexts = getContexts(truncated_json)
+
+print(contexts.overlapContext)
+print(contexts.hierarchyContext)
+print(contexts.completePart)
+```
+
+### Konstanten anpassen
+
+```python
+from modules.shared import jsonContinuation
+
+# Budget anpassen bevor Funktionen aufgerufen werden
+jsonContinuation.BUDGET_LIMIT = 200
+jsonContinuation.OVERLAP_MAX_CHARS = 500
+
+overlap, hierarchy, complete = jsonContinuation.extractContinuationContexts(truncated_json)
+```
+
+## Rückgabewerte
+
+| Rückgabe | Typ | Beschreibung |
+|----------|-----|--------------|
+| `overlap` | str | Innerstes Element mit Cut-Punkt (für Merge) |
+| `hierarchy` | str | Volle Struktur mit Budget-Logik |
+| `complete_part` | str | Valides JSON mit geschlossenen Strukturen |
+
+## Beispiele
+
+### Verschachtelte Objekte
+
+```python
+json_str = '{"user": {"profile": {"bio": "Hello Wor'
+
+overlap, hierarchy, complete = extractContinuationContexts(json_str)
+
+# Overlap: {"bio": "Hello Wor
+# Hierarchy: {"user": {"profile": {"bio": "Hello Wor
+# Complete: {"user": {"profile": {"bio": "Hello Wor"}}} ← Valides JSON!
+```
+
+### Array von Objekten mit unvollständigem Key
+
+```python
+json_str = '''{
+ "items": [
+ {"id": 1, "name": "First"},
+ {"id": 2, "name": "Second"},
+ {"id": 3, "name": "Third", "add'''
+
+overlap, hierarchy, complete = extractContinuationContexts(json_str)
+
+# Complete entfernt den unvollständigen Key "add":
+# {"items": [{"id": 1, ...}, {"id": 2, ...}, {"id": 3, "name": "Third"}]}
+```
+
+## Budget-Logik
+
+Die Budget-Logik funktioniert wie folgt:
+
+1. **Sammeln**: Alle String-Werte werden mit ihrer Position gesammelt
+2. **Sortieren**: Nach Entfernung zum Cut-Punkt (näher = höhere Priorität)
+3. **Zuweisen**: Budget wird von hinten nach vorne aufgebraucht
+4. **Ersetzen**: Werte außerhalb des Budgets werden durch `"..."` ersetzt
+
+## Tests ausführen
+
+```bash
+python -m pytest tests/functional/test12_json_split_merge.py -v
+```
+
+## API Referenz
+
+### `extractContinuationContexts(truncatedJson: str) -> Tuple[str, str, str]`
+
+Hauptfunktion. Gibt `(overlap, hierarchy, complete_part)` zurück.
+
+### `getContexts(truncatedJson: str) -> JsonContinuationContexts`
+
+Convenience-Funktion. Gibt ein Pydantic-Modell mit den Feldern `overlapContext`, `hierarchyContext` und `completePart` zurück.
+
+### Modulkonstanten
+
+- `BUDGET_LIMIT`: int (default: 500) - Zeichen-Budget für Hierarchy-Context
+- `OVERLAP_MAX_CHARS`: int (default: 1000) - Max Zeichen für Overlap-Context
+
diff --git a/modules/shared/jsonContinuation.py b/modules/shared/jsonContinuation.py
new file mode 100644
index 00000000..2fabd103
--- /dev/null
+++ b/modules/shared/jsonContinuation.py
@@ -0,0 +1,1232 @@
+"""
+JSON Continuation Context Module
+
+Generiert drei Kontexte für abgeschnittene JSON-Strings:
+1. Overlap Context: Das innerste Objekt/Array-Element, das den Cut-Punkt enthält
+2. Hierarchy Context: Die hierarchische Struktur vom Root bis zum Cut mit Budget-Logik
+3. Complete Part: Der vollständige Teil des JSONs mit allen Strukturen geschlossen
+
+Hauptfunktionen:
+- extractContinuationContexts(truncatedJson: str) -> Tuple[str, str, str]
+ Extrahiert alle drei Kontexte aus einem abgeschnittenen JSON-String.
+
+- getContexts(truncatedJson: str) -> JsonContinuationContexts
+ Gibt alle Kontexte als Pydantic-Modell zurück mit benannten Feldern.
+
+Modulkonstanten:
+- BUDGET_LIMIT: int = 500
+ Zeichen-Budget für vollständige Datenwerte im Hierarchy Context
+
+- OVERLAP_MAX_CHARS: int = 1000
+ Maximale Zeichen für den Overlap Context
+
+Verwendung:
+ >>> from modules.shared.jsonContinuation import getContexts
+ >>> jsonStr = '{"users": [{"name": "John", "bio": "Hello Wor'
+ >>> contexts = getContexts(jsonStr)
+ >>> print(contexts.overlapContext)
+ >>> print(contexts.hierarchyContext)
+ >>> print(contexts.completePart)
+
+Autor: Claude
+Version: 2.0
+"""
+
+from typing import Tuple, List, Optional, Any
+from dataclasses import dataclass
+from enum import Enum
+from modules.datamodels.datamodelAi import JsonContinuationContexts
+
+
+# =============================================================================
+# MODULE CONSTANTS
+# =============================================================================
+
+BUDGET_LIMIT: int = 500
+"""Zeichen-Budget für vollständige Datenwerte im Hierarchy Context"""
+
+OVERLAP_MAX_CHARS: int = 1000
+"""Maximale Zeichen für den Overlap Context"""
+
+
+# =============================================================================
+# TOKEN TYPES AND DATA CLASSES
+# =============================================================================
+
+
+class TokenType(Enum):
+ """JSON Token Types"""
+ OBJECT_START = "{"
+ OBJECT_END = "}"
+ ARRAY_START = "["
+ ARRAY_END = "]"
+ STRING = "string"
+ NUMBER = "number"
+ BOOLEAN = "boolean"
+ NULL = "null"
+ COLON = ":"
+ COMMA = ","
+ KEY = "key"
+ EOF = "eof"
+ TRUNCATED = "truncated"
+
+
+@dataclass
+class Token:
+ """Represents a JSON token with position info"""
+ type: TokenType
+ value: Any
+ start_pos: int
+ end_pos: int
+ raw: str # Original string representation
+
+
+@dataclass
+class StackFrame:
+ """Represents a level in the JSON hierarchy"""
+ type: str # "object" or "array"
+ start_pos: int
+ key: Optional[str] = None # Current key for objects
+ index: int = 0 # Current index for arrays
+ content: str = "" # Accumulated content for this frame
+ keys_seen: List[str] = None # Keys seen in this object
+
+ def __post_init__(self):
+ if self.keys_seen is None:
+ self.keys_seen = []
+
+
+class JsonTokenizer:
+ """Tokenizer for potentially truncated JSON strings"""
+
+ def __init__(self, jsonStr: str):
+ self.jsonStr = jsonStr
+ self.pos = 0
+ self.length = len(jsonStr)
+
+ def skipWhitespace(self):
+ """Skip whitespace characters"""
+ while self.pos < self.length and self.jsonStr[self.pos] in ' \t\n\r':
+ self.pos += 1
+
+ def peek(self) -> Optional[str]:
+ """Peek at current character without consuming"""
+ if self.pos < self.length:
+ return self.jsonStr[self.pos]
+ return None
+
+ def readString(self) -> Token:
+ """Read a JSON string token"""
+ start_pos = self.pos
+ self.pos += 1 # Skip opening quote
+
+ escaped = False
+ while self.pos < self.length:
+ char = self.jsonStr[self.pos]
+ if escaped:
+ escaped = False
+ self.pos += 1
+ elif char == '\\':
+ escaped = True
+ self.pos += 1
+ elif char == '"':
+ self.pos += 1
+ raw = self.jsonStr[start_pos:self.pos]
+ try:
+ # Try to decode the string value
+ value = raw[1:-1] # Remove quotes for value
+ except:
+ value = raw
+ return Token(TokenType.STRING, value, start_pos, self.pos, raw)
+ else:
+ self.pos += 1
+
+ # String was truncated
+ raw = self.jsonStr[start_pos:self.pos]
+ return Token(TokenType.TRUNCATED, raw[1:] if len(raw) > 1 else "", start_pos, self.pos, raw)
+
+ def readNumber(self) -> Token:
+ """Read a JSON number token"""
+ start_pos = self.pos
+
+ # Handle negative
+ if self.pos < self.length and self.jsonStr[self.pos] == '-':
+ self.pos += 1
+
+ # Read digits
+ while self.pos < self.length and self.jsonStr[self.pos].isdigit():
+ self.pos += 1
+
+ # Decimal part
+ if self.pos < self.length and self.jsonStr[self.pos] == '.':
+ self.pos += 1
+ while self.pos < self.length and self.jsonStr[self.pos].isdigit():
+ self.pos += 1
+
+ # Exponent
+ if self.pos < self.length and self.jsonStr[self.pos] in 'eE':
+ self.pos += 1
+ if self.pos < self.length and self.jsonStr[self.pos] in '+-':
+ self.pos += 1
+ while self.pos < self.length and self.jsonStr[self.pos].isdigit():
+ self.pos += 1
+
+ raw = self.jsonStr[start_pos:self.pos]
+ try:
+ value = float(raw) if '.' in raw or 'e' in raw.lower() else int(raw)
+ except ValueError:
+ value = raw
+
+ return Token(TokenType.NUMBER, value, start_pos, self.pos, raw)
+
+ def readKeyword(self) -> Token:
+ """Read true, false, or null"""
+ start_pos = self.pos
+
+ for keyword, token_type in [('true', TokenType.BOOLEAN),
+ ('false', TokenType.BOOLEAN),
+ ('null', TokenType.NULL)]:
+ if self.jsonStr[self.pos:].startswith(keyword):
+ self.pos += len(keyword)
+ value = True if keyword == 'true' else (False if keyword == 'false' else None)
+ return Token(token_type, value, start_pos, self.pos, keyword)
+
+ # Partial keyword (truncated)
+ while self.pos < self.length and self.jsonStr[self.pos].isalpha():
+ self.pos += 1
+ raw = self.jsonStr[start_pos:self.pos]
+ return Token(TokenType.TRUNCATED, raw, start_pos, self.pos, raw)
+
+ def nextToken(self) -> Token:
+ """Get the next token"""
+ self.skipWhitespace()
+
+ if self.pos >= self.length:
+ return Token(TokenType.EOF, None, self.pos, self.pos, "")
+
+ char = self.jsonStr[self.pos]
+ startPos = self.pos
+
+ if char == '{':
+ self.pos += 1
+ return Token(TokenType.OBJECT_START, '{', startPos, self.pos, '{')
+ elif char == '}':
+ self.pos += 1
+ return Token(TokenType.OBJECT_END, '}', startPos, self.pos, '}')
+ elif char == '[':
+ self.pos += 1
+ return Token(TokenType.ARRAY_START, '[', startPos, self.pos, '[')
+ elif char == ']':
+ self.pos += 1
+ return Token(TokenType.ARRAY_END, ']', startPos, self.pos, ']')
+ elif char == ':':
+ self.pos += 1
+ return Token(TokenType.COLON, ':', startPos, self.pos, ':')
+ elif char == ',':
+ self.pos += 1
+ return Token(TokenType.COMMA, ',', startPos, self.pos, ',')
+ elif char == '"':
+ return self.readString()
+ elif char == '-' or char.isdigit():
+ return self.readNumber()
+ elif char.isalpha():
+ return self.readKeyword()
+ else:
+ # Unknown character, treat as truncated
+ self.pos += 1
+ return Token(TokenType.TRUNCATED, char, startPos, self.pos, char)
+
+
+@dataclass
+class HierarchyLevel:
+ """Represents one level in the parsed hierarchy"""
+ type: str # "object" or "array"
+ start_pos: int
+ end_pos: int # -1 if not closed
+ key: Optional[str] # Key if this is a value in an object
+ index: Optional[int] # Index if this is in an array
+ content: dict # Parsed content at this level
+ raw_start: str # Raw string from start to children
+ children_content: List[Any] # For arrays: list of parsed elements
+
+
+def getJsonContinuationContext(
+ truncatedJson: str,
+ budgetLimit: Optional[int] = None,
+ overlapMaxChars: Optional[int] = None
+) -> Tuple[str, str, str]:
+ """
+ Generate continuation contexts for a truncated JSON string.
+
+ Generiert drei Kontexte für abgeschnittene JSON-Strings:
+ 1. Overlap Context: Das innerste Objekt/Array-Element, das den Cut-Punkt enthält
+ 2. Hierarchy Context: Die hierarchische Struktur vom Root bis zum Cut mit Budget-Logik
+ 3. Complete Part: Der vollständige Teil des JSONs mit allen Strukturen geschlossen
+
+ Args:
+ truncatedJson: The truncated JSON string
+ budgetLimit: Character budget for data values in hierarchy context (uses BUDGET_LIMIT if None)
+ overlapMaxChars: Maximum characters for overlap context (uses OVERLAP_MAX_CHARS if None)
+
+ Returns:
+ Tuple of (overlapContext, hierarchyContext, completePart):
+ - overlapContext: The innermost object/element containing the cut (for merging)
+ - hierarchyContext: Full structure from root to cut with budget-limited values
+ - completePart: Valid JSON with all structures properly closed
+ """
+ if budgetLimit is None:
+ budgetLimit = BUDGET_LIMIT
+ if overlapMaxChars is None:
+ overlapMaxChars = OVERLAP_MAX_CHARS
+
+ analyzer = JsonAnalyzer(truncatedJson, budgetLimit, overlapMaxChars)
+ return analyzer.analyze()
+
+
+class JsonAnalyzer:
+ """
+ Analyzes truncated JSON and generates continuation contexts.
+
+ Generates three contexts for truncated JSON strings:
+ 1. Overlap Context: The innermost object/array element containing the cut point
+ 2. Hierarchy Context: The hierarchical structure from root to cut with budget logic
+ 3. Complete Part: The complete part of the JSON with all structures properly closed
+ """
+
+ def __init__(self, jsonStr: str, budgetLimit: Optional[int] = None, overlapMaxChars: Optional[int] = None):
+ self.jsonStr = jsonStr
+ self.budgetLimit = budgetLimit if budgetLimit is not None else BUDGET_LIMIT
+ self.overlapMaxChars = overlapMaxChars if overlapMaxChars is not None else OVERLAP_MAX_CHARS
+ self.stack: List[StackFrame] = []
+ self.hierarchy: List[dict] = [] # Parsed hierarchy info
+
+ def analyze(self) -> Tuple[str, str, str]:
+ """
+ Analyze the truncated JSON and return all three contexts.
+
+ Returns:
+ Tuple of (overlapContext, hierarchyContext, completePart)
+ """
+ # Parse and track the structure
+ self._parseStructure()
+
+ # Generate overlap context
+ overlapContext = self._generateOverlapContext()
+
+ # Generate hierarchy context (use improved version)
+ hierarchyContext = self._renderWithBudgetV2()
+
+ # Generate complete part (JSON with all structures closed)
+ completePart = self._generateCompletePart()
+
+ return overlapContext, hierarchyContext, completePart
+
+ def _generateCompletePart(self) -> str:
+ """
+ Generate the complete part of the JSON with all structures properly closed.
+
+ This creates valid JSON by closing all open strings, brackets/braces.
+ Unvollständige Keys werden entfernt, damit das Ergebnis valides JSON ist.
+
+ Strategy:
+ 1. Take the full truncated JSON
+ 2. If we're in the middle of a string, close it
+ 3. Remove incomplete key-value pairs (keys without values)
+ 4. Close all open brackets/braces
+ """
+ result = self.jsonStr.rstrip()
+
+ # Remove trailing comma if present (after stripping)
+ if result.endswith(','):
+ result = result[:-1]
+
+ # Check if we need to close an open string
+ stringClosing = self._getStringClosing(result)
+ result += stringClosing
+
+ # Check if we're in the middle of a key (after colon)
+ # If string was just closed and we're after a colon with no value, remove the key
+ result = self._cleanIncompleteKeyValue(result)
+
+ # Close all open structures
+ closingBrackets = self._getClosingBrackets(result)
+
+ return result + closingBrackets
+
+ def _getStringClosing(self, jsonStr: str) -> str:
+ """Check if there's an unclosed string and return closing quote if needed."""
+ in_string = False
+ escaped = False
+
+ for char in jsonStr:
+ if escaped:
+ escaped = False
+ continue
+
+ if char == '\\' and in_string:
+ escaped = True
+ continue
+
+ if char == '"':
+ in_string = not in_string
+
+ return '"' if in_string else ""
+
+ def _cleanIncompleteKeyValue(self, jsonStr: str) -> str:
+ """
+ Clean up incomplete key-value pairs.
+ Handles cases like:
+ - {"key": "incompl -> keep (valid truncated value)
+ - {"key": -> remove key
+ - {"a": 1, "key -> remove incomplete key (was in middle of key name)
+ """
+ stripped = jsonStr.rstrip()
+
+ # Pattern: ends with colon (possibly with whitespace) - incomplete value
+ if stripped.endswith(':'):
+ # Find the start of this key and remove the whole key-value
+ return self._removeLastKey(stripped)
+
+ # Check if we just closed a string that was an incomplete key
+ # Pattern: ..., "something" or { "something" where something has no colon after
+ # This happens when we close a truncated key name like "add" -> "add"
+ if stripped.endswith('"'):
+ # Look for the pattern: comma/bracket + whitespace + "string"
+ # and check if this was supposed to be a key
+ if self._isIncompleteKey(stripped):
+ return self._removeLastKey(stripped)
+
+ return jsonStr
+
+ def _isIncompleteKey(self, jsonStr: str) -> bool:
+ """
+ Check if the last string in the JSON is an incomplete key in an object.
+ This happens when truncation occurred in the middle of a key name.
+ Only applies to objects, not arrays.
+ """
+ # Find the last complete string
+ pos = len(jsonStr) - 1
+ if jsonStr[pos] != '"':
+ return False
+
+ # Find the opening quote of this string
+ stringStart = pos - 1
+ while stringStart >= 0:
+ if jsonStr[stringStart] == '"':
+ # Check it's not escaped
+ numBackslashes = 0
+ checkPos = stringStart - 1
+ while checkPos >= 0 and jsonStr[checkPos] == '\\':
+ numBackslashes += 1
+ checkPos -= 1
+ if numBackslashes % 2 == 0:
+ break
+ stringStart -= 1
+
+ if stringStart < 0:
+ return False
+
+ # Now stringStart points to opening quote
+ # Check what's before it (skip whitespace)
+ beforePos = stringStart - 1
+ while beforePos >= 0 and jsonStr[beforePos] in ' \t\n\r':
+ beforePos -= 1
+
+ if beforePos < 0:
+ return False
+
+ # For this to be an incomplete key, it must be preceded by { or ,
+ # AND we must be inside an object (not an array)
+ if jsonStr[beforePos] not in ',{':
+ return False
+
+ # Now check if we're in an object context (not array)
+ # Count open braces/brackets to determine context
+ braceCount = 0
+ bracketCount = 0
+ inString = False
+
+ for i in range(beforePos + 1):
+ char = jsonStr[i]
+ if char == '"' and (i == 0 or jsonStr[i-1] != '\\'):
+ inString = not inString
+ elif not inString:
+ if char == '{':
+ braceCount += 1
+ elif char == '}':
+ braceCount -= 1
+ elif char == '[':
+ bracketCount += 1
+ elif char == ']':
+ bracketCount -= 1
+
+ # If we have more open braces than brackets at this point,
+ # we're in an object context
+ # Actually, we need to check the innermost container
+ # Let's track the stack properly
+ stack = []
+ inString = False
+
+ for i in range(beforePos + 1):
+ char = jsonStr[i]
+ if char == '"' and (i == 0 or jsonStr[i-1] != '\\'):
+ inString = not inString
+ elif not inString:
+ if char == '{':
+ stack.append('object')
+ elif char == '[':
+ stack.append('array')
+ elif char == '}':
+ if stack and stack[-1] == 'object':
+ stack.pop()
+ elif char == ']':
+ if stack and stack[-1] == 'array':
+ stack.pop()
+
+ # If innermost container is an object, this is an incomplete key
+ return len(stack) > 0 and stack[-1] == 'object'
+
+ def _removeLastKey(self, jsonStr: str) -> str:
+ """Remove the last incomplete key-value pair from the JSON string."""
+ stripped = jsonStr.rstrip()
+
+ # Find the last comma or opening bracket before the incomplete key
+ pos = len(stripped) - 1
+
+ # Skip past the current string/key
+ in_string = False
+ while pos >= 0:
+ char = stripped[pos]
+ if char == '"' and (pos == 0 or stripped[pos-1] != '\\'):
+ in_string = not in_string
+ if not in_string and char in ',{':
+ break
+ pos -= 1
+
+ if pos < 0:
+ return stripped
+
+ if stripped[pos] == ',':
+ # Remove from comma onwards
+ return stripped[:pos]
+ elif stripped[pos] == '{':
+ # Keep the opening brace
+ return stripped[:pos+1]
+
+ return stripped
+
+ def _findLastCompletePosition(self) -> int:
+ """Find the position of the last complete value in the JSON."""
+ tokenizer = JsonTokenizer(self.jsonStr)
+ last_complete_pos = 0
+ stack_depth = 0
+ last_value_end = 0
+ in_value = False
+
+ while True:
+ token = tokenizer.nextToken()
+
+ if token.type == TokenType.EOF:
+ break
+
+ if token.type == TokenType.TRUNCATED:
+ # Return position before the truncated part
+ break
+
+ if token.type in (TokenType.OBJECT_START, TokenType.ARRAY_START):
+ stack_depth += 1
+ in_value = True
+
+ elif token.type in (TokenType.OBJECT_END, TokenType.ARRAY_END):
+ stack_depth -= 1
+ last_value_end = token.end_pos
+ in_value = False
+
+ elif token.type == TokenType.STRING:
+ # Check if this is a key or a value
+ saved_pos = tokenizer.pos
+ tokenizer.skipWhitespace()
+ next_char = tokenizer.peek()
+ tokenizer.pos = saved_pos
+
+ if next_char != ':':
+ # It's a value
+ last_value_end = token.end_pos
+ in_value = False
+
+ elif token.type in (TokenType.NUMBER, TokenType.BOOLEAN, TokenType.NULL):
+ last_value_end = token.end_pos
+ in_value = False
+
+ elif token.type == TokenType.COMMA:
+ # After a comma, we've completed a value
+ last_complete_pos = last_value_end
+
+ # Return the last complete position
+ return last_value_end if last_value_end > 0 else len(self.jsonStr)
+
+ def _getClosingBrackets(self, jsonStr: str) -> str:
+ """Determine what closing brackets are needed."""
+ stack = []
+ in_string = False
+ escaped = False
+
+ for char in jsonStr:
+ if escaped:
+ escaped = False
+ continue
+
+ if char == '\\' and in_string:
+ escaped = True
+ continue
+
+ if char == '"':
+ in_string = not in_string
+ continue
+
+ if in_string:
+ continue
+
+ if char == '{':
+ stack.append('}')
+ elif char == '[':
+ stack.append(']')
+ elif char == '}':
+ if stack and stack[-1] == '}':
+ stack.pop()
+ elif char == ']':
+ if stack and stack[-1] == ']':
+ stack.pop()
+
+ # Return closing brackets in reverse order
+ return ''.join(reversed(stack))
+
+ def _parseStructure(self):
+ """Parse the JSON structure and track hierarchy"""
+ tokenizer = JsonTokenizer(self.jsonStr)
+
+ while True:
+ token = tokenizer.nextToken()
+
+ if token.type == TokenType.EOF or token.type == TokenType.TRUNCATED:
+ break
+
+ if token.type == TokenType.OBJECT_START:
+ frame = StackFrame(
+ type="object",
+ start_pos=token.start_pos,
+ keys_seen=[]
+ )
+ self.stack.append(frame)
+
+ elif token.type == TokenType.ARRAY_START:
+ frame = StackFrame(
+ type="array",
+ start_pos=token.start_pos,
+ index=0
+ )
+ self.stack.append(frame)
+
+ elif token.type == TokenType.OBJECT_END:
+ if self.stack and self.stack[-1].type == "object":
+ self.stack.pop()
+
+ elif token.type == TokenType.ARRAY_END:
+ if self.stack and self.stack[-1].type == "array":
+ self.stack.pop()
+
+ elif token.type == TokenType.STRING:
+ # Could be a key or a value
+ self._handleStringToken(token, tokenizer)
+
+ elif token.type == TokenType.COMMA:
+ # Increment array index
+ if self.stack and self.stack[-1].type == "array":
+ self.stack[-1].index += 1
+
+ def _handleStringToken(self, token: Token, tokenizer: JsonTokenizer):
+ """Handle a string token (could be key or value)"""
+ if self.stack and self.stack[-1].type == "object":
+ # Check if this is a key (followed by colon)
+ saved_pos = tokenizer.pos
+ tokenizer.skipWhitespace()
+ next_char = tokenizer.peek()
+
+ if next_char == ':':
+ # This is a key
+ self.stack[-1].key = token.value
+ self.stack[-1].keys_seen.append(token.value)
+
+ tokenizer.pos = saved_pos
+
+ def _generateOverlapContext(self) -> str:
+ """
+ Generate the overlap context - the innermost object/array element containing the cut.
+
+ Returns the raw string from the start of that element to the end of the truncated JSON.
+ Dieser Kontext wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu mergen.
+ Exakt so wie im Original-String (für String-Matching beim Merge).
+ """
+ if not self.stack:
+ # No structure, return last overlap_max_chars characters
+ return self.jsonStr[-self.overlapMaxChars:]
+
+ # Find the innermost container that should be the overlap
+ # For arrays: the current array element
+ # For objects: the current object
+
+ innermost_start = self._findInnermostElementStart()
+
+ overlap = self.jsonStr[innermost_start:]
+
+ # Apply max chars limit
+ if len(overlap) > self.overlapMaxChars:
+ overlap = self.jsonStr[-self.overlapMaxChars:]
+
+ return overlap
+
+ def _findInnermostElementStart(self) -> int:
+ """Find the start position of the innermost element for overlap"""
+ if not self.stack:
+ return max(0, len(self.jsonStr) - self.overlapMaxChars)
+
+ # Walk through stack to find the innermost array element or object
+ # We want the innermost "atomic" unit that contains the cut
+
+ # Strategy:
+ # - If innermost is an object: return its start
+ # - If innermost is an array:
+ # - If current element is an object/array: return start of that element
+ # - If current element is a primitive: return start of array or last N chars
+
+ innermost = self.stack[-1]
+
+ if innermost.type == "object":
+ return innermost.start_pos
+ else:
+ # It's an array - find the start of the current element
+ element_start = self._findArrayElementStart(innermost)
+
+ # Check if the element is a primitive or complex type
+ element_content = self.jsonStr[element_start:].strip()
+
+ # If it starts with { or [ it's complex, return the element start
+ if element_content and element_content[0] in '{[':
+ return element_start
+ else:
+ # Primitive in array - check if there's a parent object
+ # or return overlap_max_chars from end
+ for i in range(len(self.stack) - 2, -1, -1):
+ if self.stack[i].type == "object":
+ return self.stack[i].start_pos
+
+ # No parent object, return max chars from end
+ return max(0, len(self.jsonStr) - self.overlapMaxChars)
+
+ def _findArrayElementStart(self, arrayFrame: StackFrame) -> int:
+ """Find the start position of the current array element"""
+ # We need to find the start of the current element in the array
+ # Parse from array start to find element boundaries
+
+ arrayContent = self.jsonStr[arrayFrame.start_pos:]
+
+ # Skip the opening bracket and whitespace
+ pos = 1
+ while pos < len(arrayContent) and arrayContent[pos] in ' \t\n\r':
+ pos += 1
+
+ elementStarts = [arrayFrame.start_pos + pos]
+ depth = 0
+ inString = False
+ escaped = False
+
+ i = pos
+ while i < len(arrayContent):
+ char = arrayContent[i]
+
+ if escaped:
+ escaped = False
+ i += 1
+ continue
+
+ if char == '\\' and inString:
+ escaped = True
+ i += 1
+ continue
+
+ if char == '"':
+ inString = not inString
+ i += 1
+ continue
+
+ if inString:
+ i += 1
+ continue
+
+ if char in '{[':
+ depth += 1
+ elif char in '}]':
+ depth -= 1
+ elif char == ',' and depth == 0:
+ # Found element boundary
+ i += 1
+ # Skip whitespace
+ while i < len(arrayContent) and arrayContent[i] in ' \t\n\r':
+ i += 1
+ elementStarts.append(arrayFrame.start_pos + i)
+
+ i += 1
+
+ # Return the start of the current element
+ if arrayFrame.index < len(elementStarts):
+ return elementStarts[arrayFrame.index]
+ elif elementStarts:
+ return elementStarts[-1]
+ else:
+ return arrayFrame.start_pos
+
+ def _generateHierarchyContext(self) -> str:
+ """
+ Generate the hierarchy context with budget logic.
+ Shows structure from root to cut point with data values limited by budget.
+ """
+ if not self.stack:
+ # No structure
+ return self.jsonStr[-self.overlapMaxChars:]
+
+ # We need to rebuild the JSON with budget logic
+ # Priority: elements closer to cut get full values, distant ones get "..."
+
+ return self._rebuildWithBudget()
+
+ def _rebuildWithBudget(self) -> str:
+ """Rebuild JSON from root to cut with budget constraints"""
+
+ # Strategy:
+ # 1. Parse the JSON structure tracking all values
+ # 2. Calculate total value size
+ # 3. Apply budget from cut backwards
+ # 4. Render with "..." for values outside budget
+
+ # First, get a structured representation
+ structure = self._parseForHierarchy()
+
+ # Now render with budget
+ return self._renderWithBudget(structure)
+
+ def _parseForHierarchy(self) -> dict:
+ """Parse JSON into a structure suitable for hierarchy rendering"""
+
+ result = {
+ 'type': 'root',
+ 'children': [],
+ 'raw_positions': []
+ }
+
+ tokenizer = JsonTokenizer(self.jsonStr)
+ stack = [result]
+ current_key = None
+
+ while True:
+ token = tokenizer.nextToken()
+
+ if token.type == TokenType.EOF:
+ break
+
+ if token.type == TokenType.TRUNCATED:
+ # Mark the truncation point
+ if stack:
+ current = stack[-1]
+ if current.get('type') == 'object':
+ if current_key:
+ current['children'].append({
+ 'type': 'truncated_value',
+ 'key': current_key,
+ 'raw': self.jsonStr[token.start_pos:],
+ 'start_pos': token.start_pos
+ })
+ elif current.get('type') == 'array':
+ current['children'].append({
+ 'type': 'truncated_value',
+ 'raw': self.jsonStr[token.start_pos:],
+ 'start_pos': token.start_pos
+ })
+ break
+
+ if token.type == TokenType.OBJECT_START:
+ obj = {
+ 'type': 'object',
+ 'key': current_key,
+ 'children': [],
+ 'start_pos': token.start_pos
+ }
+ if stack:
+ stack[-1]['children'].append(obj)
+ stack.append(obj)
+ current_key = None
+
+ elif token.type == TokenType.ARRAY_START:
+ arr = {
+ 'type': 'array',
+ 'key': current_key,
+ 'children': [],
+ 'start_pos': token.start_pos
+ }
+ if stack:
+ stack[-1]['children'].append(arr)
+ stack.append(arr)
+ current_key = None
+
+ elif token.type == TokenType.OBJECT_END:
+ if len(stack) > 1 and stack[-1].get('type') == 'object':
+ stack[-1]['end_pos'] = token.end_pos
+ stack[-1]['complete'] = True
+ stack.pop()
+
+ elif token.type == TokenType.ARRAY_END:
+ if len(stack) > 1 and stack[-1].get('type') == 'array':
+ stack[-1]['end_pos'] = token.end_pos
+ stack[-1]['complete'] = True
+ stack.pop()
+
+ elif token.type == TokenType.STRING:
+ # Check if it's a key
+ saved_pos = tokenizer.pos
+ tokenizer.skipWhitespace()
+ next_char = tokenizer.peek()
+
+ if next_char == ':' and stack and stack[-1].get('type') == 'object':
+ current_key = token.value
+ else:
+ # It's a value
+ value_node = {
+ 'type': 'value',
+ 'key': current_key,
+ 'value': token.value,
+ 'raw': token.raw,
+ 'start_pos': token.start_pos,
+ 'end_pos': token.end_pos,
+ 'value_type': 'string'
+ }
+ if stack:
+ stack[-1]['children'].append(value_node)
+ current_key = None
+
+ tokenizer.pos = saved_pos
+
+ elif token.type in (TokenType.NUMBER, TokenType.BOOLEAN, TokenType.NULL):
+ value_node = {
+ 'type': 'value',
+ 'key': current_key,
+ 'value': token.value,
+ 'raw': token.raw,
+ 'start_pos': token.start_pos,
+ 'end_pos': token.end_pos,
+ 'value_type': str(token.type.value)
+ }
+ if stack:
+ stack[-1]['children'].append(value_node)
+ current_key = None
+
+ return result
+
+ def _renderWithBudget(self, structure: dict) -> str:
+ """Render the structure with budget constraints"""
+
+ # First, collect all value nodes with their distances from cut
+ cutPos = len(self.jsonStr)
+ allValues = self._collectValuesWithDistance(structure, cutPos)
+
+ # Sort by distance (closest to cut first)
+ allValues.sort(key=lambda x: x['distance'])
+
+ # Determine which values get full rendering
+ budgetRemaining = self.budgetLimit
+ valuesWithBudget = set()
+
+ for valInfo in allValues:
+ valSize = len(str(valInfo['raw']))
+ if budgetRemaining >= valSize:
+ valuesWithBudget.add(valInfo['id'])
+ budgetRemaining -= valSize
+
+ # Now render the structure
+ return self._renderNode(structure, valuesWithBudget, indent=0)
+
+ def _collectValuesWithDistance(self, node: dict, cutPos: int, depth: int = 0) -> list:
+ """Collect all value nodes with their distance from cut point"""
+ values = []
+
+ if node.get('type') == 'value':
+ endPos = node.get('end_pos', cutPos)
+ distance = cutPos - endPos
+ values.append({
+ 'id': id(node),
+ 'node': node,
+ 'distance': distance,
+ 'raw': node.get('raw', ''),
+ 'depth': depth
+ })
+ elif node.get('type') == 'truncated_value':
+ values.append({
+ 'id': id(node),
+ 'node': node,
+ 'distance': 0, # Truncated values are at the cut
+ 'raw': node.get('raw', ''),
+ 'depth': depth
+ })
+
+ for child in node.get('children', []):
+ values.extend(self._collectValuesWithDistance(child, cutPos, depth + 1))
+
+ return values
+
+ def _renderNode(self, node: dict, valuesWithBudget: set, indent: int = 0) -> str:
+ """Render a node with budget constraints"""
+ indent_str = " " * indent
+
+ node_type = node.get('type')
+
+ if node_type == 'root':
+ parts = []
+ for child in node.get('children', []):
+ parts.append(self._renderNode(child, valuesWithBudget, indent))
+ return '\n'.join(parts)
+
+ elif node_type == 'object':
+ return self._renderObject(node, valuesWithBudget, indent)
+
+ elif node_type == 'array':
+ return self._renderArray(node, valuesWithBudget, indent)
+
+ elif node_type == 'value':
+ return self._renderValue(node, valuesWithBudget, indent)
+
+ elif node_type == 'truncated_value':
+ return node.get('raw', '')
+
+ return ''
+
+ def _renderObject(self, node: dict, valuesWithBudget: set, indent: int) -> str:
+ """Render an object node"""
+ indent_str = " " * indent
+ inner_indent = " " * (indent + 1)
+
+ key_prefix = ""
+ if node.get('key'):
+ key_prefix = f'"{node["key"]}": '
+
+ if not node.get('children'):
+ if node.get('complete'):
+ return f"{key_prefix}{{}}"
+ else:
+ return f"{key_prefix}{{"
+
+ parts = [f"{key_prefix}{{"]
+
+ children = node.get('children', [])
+ for i, child in enumerate(children):
+ child_rendered = self._renderNode(child, valuesWithBudget, indent + 1)
+
+ # Add comma if not last and next sibling exists
+ if i < len(children) - 1:
+ if child.get('type') != 'truncated_value':
+ parts.append(f"{inner_indent}{child_rendered},")
+ else:
+ parts.append(f"{inner_indent}{child_rendered}")
+ else:
+ parts.append(f"{inner_indent}{child_rendered}")
+
+ if node.get('complete'):
+ parts.append(f"{indent_str}}}")
+
+ return '\n'.join(parts)
+
+ def _renderArray(self, node: dict, valuesWithBudget: set, indent: int) -> str:
+ """Render an array node"""
+ indent_str = " " * indent
+ inner_indent = " " * (indent + 1)
+
+ key_prefix = ""
+ if node.get('key'):
+ key_prefix = f'"{node["key"]}": '
+
+ if not node.get('children'):
+ if node.get('complete'):
+ return f"{key_prefix}[]"
+ else:
+ return f"{key_prefix}["
+
+ parts = [f"{key_prefix}["]
+
+ children = node.get('children', [])
+ for i, child in enumerate(children):
+ child_rendered = self._renderNode(child, valuesWithBudget, indent + 1)
+
+ if i < len(children) - 1:
+ if child.get('type') != 'truncated_value':
+ parts.append(f"{inner_indent}{child_rendered},")
+ else:
+ parts.append(f"{inner_indent}{child_rendered}")
+ else:
+ parts.append(f"{inner_indent}{child_rendered}")
+
+ if node.get('complete'):
+ parts.append(f"{indent_str}]")
+
+ return '\n'.join(parts)
+
+ def _renderValue(self, node: dict, valuesWithBudget: set, indent: int) -> str:
+ """Render a value node"""
+ key_prefix = ""
+ if node.get('key'):
+ key_prefix = f'"{node["key"]}": '
+
+ if id(node) in valuesWithBudget:
+ # Full value
+            default_raw = '"..."'
+ raw_value = node.get('raw', default_raw)
+ return f"{key_prefix}{raw_value}"
+ else:
+ # Placeholder
+ return f'{key_prefix}"..."'
+
+ def _renderWithBudgetV2(self) -> str:
+ """
+ Generate hierarchy context with budget logic.
+
+ Alternative rendering that stays closer to the original truncated string.
+ Shows full context near the cut, replaces distant values with "...".
+
+        Budget logic:
+        1. Collect: all string values are gathered together with their positions
+        2. Sort: by distance to the cut point (closer = higher priority)
+        3. Assign: the budget is consumed from back to front
+        4. Replace: values outside the budget are replaced with "..."
+ """
+ # Parse to understand structure, but render from original string with modifications
+ structure = self._parseForHierarchy()
+
+ # Collect all complete value nodes with positions
+ allValues = self._collectCompleteValues(structure)
+
+ # Sort by end position (furthest from cut = first to be truncated)
+ allValues.sort(key=lambda x: x['end_pos'])
+
+ # Apply budget: replace values from the start until budget exhausted
+ budgetUsed = 0
+ totalAvailable = sum(len(v['raw']) for v in allValues)
+
+ valuesToReplace = []
+
+ for val in allValues:
+ valSize = len(val['raw'])
+ if totalAvailable - budgetUsed > self.budgetLimit:
+ # This value should be replaced with "..."
+ valuesToReplace.append(val)
+ budgetUsed += valSize
+ else:
+ break
+
+ # Build the modified string
+ result = self.jsonStr
+
+ # Replace from end to start to preserve positions
+ valuesToReplace.sort(key=lambda x: x['start_pos'], reverse=True)
+
+ for val in valuesToReplace:
+ start = val['start_pos']
+ end = val['end_pos']
+ result = result[:start] + '"..."' + result[end:]
+
+ return result
+
+ def _collectCompleteValues(self, node: dict) -> list:
+ """Collect all complete (non-truncated) value nodes"""
+ values = []
+
+ if node.get('type') == 'value' and node.get('value_type') == 'string':
+ values.append({
+ 'start_pos': node['start_pos'],
+ 'end_pos': node['end_pos'],
+ 'raw': node['raw'],
+ 'key': node.get('key')
+ })
+
+ for child in node.get('children', []):
+ values.extend(self._collectCompleteValues(child))
+
+ return values
+
+
+def extractContinuationContexts(
+ truncatedJson: str
+) -> Tuple[str, str, str]:
+ """
+ Main entry point: Extract all three continuation contexts from a truncated JSON.
+
+    Generates three contexts for truncated JSON strings:
+    1. Overlap Context: the innermost object/array element that contains the cut point
+       - Used to merge the truncated part with the newly generated part
+       - Exactly as it appears in the original string (for string matching during the merge)
+
+    2. Hierarchy Context: the hierarchical structure from the root down to the cut point
+       - With budget logic: closer to the cut = full values, further away = "..." placeholders
+       - Gives the AI the context of the overall JSON structure
+
+    3. Complete Part: the complete, valid JSON up to the cut point
+       - All open structures are closed (}, ], ")
+       - Incomplete keys are removed
+       - Can be parsed directly as valid JSON
+
+ Uses module constants BUDGET_LIMIT and OVERLAP_MAX_CHARS.
+
+ Args:
+ truncatedJson: The truncated JSON string
+
+ Returns:
+ Tuple of (overlapContext, hierarchyContext, completePart):
+ - overlapContext: The innermost object/element containing the cut (for merging)
+ - hierarchyContext: Full structure from root to cut with budget-limited values
+ - completePart: Valid JSON with all structures properly closed
+
+ Example:
+ >>> jsonStr = '{"users": [{"name": "John", "bio": "Hello Wor'
+ >>> overlap, hierarchy, complete = extractContinuationContexts(jsonStr)
+ >>> import json
+        >>> parsed = json.loads(complete) # ✓ Works!
+ """
+ return getJsonContinuationContext(truncatedJson)
+
+
+# Convenience function with named results
+def getContexts(
+ truncatedJson: str
+) -> JsonContinuationContexts:
+ """
+ Get all contexts as a Pydantic model with named fields.
+
+ Uses module constants BUDGET_LIMIT and OVERLAP_MAX_CHARS.
+
+ Args:
+ truncatedJson: The truncated JSON string
+
+ Returns:
+ JsonContinuationContexts Pydantic model with:
+ - overlapContext: The innermost object/element containing the cut
+ - hierarchyContext: Full structure with budget-limited values
+ - completePart: Valid JSON with all structures properly closed
+
+ Example:
+ >>> json_str = '{"users": [{"name": "John", "bio": "Hello Wor'
+ >>> contexts = getContexts(json_str)
+ >>> print(contexts.overlapContext)
+ >>> print(contexts.hierarchyContext)
+ >>> print(contexts.completePart)
+ """
+ overlap, hierarchy, completePart = extractContinuationContexts(truncatedJson)
+ return JsonContinuationContexts(
+ overlapContext=overlap,
+ hierarchyContext=hierarchy,
+ completePart=completePart
+ )
diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py
index d89b2f76..2e60ce69 100644
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@@ -703,144 +703,6 @@ def extractSectionsFromDocument(documentData: Dict[str, Any]) -> List[Dict[str,
return []
-def _extractOverlapFromElement(elem: Dict[str, Any], elemType: str) -> Optional[Dict[str, Any]]:
- """
- GENERIC function to extract overlap portion from an element.
-
- Handles elements of any size, including very long strings:
- - Paragraphs: Extract last N characters/words
- - Code blocks: Extract last N lines
- - Tables: Extract last N rows
- - Lists: Extract last N items
- - Other elements: Extract representative portion
-
- Args:
- elem: Element dictionary
- elemType: Element type (table, paragraph, code_block, etc.)
-
- Returns:
- Overlap element dictionary with size-limited content, or None
- """
- if not isinstance(elem, dict):
- return None
-
- # Get content (handle both flat and nested structures)
- content = elem.get("content", {}) if isinstance(elem.get("content"), dict) else {}
-
- if elemType == "table":
- rows = elem.get("rows", []) or content.get("rows", [])
- headers = elem.get("headers", []) or content.get("headers", [])
-
- if rows:
- # Extract last 3-5 rows as overlap (enough for context, not too large)
- overlapRowCount = min(5, len(rows))
- overlapRows = rows[-overlapRowCount:]
-
- overlapElem = {
- "type": "table",
- "content": {
- "headers": headers,
- "rows": overlapRows
- }
- }
- return overlapElem
-
- elif elemType in ["bullet_list", "numbered_list"]:
- items = elem.get("items", []) or content.get("items", [])
-
- if items:
- # Extract last 5-10 items as overlap
- overlapItemCount = min(10, len(items))
- overlapItems = items[-overlapItemCount:]
-
- overlapElem = {
- "type": elemType,
- "content": {
- "items": overlapItems
- }
- }
- return overlapElem
-
- elif elemType == "paragraph":
- text = elem.get("text", "") or content.get("text", "")
-
- if text:
- # Extract last portion of text
- # For very long text, use last 300-500 characters
- # For shorter text, use all of it
- maxOverlapChars = 500
- minOverlapChars = 100
-
- if len(text) > maxOverlapChars:
- # Very long text - extract last portion
- # Try to break at word boundary for readability
- textSnippet = text[-maxOverlapChars:]
- # Find first space/newline to start from word boundary
- firstSpace = textSnippet.find(' ')
- if firstSpace > 0 and firstSpace < 50:
- textSnippet = textSnippet[firstSpace + 1:]
- overlapText = textSnippet
- elif len(text) > minOverlapChars:
- # Medium text - use last portion
- overlapText = text[-minOverlapChars:]
- else:
- # Short text - use all
- overlapText = text
-
- overlapElem = {
- "type": "paragraph",
- "content": {
- "text": overlapText
- }
- }
- return overlapElem
-
- elif elemType == "code_block":
- code = elem.get("code", "") or content.get("code", "")
-
- if code:
- # Extract last N lines of code
- codeLines = code.split('\n')
- # Use last 10-20 lines as overlap (enough context for continuation)
- overlapLineCount = min(20, len(codeLines))
- overlapLines = codeLines[-overlapLineCount:]
- overlapCode = '\n'.join(overlapLines)
-
- overlapElem = {
- "type": "code_block",
- "content": {
- "code": overlapCode
- }
- }
- return overlapElem
-
- elif elemType == "heading":
- # Headings are usually short - return as-is
- return elem
-
- elif elemType == "image":
- # Images are usually small - return as-is
- return elem
-
- else:
- # Generic element - try to extract a representative portion
- # Convert to JSON and limit size
- elemJson = json.dumps(elem, ensure_ascii=False)
-
- # If element is very large, try to extract key fields only
- if len(elemJson) > 1000:
- # Extract only essential fields
- overlapElem = {
- "type": elemType,
- "id": elem.get("id"),
- "content": "..." # Indicate truncated content
- }
- return overlapElem
-
- # Small element - return as-is
- return elem
-
-
def buildContinuationContext(
allSections: List[Dict[str, Any]],
lastRawResponse: Optional[str] = None,
@@ -980,1989 +842,52 @@ def buildContinuationContext(
delivered_summary = "\n".join(summary_lines)
- # Extract cut-off point using new algorithm
- # 1. Loop over all sections until finding incomplete section
- # 2. In incomplete section, loop through elements until finding cut-off element
- # CRITICAL: There is always only ONE section incomplete (JSON cut-off point)
- cut_off_element = None
- element_before_cutoff = None
-
- if lastRawResponse:
- try:
- # CRITICAL: Always try to find incomplete section from raw JSON
- # Even if JSON can be parsed, it might be incomplete (cut off mid-element)
- raw_stripped = stripCodeFences(lastRawResponse.strip()).strip()
-
- # Check if response is just a fragment (not full JSON structure)
- # Fragments are continuation content that should be appended to the last incomplete element
- is_fragment = not (raw_stripped.strip().startswith('{') or raw_stripped.strip().startswith('['))
-
- if is_fragment:
- # Response is a fragment - it continues the last incomplete element
- # Find the last incomplete element from allSections
- if allSections:
- last_section = allSections[-1]
- elements = last_section.get("elements", [])
- if isinstance(elements, list) and elements:
- # Get the last element (which should be incomplete)
- last_elem = elements[-1]
- if isinstance(last_elem, dict):
- # The fragment continues this element
- # Show the fragment as cut_off_element
- cut_off_element = raw_stripped
- # Show the element before (if there is one)
- if len(elements) > 1:
- element_before_cutoff = json.dumps(elements[-2])
- else:
- element_before_cutoff = json.dumps(last_elem)
- else:
- # Response is full JSON - use standard extraction
- # Strategy 1: Try to find incomplete section using structured parsing
- incomplete_section = _findIncompleteSectionInRaw(raw_stripped)
- if incomplete_section:
- cut_off_element, element_before_cutoff = _extractCutOffElements(incomplete_section, raw_stripped)
-
- # Strategy 2: If no incomplete section found, extract directly from raw JSON
- # This handles cases where JSON is cut off mid-element within a complete section
- if not cut_off_element:
- cut_off_element, element_before_cutoff = _extractCutOffElementsFromRaw(raw_stripped, allSections)
- except Exception as e:
- logger.debug(f"Error extracting cut-off point: {e}")
-
- # Extract overlap information for continuation prompt
- # GENERIC overlap extraction: handles elements of any size, including long strings
- # Strategy: Extract last N elements, but if an element is very large, extract only a portion
- overlapElements = []
- overlapString = ""
-
- if allSections:
- # Get last section
- lastSection = allSections[-1]
- elements = lastSection.get("elements", [])
-
- if isinstance(elements, list) and len(elements) > 0:
- # Extract last 2-3 complete elements as overlap context
- # This helps the AI understand what was already delivered
- overlapCount = min(3, len(elements))
- overlapElements = elements[-overlapCount:]
-
- # Build overlap string showing these elements (with size limits for large elements)
- overlapStrings = []
- for elem in overlapElements:
- if isinstance(elem, dict):
- elemType = elem.get("type", "unknown")
- overlapElem = _extractOverlapFromElement(elem, elemType)
- if overlapElem:
- overlapStrings.append(json.dumps(overlapElem, ensure_ascii=False))
- else:
- # Non-dict element - show as-is (but limit size)
- elemStr = json.dumps(elem, ensure_ascii=False)
- if len(elemStr) > 500:
- elemStr = elemStr[:500] + "..."
- overlapStrings.append(elemStr)
-
- if overlapStrings:
- overlapString = ",\n".join(overlapStrings)
-
- # Store raw JSON response and extract structure context
+ # Extract continuation contexts using centralized jsonContinuation module
+ # This is the single source of truth for handling cut-off JSON strings
last_raw_json = lastRawResponse or ""
last_complete_part = ""
incomplete_part = ""
- structure_context = ""
+ overlap_context = ""
+ hierarchy_context = ""
if lastRawResponse:
- # Extract JSON structure context for continuation prompt
- # This provides: last complete part, incomplete part, structure context
- # NOTE: template_structure is now passed as parameter, not extracted
try:
- structureContext = extractJsonStructureContext(lastRawResponse, useCaseId)
- last_complete_part = structureContext.get("last_complete_part", "")
- incomplete_part = structureContext.get("incomplete_part", "")
- structure_context = structureContext.get("structure_context", "")
+ from modules.shared.jsonContinuation import getContexts
+
+ # Normalize JSON string
+ normalized = stripCodeFences(normalizeJsonText(lastRawResponse)).strip()
+ if normalized:
+ # Find first '{' or '[' to start
+ startIdx = -1
+ for i, char in enumerate(normalized):
+ if char in '{[':
+ startIdx = i
+ break
+
+ if startIdx >= 0:
+ jsonContent = normalized[startIdx:]
+ contexts = getContexts(jsonContent)
+
+ # Store all contexts from centralized module
+ last_complete_part = contexts.completePart
+ incomplete_part = jsonContent[len(contexts.completePart):].strip()
+ overlap_context = contexts.overlapContext
+ hierarchy_context = contexts.hierarchyContext
except Exception as e:
- logger.warning(f"Error extracting JSON structure context: {e}", exc_info=True)
+ logger.warning(f"Error extracting JSON continuation contexts: {e}", exc_info=True)
# Return ContinuationContext Pydantic model
return ContinuationContext(
section_count=section_count,
delivered_summary=delivered_summary,
- cut_off_element=cut_off_element,
- element_before_cutoff=element_before_cutoff,
- template_structure=templateStructure, # Use passed parameter, not extracted
+ template_structure=templateStructure,
last_complete_part=last_complete_part,
incomplete_part=incomplete_part,
- structure_context=structure_context,
- last_raw_json=last_raw_json
+ last_raw_json=last_raw_json,
+ overlap_context=overlap_context,
+ hierarchy_context=hierarchy_context
)
-
-def extractJsonStructureContext(
- incompleteJson: str,
- useCaseId: Optional[str] = None
-) -> Dict[str, Any]:
- """
- Extract JSON structure context from incomplete JSON for continuation prompts.
-
- Extracts:
- 1. Template JSON structure of the complete object (structure only, no content)
- 2. Last complete part (last complete element/object)
- 3. Incomplete part (the cut-off portion)
- 4. Structure context (parent structure metadata only, no content)
-
- Args:
- incompleteJson: Incomplete JSON string (may be cut off mid-element)
- useCaseId: Optional use case ID to determine expected structure
-
- Returns:
- Dict with:
- - template_structure: Template JSON structure (structure only)
- - last_complete_part: Last complete element/object as JSON string
- - incomplete_part: Incomplete/cut-off portion as JSON string
- - structure_context: Parent structure metadata (keys only, no content)
- """
- from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
-
- result = {
- "template_structure": "",
- "last_complete_part": "",
- "incomplete_part": "",
- "structure_context": ""
- }
-
- if not incompleteJson or not incompleteJson.strip():
- return result
-
- # Normalize JSON string
- normalized = stripCodeFences(normalizeJsonText(incompleteJson)).strip()
- if not normalized:
- return result
-
- # Find first '{' or '[' to start
- startIdx = -1
- for i, char in enumerate(normalized):
- if char in '{[':
- startIdx = i
- break
-
- if startIdx == -1:
- return result
-
- jsonContent = normalized[startIdx:]
-
- # Step 1: Extract template structure (structure only, no content)
- templateStructure = _extractTemplateStructure(jsonContent, useCaseId)
- result["template_structure"] = templateStructure
-
- # Step 2: Find last complete part and incomplete part
- lastComplete, incompletePart = _extractLastCompleteAndIncomplete(jsonContent)
- result["last_complete_part"] = lastComplete
- result["incomplete_part"] = incompletePart
-
- # Step 3: Extract structure context (parent structure metadata only)
- # Pass both incomplete part and last complete part to show positions
- structureContext = _extractStructureContext(jsonContent, incompletePart, lastComplete)
- result["structure_context"] = structureContext
-
- return result
-
-
-def _extractTemplateStructure(jsonContent: str, useCaseId: Optional[str] = None) -> str:
- """
- Extract template JSON structure (structure only, no content).
-
- Examples:
- - {"documents": [{"chapters": [{"sections": [...]}]}]}
- - {"elements": [{"type": "...", "content": {...}}]}
- """
- import json
- import re
-
- # Try to parse JSON to understand structure
- try:
- # Try to close and parse
- closed = closeJsonStructures(jsonContent)
- parsed = json.loads(closed)
-
- # Build template structure (keys only, no content)
- template = _buildStructureTemplate(parsed)
- return json.dumps(template, indent=2, ensure_ascii=False)
- except Exception:
- # If parsing fails, try to extract structure from string
- # Look for top-level keys
- topLevelKeys = []
-
- # Pattern: "key": { or "key": [
- keyPattern = r'"([^"]+)"\s*:\s*[{\[]'
- matches = re.findall(keyPattern, jsonContent)
- if matches:
- topLevelKeys = matches[:3] # Take first 3 keys
-
- # Build template based on use case or detected keys
- if useCaseId == "chapter_structure":
- return json.dumps({"documents": [{"chapters": [{"id": "", "title": "", "level": 0}]}]}, indent=2, ensure_ascii=False)
- elif useCaseId == "section_content":
- return json.dumps({"elements": [{"type": "", "content": {}}]}, indent=2, ensure_ascii=False)
- elif useCaseId == "code_structure":
- return json.dumps({"files": [{"id": "", "filename": "", "fileType": ""}]}, indent=2, ensure_ascii=False)
- elif topLevelKeys:
- # Build generic template
- template = {}
- for key in topLevelKeys:
- template[key] = []
- return json.dumps(template, indent=2, ensure_ascii=False)
- else:
- return json.dumps({}, indent=2, ensure_ascii=False)
-
-
-def _buildStructureTemplate(obj: Any, maxDepth: int = 3) -> Any:
- """
- Build structure template from parsed JSON (keys only, no content).
- """
- if isinstance(obj, dict):
- template = {}
- for key, value in obj.items():
- if isinstance(value, (dict, list)):
- template[key] = _buildStructureTemplate(value, maxDepth - 1) if maxDepth > 0 else None
- else:
- # Keep key but use empty value of same type
- if isinstance(value, str):
- template[key] = ""
- elif isinstance(value, (int, float)):
- template[key] = 0
- elif isinstance(value, bool):
- template[key] = False
- else:
- template[key] = None
- return template
- elif isinstance(obj, list) and obj:
- # Use first element as template
- return [_buildStructureTemplate(obj[0], maxDepth - 1) if maxDepth > 0 else None]
- else:
- return None
-
-
-def _extractLastCompleteAndIncomplete(jsonContent: str) -> Tuple[str, str]:
- """
- Extract last complete part and incomplete part from JSON.
-
- Returns:
- Tuple of (last_complete_part, incomplete_part) as JSON strings
- """
- import json
-
- # Try to find the last complete element/object
- # Strategy: Parse backwards, find where structures are balanced
-
- # Count braces and brackets to find where JSON becomes incomplete
- braceCount = 0
- bracketCount = 0
- lastCompleteEnd = -1
-
- inString = False
- escapeNext = False
-
- for i, char in enumerate(jsonContent):
- if escapeNext:
- escapeNext = False
- continue
-
- if char == '\\':
- escapeNext = True
- continue
-
- if char == '"':
- inString = not inString
- continue
-
- if not inString:
- if char == '{':
- braceCount += 1
- elif char == '}':
- braceCount -= 1
- if braceCount == 0 and bracketCount == 0:
- # Found end of complete structure
- lastCompleteEnd = i + 1
- elif char == '[':
- bracketCount += 1
- elif char == ']':
- bracketCount -= 1
- if braceCount == 0 and bracketCount == 0:
- # Found end of complete structure
- lastCompleteEnd = i + 1
-
- # Extract parts
- if lastCompleteEnd > 0:
- lastCompletePart = jsonContent[:lastCompleteEnd]
- incompletePart = jsonContent[lastCompleteEnd:].strip()
-
- # Try to find last complete element within the structure
- # Look for last complete object/array element
- lastCompleteElement = _findLastCompleteElement(lastCompletePart)
- if lastCompleteElement:
- # Build context for incomplete part - show structure around the break
- incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd)
- return lastCompleteElement, incompleteWithContext
- else:
- # Build context for incomplete part
- incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd)
- return lastCompletePart, incompleteWithContext
- else:
- # No complete structure found - everything is incomplete
- # Still try to show context
- incompleteWithContext = buildIncompleteContext(jsonContent, 0)
- return "", incompleteWithContext
-
-
-def _findLastCompleteElement(jsonStr: str) -> str:
- """
- Find the last complete element in JSON string.
- """
- import json
-
- # Try to parse and extract last element
- try:
- closed = closeJsonStructures(jsonStr)
- parsed = json.loads(closed)
-
- # If it's a dict with arrays, get last element from first array
- if isinstance(parsed, dict):
- for key, value in parsed.items():
- if isinstance(value, list) and value:
- lastElem = value[-1]
- return json.dumps(lastElem, indent=2, ensure_ascii=False)
-
- # If it's a list, get last element
- if isinstance(parsed, list) and parsed:
- lastElem = parsed[-1]
- return json.dumps(lastElem, indent=2, ensure_ascii=False)
- except Exception:
- pass
-
- # Fallback: try to find last complete object using brace matching
- braceCount = 0
- startPos = -1
- lastCompleteEnd = -1
-
- for i, char in enumerate(jsonStr):
- if char == '{':
- if braceCount == 0:
- startPos = i
- braceCount += 1
- elif char == '}':
- braceCount -= 1
- if braceCount == 0 and startPos >= 0:
- lastCompleteEnd = i + 1
-
- if lastCompleteEnd > 0:
- return jsonStr[startPos:lastCompleteEnd]
-
- return ""
-
-
-def buildIncompleteContext(jsonContent: str, breakPosition: int) -> str:
- """
- Build hierarchical context showing incomplete JSON structure.
-
- Shows:
- - Full hierarchy structure (always shown)
- - Complete elements before cut (within 200 char DATA budget)
- - Cut piece marked with <-- CUT POINT (incomplete)
- - Does NOT close open structures
- """
- if breakPosition <= 0 or breakPosition > len(jsonContent):
- return jsonContent
-
- hierarchy = findStructureHierarchy(jsonContent, breakPosition)
- if not hierarchy:
- return jsonContent[:breakPosition]
-
- cutPiece = extractCutPiece(jsonContent, breakPosition)
- resultLines = []
- DATA_BUDGET = 500
-
- # Build hierarchy level by level - show actual JSON structure
- for levelIndex, level in enumerate(hierarchy):
- levelType = level['type']
- levelStart = level['start_pos']
- levelDepth = level['depth']
- indent = " " * levelDepth
- isCutLevel = (levelIndex == len(hierarchy) - 1)
- isParentOfCutLevel = (levelIndex == len(hierarchy) - 2)
-
- # Get next level info
- if levelIndex < len(hierarchy) - 1:
- nextLevel = hierarchy[levelIndex + 1]
- nextLevelStart = nextLevel['start_pos']
- else:
- nextLevelStart = breakPosition
-
- # Show opening structure for this level
- # For cut level, check if cut piece already starts with bracket/brace - if so, don't duplicate
- if isCutLevel and cutPiece:
- cutPieceStripped = cutPiece.strip()
- if (levelType == 'array' and cutPieceStripped.startswith('[')) or \
- (levelType == 'object' and cutPieceStripped.startswith('{')):
- # Cut piece already includes opening bracket, don't add it separately
- # Use parent level's child indent (cut element is a child of parent, not a separate level)
- if levelIndex > 0:
- parentLevel = hierarchy[levelIndex - 1]
- parentIndent = " " * parentLevel['depth']
- childIndent = parentIndent + " "
- else:
- childIndent = indent + " "
- for line in cutPiece.split('\n'):
- stripped = line.strip()
- if stripped:
- resultLines.append(f'{childIndent}{stripped}')
- resultLines[-1] += ' <-- CUT POINT (incomplete)'
- else:
- # Cut piece doesn't start with matching bracket, add opening structure
- resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
- childIndent = indent + " "
- for line in cutPiece.split('\n'):
- stripped = line.strip()
- if stripped:
- resultLines.append(f'{childIndent}{stripped}')
- resultLines[-1] += ' <-- CUT POINT (incomplete)'
- elif isCutLevel:
- # Cut level but no cut piece - add opening structure
- resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
- childIndent = indent + " "
- resultLines.append(f'{childIndent}... <-- CUT POINT (incomplete)')
- elif isParentOfCutLevel:
- # Parent of cut level: add opening structure, then show complete elements with budget
- # Works for both arrays and objects
- resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
- childIndent = indent + " "
- completeElements = _findCompleteElementsAtLevel(
- jsonContent, levelStart, nextLevelStart, levelDepth
- )
-
- dataBudget = DATA_BUDGET
- for elementStart, elementEnd in reversed(completeElements):
- elementData = jsonContent[elementStart:elementEnd].strip()
- elementSize = len(elementData)
-
- if elementSize == 0:
- continue
-
- if elementSize > dataBudget:
- break
-
- for line in elementData.split('\n'):
- stripped = line.strip()
- if stripped:
- resultLines.append(f'{childIndent}{stripped}')
- if elementEnd < nextLevelStart:
- resultLines[-1] += ','
-
- dataBudget -= elementSize
-
- if dataBudget <= 0:
- break
-
- else:
- # Other parent levels: add opening structure, then show path content
- resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
- childIndent = indent + " "
- pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
- if pathContent:
- # Show all path content (structure is always shown, not truncated)
- for line in pathContent.split('\n'):
- stripped = line.strip()
- if stripped:
- resultLines.append(f'{childIndent}{stripped}')
-
- return "\n".join(resultLines)
-
-
-def _buildNestedHierarchy(
- resultLines: List[str],
- jsonContent: str,
- hierarchy: List[Dict[str, Any]],
- levelIndex: int,
- breakPosition: int,
- cutPiece: str,
- cutLevel: Dict[str, Any]
-) -> None:
- """
- Recursively build nested hierarchy from root to cut level.
- This ensures proper nesting where each level contains the next level.
- """
- if levelIndex >= len(hierarchy):
- return
-
- level = hierarchy[levelIndex]
- levelType = level['type']
- levelStart = level['start_pos']
- levelKey = level.get('key')
- levelDepth = level['depth']
- indent = " " * levelDepth
-
- isCutLevel = (levelIndex == len(hierarchy) - 1)
-
- # Show opening structure for this level
- if levelKey:
- resultLines.append(f'{indent}"{levelKey}": {{' if levelType == 'object' else f'{indent}"{levelKey}": [')
- else:
- resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
-
- childIndent = indent + " "
-
- if isCutLevel:
- # Cut level - show content (complete elements + cut piece)
- if levelType == 'array':
- charBudget = 1000
- completeElements = _findCompleteElementsAtLevel(
- jsonContent, levelStart, breakPosition, levelDepth
- )
-
- # Show complete elements (working backwards from the cut)
- for elementStart, elementEnd in reversed(completeElements):
- elementSize = elementEnd - elementStart
- if charBudget >= elementSize:
- element = jsonContent[elementStart:elementEnd].strip()
- if element:
- elementLines = element.split('\n')
- for line in elementLines:
- if line.strip():
- resultLines.append(f'{childIndent}{line}')
- if elementEnd < breakPosition:
- resultLines[-1] += ','
- charBudget -= elementSize
- else:
- break
-
- # Show cut piece
- if cutPiece:
- cutPieceLines = cutPiece.split('\n')
- for line in cutPieceLines:
- if line.strip():
- resultLines.append(f'{childIndent}{line}')
- resultLines[-1] += ' <-- CUT POINT (incomplete)'
- else:
- cutPart = jsonContent[max(0, breakPosition-50):breakPosition]
- resultLines.append(f'{childIndent}{cutPart} <-- CUT POINT (incomplete)')
-
- else:
- # Object at cut level
- previewSize = breakPosition - levelStart
- maxPreviewSize = 500
- if previewSize > maxPreviewSize:
- previewStart = breakPosition - maxPreviewSize
- preview = jsonContent[previewStart:breakPosition]
- else:
- preview = jsonContent[levelStart:breakPosition]
-
- previewLines = preview.split('\n')
- for line in previewLines:
- if line.strip():
- resultLines.append(f'{childIndent}{line}')
-
- cutPart = jsonContent[breakPosition:min(breakPosition + 50, len(jsonContent))]
- resultLines.append(f'{childIndent}... {cutPart} <-- CUT POINT (incomplete)')
-
- else:
- # Parent level - show path to next level, then recursively build next level
- nextLevel = hierarchy[levelIndex + 1]
- nextLevelKey = nextLevel.get('key')
- nextLevelStart = nextLevel['start_pos']
- nextLevelType = nextLevel['type']
-
- # Extract content between this level's opening and next level's start
- # This shows any keys/values that come before the next level
- pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
-
- # Show the path content (keys/values before next level)
- if len(pathContent) > 0 and len(pathContent) <= 500:
- pathLines = pathContent.split('\n')
- nonEmptyLines = [line for line in pathLines if line.strip()]
- if nonEmptyLines:
- for line in nonEmptyLines[:20]: # Show more lines
- if line.strip():
- resultLines.append(f'{childIndent}{line}')
- if len(nonEmptyLines) > 20:
- resultLines.append(f'{childIndent}... ({len(nonEmptyLines) - 20} more lines) ...')
- elif len(pathContent) > 500:
- # Content too large - show placeholder
- resultLines.append(f'{childIndent}... (content too large, {len(pathContent)} chars) ...')
-
- # Always show the key leading to next level if it exists
- # The recursive call will show the opening bracket/brace, so we just show the key here
- if nextLevelKey:
- # Show the key (the recursive call will add the opening bracket/brace)
- # Actually, the recursive call already shows the full opening with key,
- # so we don't need to show it here - just let the recursive call handle it
- pass
-
- # Recursively build next level (this will show its opening structure and content)
- _buildNestedHierarchy(resultLines, jsonContent, hierarchy, levelIndex + 1, breakPosition, cutPiece, cutLevel)
-
- # Close this level
- resultLines.append(f'{indent}}}' if levelType == 'object' else f'{indent}]')
-
-
-def _findCompleteElementsAtLevel(
- jsonContent: str,
- levelStart: int,
- breakPosition: int,
- targetDepth: int
-) -> List[Tuple[int, int]]:
- """
- Find all complete elements at a specific depth level.
-
- Elements inside the structure at targetDepth are at targetDepth + 1.
- We track depth relative to the start of the structure.
-
- Returns list of (start, end) tuples for complete elements.
- """
- completeElements = []
-
- # Track depth relative to the level start
- # When we're at levelStart, we're at the opening bracket/brace (depth = targetDepth)
- # Elements inside are at depth = targetDepth + 1
- relativeDepth = 0 # Depth relative to level start (0 = at opening bracket/brace)
- inString = False
- escapeNext = False
- currentElementStart = None
-
- # Find the first non-whitespace character after the opening bracket/brace
- for i in range(levelStart + 1, min(breakPosition, len(jsonContent))):
- if jsonContent[i] not in [' ', '\n', '\r', '\t']:
- currentElementStart = i
- break
-
- if currentElementStart is None:
- return completeElements
-
- for i in range(currentElementStart, min(breakPosition, len(jsonContent))):
- char = jsonContent[i]
-
- if escapeNext:
- escapeNext = False
- continue
-
- if char == '\\':
- escapeNext = True
- continue
-
- if char == '"':
- inString = not inString
- continue
-
- if not inString:
- if char == '{':
- relativeDepth += 1
- elif char == '}':
- relativeDepth -= 1
- # Element is complete when we return to the level's depth (relativeDepth == 0)
- if relativeDepth == 0:
- # Found end of complete element
- if currentElementStart is not None:
- completeElements.append((currentElementStart, i + 1))
- # Find start of next element
- j = i + 1
- while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
- j += 1
- if j < breakPosition:
- currentElementStart = j
- else:
- currentElementStart = None
- elif char == '[':
- relativeDepth += 1
- elif char == ']':
- relativeDepth -= 1
- # Element is complete when we return to the level's depth (relativeDepth == 0)
- if relativeDepth == 0:
- # Found end of complete element
- if currentElementStart is not None:
- completeElements.append((currentElementStart, i + 1))
- # Find start of next element
- j = i + 1
- while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
- j += 1
- if j < breakPosition:
- currentElementStart = j
- else:
- currentElementStart = None
- elif char == ',':
- # Comma at relativeDepth == 0 means we're between elements at the cut level
- if relativeDepth == 0:
- # Element boundary - check if we have a complete element
- if currentElementStart is not None and currentElementStart < i:
- # Simple value (string, number, boolean, null) - complete at comma
- completeElements.append((currentElementStart, i))
- # Find start of next element
- j = i + 1
- while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
- j += 1
- if j < breakPosition:
- currentElementStart = j
- else:
- currentElementStart = None
-
- return completeElements
-
-
-def extractCutPiece(jsonContent: str, breakPosition: int) -> str:
- """
- Extract the incomplete piece at the cut point.
- Generic function that works with all JSON types: arrays, objects, strings, numbers, booleans, null.
-
- CRITICAL: Uses findStructureHierarchy to identify the cut level, then parses from the cut level start
- to correctly identify which element contains the break position.
- This approach handles all JSON structures generically, including:
- - Nested objects and arrays
- - Strings containing brackets, braces, commas
- - Complex nested structures
-
- Returns the incomplete element from its start to the break position.
- """
- if breakPosition <= 0 or breakPosition > len(jsonContent):
- return ""
-
- # First, find the structure hierarchy to identify the cut level
- hierarchy = findStructureHierarchy(jsonContent, breakPosition)
- if not hierarchy:
- # Fallback: return content before break
- return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip()
-
- # Get the cut level (the structure containing the break position)
- cutLevel = hierarchy[-1]
- cutLevelStart = cutLevel['start_pos']
- cutLevelDepth = cutLevel.get('depth', 0)
-
- # Parse from cutLevelStart to breakPosition to find element boundaries
- braceDepth = 0 # Absolute brace depth
- bracketDepth = 0 # Absolute bracket depth
- inString = False
- escapeNext = False
-
- # Track element start at the cut level
- currentElementStart = cutLevelStart # Start of current element
-
- # Parse from cut level start to break position
- for i in range(cutLevelStart, min(breakPosition, len(jsonContent))):
- char = jsonContent[i]
-
- if escapeNext:
- escapeNext = False
- continue
-
- if char == '\\':
- escapeNext = True
- continue
-
- if char == '"':
- inString = not inString
- continue
-
- if not inString:
- if char == '{':
- braceDepth += 1
- elif char == '}':
- braceDepth -= 1
- elif char == '[':
- bracketDepth += 1
- elif char == ']':
- bracketDepth -= 1
- elif char == ',':
- # Comma at cut level separates elements
- currentDepth = braceDepth + bracketDepth
- if currentDepth == cutLevelDepth:
- # This comma is at the cut level - next element starts after it
- j = i + 1
- while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
- j += 1
- if j < breakPosition:
- currentElementStart = j
- elif char == ':':
- # Colon at cut level separates key from value
- currentDepth = braceDepth + bracketDepth
- if currentDepth == cutLevelDepth:
- # This colon is at the cut level - value starts after it
- j = i + 1
- while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
- j += 1
- if j < breakPosition:
- currentElementStart = j
-
- # The element containing breakPosition starts at currentElementStart
- # Find the actual start by skipping leading whitespace
- actualStart = currentElementStart
- for i in range(currentElementStart, min(breakPosition, len(jsonContent))):
- char = jsonContent[i]
- if char not in [' ', '\n', '\r', '\t']:
- actualStart = i
- break
-
- # Extract the incomplete piece from actualStart to breakPosition
- # Preserve trailing whitespace - it's needed for merging
- cutPiece = jsonContent[actualStart:breakPosition]
-
- # Remove leading whitespace but preserve trailing whitespace
- cutPiece = cutPiece.lstrip()
-
- return cutPiece if cutPiece else jsonContent[actualStart:breakPosition]
-
-
-def findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]:
- """
- Find the structure hierarchy backwards from break point to root.
-
- Returns list of level info dicts, from root to cut level.
- Each level has: type, start_pos, end_pos, depth, key
-
- CRITICAL: Returns the path from root to cut point.
- - For closed structures: uses actual end position
- - For open structures: uses breakPosition
- """
- hierarchy = []
-
- # Track depth and positions
- braceDepth = 0
- bracketDepth = 0
- inString = False
- escapeNext = False
-
- # Track ALL structures (both closed and open) to get correct end positions
- # Stack of (type, start_pos, depth, end_pos)
- # end_pos is None until structure is closed
- structureStack = [] # Stack of (type, start_pos, depth, end_pos)
- closedStructures = [] # List of closed structures with their end positions
-
- for i in range(breakPosition):
- if i >= len(jsonContent):
- break
-
- char = jsonContent[i]
-
- if escapeNext:
- escapeNext = False
- continue
-
- if char == '\\':
- escapeNext = True
- continue
-
- if char == '"':
- inString = not inString
- continue
-
- if not inString:
- if char == '{':
- # Store depth BEFORE incrementing (this is the level of the structure being opened)
- currentDepth = braceDepth + bracketDepth
- structureStack.append(('object', i, currentDepth, None))
- braceDepth += 1
- elif char == '}':
- # When closing, record the end position and move to closed structures
- if structureStack and structureStack[-1][0] == 'object':
- structType, start, depth, _ = structureStack.pop()
- closedStructures.append({
- 'type': structType,
- 'start_pos': start,
- 'end_pos': i + 1, # Actual end position
- 'depth': depth,
- 'key': findKeyBefore(jsonContent, start)
- })
- braceDepth -= 1
- elif char == '[':
- # Store depth BEFORE incrementing
- currentDepth = braceDepth + bracketDepth
- structureStack.append(('array', i, currentDepth, None))
- bracketDepth += 1
- elif char == ']':
- # When closing, record the end position
- if structureStack and structureStack[-1][0] == 'array':
- structType, start, depth, _ = structureStack.pop()
- closedStructures.append({
- 'type': structType,
- 'start_pos': start,
- 'end_pos': i + 1, # Actual end position
- 'depth': depth,
- 'key': findKeyBefore(jsonContent, start)
- })
- bracketDepth -= 1
-
- # Build hierarchy: we need the actual path from root to cut level
- # CRITICAL: Only include structures that are actually on the path
- # A structure is on the path if it contains the next level's start position
-
- if not structureStack:
- # No open structures - all were closed before break
- # Return path to deepest closed structure
- if closedStructures:
- maxDepth = max(s['depth'] for s in closedStructures)
- # Build path: each level must contain the next level
- path = []
- for depth in range(maxDepth + 1):
- candidates = [s for s in closedStructures if s['depth'] == depth]
- if candidates:
- # If multiple at same depth, use the one that contains structures at deeper depths
- if depth < maxDepth:
- # Find the one that contains a structure at depth + 1
- nextDepthCandidates = [s for s in closedStructures if s['depth'] == depth + 1]
- if nextDepthCandidates:
- nextStart = min(s['start_pos'] for s in nextDepthCandidates)
- # Find candidate that contains nextStart
- for candidate in candidates:
- if candidate['start_pos'] < nextStart < candidate['end_pos']:
- path.append(candidate)
- break
- else:
- # Fallback: use first candidate
- path.append(candidates[0])
- else:
- path.append(candidates[0])
- else:
- path.append(candidates[0])
- return path
- return []
-
- # We have open structures - build path from root to deepest open structure
- # Strategy: Start from deepest open structure and work backwards to root,
- # ensuring each level contains the next level
-
- openByDepth = {}
- for structType, start, depth, _ in structureStack:
- openByDepth[depth] = {
- 'type': structType,
- 'start_pos': start,
- 'end_pos': breakPosition,
- 'depth': depth,
- 'key': findKeyBefore(jsonContent, start)
- }
-
- maxOpenDepth = max(openByDepth.keys())
-
- # Build path backwards from deepest to root
- path = []
- currentDepth = maxOpenDepth
- currentStart = openByDepth[maxOpenDepth]['start_pos']
-
- while currentDepth >= 0:
- # Look for structure at currentDepth that contains currentStart
- # First check open structures
- if currentDepth in openByDepth:
- struct = openByDepth[currentDepth]
- if struct['start_pos'] <= currentStart:
- path.insert(0, struct)
- currentStart = struct['start_pos']
- currentDepth -= 1
- continue
-
- # Check closed structures
- candidates = [s for s in closedStructures if s['depth'] == currentDepth and s['start_pos'] <= currentStart < s['end_pos']]
- if candidates:
- # Use the one that ends latest (most recent)
- struct = max(candidates, key=lambda x: x['end_pos'])
- path.insert(0, struct)
- currentStart = struct['start_pos']
- currentDepth -= 1
- else:
- # No structure found at this depth - break
- break
-
- return path
-
- # Return the hierarchy (path from root to cut level)
- if hierarchy:
- return hierarchy
-
- # Fallback: if JSON starts with { or [, create a root level
- if jsonContent and jsonContent.strip():
- firstChar = jsonContent.strip()[0]
- if firstChar == '{':
- return [{
- 'type': 'object',
- 'start_pos': 0,
- 'end_pos': breakPosition,
- 'depth': 0,
- 'key': None
- }]
- elif firstChar == '[':
- return [{
- 'type': 'array',
- 'start_pos': 0,
- 'end_pos': breakPosition,
- 'depth': 0,
- 'key': None
- }]
-
- return []
-
-
-def extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
- """
- Extract overlap context: the object containing the cut element.
-
- Returns ONLY the object containing the cut element (the incomplete element itself).
- This is what the continuation should start with for proper merging.
-
- CRITICAL: Preserves trailing whitespace for proper merging.
-
- Args:
- jsonContent: The incomplete JSON string
- breakPosition: Position where JSON was cut
-
- Returns:
- String with the object containing the cut element
- """
- if not jsonContent or breakPosition <= 0:
- return jsonContent[-200:].strip() if jsonContent else ""
-
- # Extract cut piece (incomplete element) - this is the object containing the cut element
- cutPiece = extractCutPiece(jsonContent, breakPosition)
-
- # Return only the cut piece - the object containing the cut element
- if cutPiece:
- return cutPiece
-
- # Fallback: show content before break
- return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip()
-
-
-def findKeyBefore(jsonContent: str, pos: int) -> Optional[str]:
- """Find the key name before a structure start position."""
- # Look backwards for "key": pattern
- before = jsonContent[max(0, pos - 100):pos]
- match = re.search(r'"([^"]+)"\s*:\s*[{\[]\s*$', before)
- if match:
- return match.group(1)
- return None
-
-
-def _formatLevelContext(level: Dict[str, Any], jsonContent: str, maxContentChars: int = 1000) -> str:
- """Format a level in the hierarchy for display."""
- levelType = level['type']
- start = level['start_pos']
- end = level['end_pos']
- key = level.get('key')
-
- # Get content for this level
- levelContent = jsonContent[start:end]
-
- # If content is too large, show only metadata
- if len(levelContent) > maxContentChars:
- # Show opening and key if available
- if key:
- return f' "{key}": {levelType} (content too large, {len(levelContent)} chars)'
- else:
- return f' {levelType} (content too large, {len(levelContent)} chars)'
- else:
- # Show full content (formatted)
- indent = " " * level['depth']
- if key:
- return f'{indent}"{key}": {levelContent[:maxContentChars]}'
- else:
- return f'{indent}{levelContent[:maxContentChars]}'
-
-
-def _formatCutLevelContextDetailed(level: Dict[str, Any], cutPiece: str, jsonContent: str, breakPosition: int) -> str:
- """
- Format the cut level showing detailed hierarchy as per user instruction:
- 1. Cut piece level: element of a list (the incomplete element)
- 2. Parent of the cut element: the list containing the cut piece (with cut point shown)
- 3. Last complete object on the same level like the cut object (if exists) PLUS further
- previous content from the json string (maximum 1000 characters)
- """
- levelType = level['type']
- start = level['start_pos']
- key = level.get('key')
-
- # Get content before break point in this level
- contentBeforeBreak = jsonContent[start:breakPosition]
-
- result = []
-
- if levelType == 'array':
- # Step 3: Show last complete elements on same level + previous content (max 1000 chars)
- # Extract last complete array elements with context (up to 1000 chars)
- lastCompleteElements = _extractLastCompleteArrayElementsWithContext(
- contentBeforeBreak, jsonContent, start, maxChars=1000
- )
- if lastCompleteElements:
- result.append("3. Last complete elements on same level (plus previous content, max 1000 chars):")
- result.append(lastCompleteElements)
- result.append("")
-
- # Step 2: Show parent container (the list) with cut piece
- # Find the array element that contains the cut piece
- cutArrayElement = _findCutArrayElement(jsonContent, breakPosition, start)
- if cutArrayElement:
- result.append("2. Parent container (list containing cut piece):")
- result.append(f" {cutArrayElement}")
- else:
- # Fallback: show cut piece directly
- cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
- result.append("2. Parent container (list containing cut piece):")
- result.append(f" {cutPart}")
- result.append("")
-
- # Step 1: Show cut piece (incomplete element at cut point)
- result.append("1. Cut piece level (incomplete element at cut point):")
- if cutPiece:
- result.append(f" {cutPiece}")
- else:
- cutPart = jsonContent[breakPosition:breakPosition + 50].strip()
- result.append(f" {cutPart}")
- else:
- # Object - show structure with cut point
- result.append("Cut point in object:")
- cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
- preview = contentBeforeBreak[-500:] if len(contentBeforeBreak) > 500 else contentBeforeBreak
- result.append(f" {preview}... {cutPart} <-- CUT POINT")
-
- return "\n".join(result)
-
-
-def _formatParentLevelContext(level: Dict[str, Any], jsonContent: str, maxContentChars: int = 1000) -> str:
- """
- Format a parent level showing content (if small enough) or metadata only.
- Used for levels above the cut level, showing path to root.
- """
- levelType = level['type']
- start = level['start_pos']
- end = level['end_pos']
- key = level.get('key')
-
- # Get content for this level
- levelContent = jsonContent[start:end]
-
- # If content is too large, show only metadata
- if len(levelContent) > maxContentChars:
- # Show opening structure with key if available
- opening = jsonContent[start:start + 200].strip()
- if key:
- return f' "{key}": {levelType} (content too large, {len(levelContent)} chars)\n {opening}...'
- else:
- return f' {levelType} (content too large, {len(levelContent)} chars)\n {opening}...'
- else:
- # Show full content (formatted, but limit to maxContentChars)
- content = levelContent[:maxContentChars]
- if key:
- return f' "{key}": {content}'
- else:
- return f' {content}'
-
-
-def _extractLastCompleteArrayElementsWithContext(
- arrayContent: str, fullJsonContent: str, arrayStart: int, maxChars: int = 1000
-) -> str:
- """
- Extract last complete array elements PLUS further previous content from json string (max 1000 chars).
-
- This shows:
- - Last complete elements on the same level as the cut element
- - Additional previous content from the JSON string (up to maxChars total)
- """
- # First, extract last complete elements from arrayContent
- completeElements = []
- currentElement = ""
- braceDepth = 0
- bracketDepth = 0
- inString = False
- escapeNext = False
- totalChars = 0
-
- # Parse backwards to find complete elements
- for i in range(len(arrayContent) - 1, -1, -1):
- char = arrayContent[i]
-
- if escapeNext:
- escapeNext = False
- currentElement = char + currentElement
- continue
-
- if char == '\\':
- escapeNext = True
- currentElement = char + currentElement
- continue
-
- if char == '"':
- inString = not inString
- currentElement = char + currentElement
- continue
-
- if not inString:
- if char == '}':
- braceDepth += 1
- currentElement = char + currentElement
- elif char == '{':
- braceDepth -= 1
- currentElement = char + currentElement
- if braceDepth == 0 and bracketDepth == 0:
- # Found complete element
- element = currentElement.strip()
- if element and element[0] in ['{', '[']:
- completeElements.insert(0, element)
- totalChars += len(element)
- if totalChars >= maxChars:
- break
- currentElement = ""
- elif char == ']':
- bracketDepth += 1
- currentElement = char + currentElement
- elif char == '[':
- bracketDepth -= 1
- currentElement = char + currentElement
- if braceDepth == 0 and bracketDepth == 0:
- # Found complete element
- element = currentElement.strip()
- if element and element[0] == '[':
- completeElements.insert(0, element)
- totalChars += len(element)
- if totalChars >= maxChars:
- break
- currentElement = ""
- elif char == ',' and braceDepth == 0 and bracketDepth == 0:
- # Element boundary
- if currentElement.strip():
- element = currentElement.strip()
- if element and element[0] in ['{', '[', '"']:
- completeElements.insert(0, element)
- totalChars += len(element)
- if totalChars >= maxChars:
- break
- currentElement = ""
- else:
- currentElement = char + currentElement
-
- # Format the elements
- if completeElements:
- # Show last few complete elements (up to maxChars)
- formattedElements = []
- charsUsed = 0
- for elem in reversed(completeElements): # Show from newest to oldest
- if charsUsed + len(elem) <= maxChars:
- formattedElements.insert(0, elem)
- charsUsed += len(elem)
- else:
- break
-
- if formattedElements:
- # Format as JSON array rows (without hardcoded indentation - caller will add it)
- result = []
- for elem in formattedElements:
- # Remove leading comma if present (from mid-element extraction)
- cleanElem = elem.lstrip(',').strip()
- if cleanElem:
- result.append(f"{cleanElem},")
- return "\n".join(result)
-
- return ""
-
-
-def _findCutArrayElement(jsonContent: str, breakPosition: int, arrayStart: int) -> Optional[str]:
- """Find the array element that contains the cut piece."""
- # Look backwards from break position to find the start of the current array element
- braceDepth = 0
- bracketDepth = 0
- inString = False
- escapeNext = False
- elementStart = -1
-
- # Search backwards from break position
- for i in range(breakPosition - 1, arrayStart - 1, -1):
- if i < 0:
- break
-
- char = jsonContent[i]
-
- if escapeNext:
- escapeNext = False
- continue
-
- if char == '\\':
- escapeNext = True
- continue
-
- if char == '"':
- inString = not inString
- continue
-
- if not inString:
- if char == '}':
- braceDepth += 1
- elif char == '{':
- braceDepth -= 1
- if braceDepth == 0 and bracketDepth == 0:
- elementStart = i
- break
- elif char == ']':
- bracketDepth += 1
- elif char == '[':
- bracketDepth -= 1
- if braceDepth == 0 and bracketDepth == 0:
- elementStart = i
- break
- elif char == ',' and braceDepth == 0 and bracketDepth == 0:
- # Found element boundary
- elementStart = i + 1
- break
-
- if elementStart >= 0:
- # Extract the element (including incomplete part)
- elementContent = jsonContent[elementStart:breakPosition + 100].strip()
- # Clean up - remove leading comma if present
- if elementContent.startswith(','):
- elementContent = elementContent[1:].strip()
- return elementContent[:300] # Limit length
-
- return None
-
-
-def _extractLastCompleteArrayElements(arrayContent: str, maxChars: int = 1000) -> str:
- """Extract last complete array elements, up to maxChars."""
- # Count complete elements from the end
- elements = []
- currentElement = ""
- braceDepth = 0
- bracketDepth = 0
- inString = False
- escapeNext = False
- totalChars = 0
-
- # Parse backwards to find complete elements
- for i in range(len(arrayContent) - 1, -1, -1):
- char = arrayContent[i]
-
- if escapeNext:
- escapeNext = False
- currentElement = char + currentElement
- continue
-
- if char == '\\':
- escapeNext = True
- currentElement = char + currentElement
- continue
-
- if char == '"':
- inString = not inString
- currentElement = char + currentElement
- continue
-
- if not inString:
- if char == '}':
- braceDepth += 1
- currentElement = char + currentElement
- elif char == '{':
- braceDepth -= 1
- currentElement = char + currentElement
- if braceDepth == 0 and bracketDepth == 0:
- # Found complete element
- element = currentElement.strip()
- if element and element[0] in ['{', '[']:
- elements.insert(0, element)
- totalChars += len(element)
- if totalChars >= maxChars:
- break
- currentElement = ""
- elif char == ']':
- bracketDepth += 1
- currentElement = char + currentElement
- elif char == '[':
- bracketDepth -= 1
- currentElement = char + currentElement
- if braceDepth == 0 and bracketDepth == 0:
- # Found complete element
- element = currentElement.strip()
- if element and element[0] == '[':
- elements.insert(0, element)
- totalChars += len(element)
- if totalChars >= maxChars:
- break
- currentElement = ""
- elif char == ',' and braceDepth == 0 and bracketDepth == 0:
- # Element boundary
- if currentElement.strip():
- element = currentElement.strip()
- if element and element[0] in ['{', '[', '"']:
- elements.insert(0, element)
- totalChars += len(element)
- if totalChars >= maxChars:
- break
- currentElement = ""
- else:
- currentElement = char + currentElement
-
- if elements:
- indent = " "
- formatted = ",\n".join([f"{indent}{elem}" for elem in elements[-5:]]) # Show last 5 elements
- if len(elements) > 5:
- formatted = f"... ({len(elements) - 5} more elements) ...\n{formatted}"
- return formatted
-
- return ""
-
-
-def _extractStructureContext(jsonContent: str, incompletePart: str, lastCompletePart: str = "") -> str:
- """
- Extract structure context showing WHERE in the structure the last complete and incomplete elements are.
-
- Returns a clear description of the structure context for the broken element.
- """
- import json
- import re
-
- if not incompletePart:
- # No incomplete part extracted - try to show context from raw JSON
- try:
- # Show last part of JSON to indicate where it broke
- lastPart = jsonContent[-300:] if len(jsonContent) > 300 else jsonContent
- return f"Structure context unavailable. Last part of response:\n{lastPart}"
- except Exception:
- return "Structure context unavailable - response was completely broken"
-
- # Find where incomplete part starts
- incompleteStart = jsonContent.find(incompletePart)
- if incompleteStart == -1:
- incompleteStart = len(jsonContent)
-
- # Try to extract the structure context showing the broken element
- try:
- # Get the part before incomplete to understand structure
- beforeIncomplete = jsonContent[:incompleteStart]
-
- # Try to find the array/object context where the break occurred
- # Look for the last complete structure before the break
- structureContext = ""
-
- # Try to parse what we have before the incomplete part
- try:
- closed = closeJsonStructures(beforeIncomplete)
- parsed = json.loads(closed)
-
- # Build structure showing where we are
- if isinstance(parsed, dict) and "elements" in parsed:
- elements = parsed.get("elements", [])
- if isinstance(elements, list):
- structureContext = f"Structure: elements array with {len(elements)} complete elements\n"
- structureContext += f"Break occurred in element at index {len(elements)}"
- else:
- structureContext = "Structure: elements (not an array)"
- else:
- structureContext = "Structure: " + json.dumps(_buildStructureContext(parsed), indent=2, ensure_ascii=False)
- except Exception:
- # Can't parse - show raw context
- structureContext = f"Structure parsing failed. Context before break:\n{beforeIncomplete[-200:]}"
-
- return structureContext
-
- except Exception:
- # Fallback: show minimal context
- return f"Structure context unavailable. Break occurred at position {incompleteStart} in JSON string"
-
-
-def _findElementPath(parsed: Any, elementStr: str, originalJson: str, isIncomplete: bool = False) -> str:
- """
- Find the path to an element in the parsed JSON structure.
-
- Returns a path like "elements[2]" or "documents[0].chapters[1].sections[3]"
- """
- import json
-
- if not elementStr or not elementStr.strip():
- return ""
-
- # Strategy: Find position in original JSON string, then determine path from structure
- elementStart = originalJson.find(elementStr.strip())
- if elementStart == -1:
- return ""
-
- # Find the array context by looking backwards from element position
- beforeElement = originalJson[:elementStart]
-
- # Find the nearest array declaration before this position
- # Look for patterns like "elements": [ or "chapters": [
- arrayPattern = r'"(\w+)"\s*:\s*\['
- matches = list(re.finditer(arrayPattern, beforeElement))
- if not matches:
- return ""
-
- # Get the most recent array (closest to element)
- lastMatch = matches[-1]
- arrayName = lastMatch.group(1)
- arrayStartPos = lastMatch.end()
-
- # Count complete array elements before this position
- arrayContent = beforeElement[arrayStartPos:]
-
- # Count complete objects (balanced braces) - each complete object is an array element
- braceCount = 0
- elementIndex = 0
- inString = False
- escapeNext = False
- lastCompleteObjectEnd = -1
-
- for i, char in enumerate(arrayContent):
- if escapeNext:
- escapeNext = False
- continue
- if char == '\\':
- escapeNext = True
- continue
- if char == '"':
- inString = not inString
- continue
- if not inString:
- if char == '{':
- if braceCount == 0:
- # Start of new object
- elementIndex += 1
- braceCount += 1
- elif char == '}':
- braceCount -= 1
- if braceCount == 0:
- # End of complete object
- lastCompleteObjectEnd = i
-
- # Determine the index
- # If we're looking for incomplete element, it's at the current elementIndex
- # If we're looking for last complete element, it's at elementIndex - 1
- if isIncomplete:
- index = elementIndex
- else:
- index = elementIndex - 1 if elementIndex > 0 else 0
-
- # Build the full path by traversing the parsed structure
- def _buildPathToArray(obj: Any, targetArrayName: str, targetIndex: int, currentPath: str = "") -> Optional[str]:
- """Recursively find path to array element."""
- if isinstance(obj, dict):
- for key, value in obj.items():
- newPath = f"{currentPath}.{key}" if currentPath else key
- if key == targetArrayName and isinstance(value, list):
- # Found the target array
- if 0 <= targetIndex < len(value):
- return f"{newPath}[{targetIndex}]"
- elif targetIndex >= len(value):
- # Index beyond array - return array path with index
- return f"{newPath}[{targetIndex}]"
- result = _buildPathToArray(value, targetArrayName, targetIndex, newPath)
- if result:
- return result
- elif isinstance(obj, list):
- for i, item in enumerate(obj):
- result = _buildPathToArray(item, targetArrayName, targetIndex, currentPath)
- if result:
- return result
- return None
-
- # Try to find full path in parsed structure
- fullPath = _buildPathToArray(parsed, arrayName, index)
- if fullPath:
- return fullPath
-
- # Fallback: return simple array path
- return f"{arrayName}[{index}]"
-
-
-def _buildStructureContext(obj: Any, maxDepth: int = 5) -> Any:
- """
- Build structure context (metadata only, no content).
- Similar to _buildStructureTemplate but focuses on parent structure.
- """
- if isinstance(obj, dict):
- structure = {}
- for key, value in obj.items():
- if isinstance(value, (dict, list)):
- structure[key] = _buildStructureContext(value, maxDepth - 1) if maxDepth > 0 else []
- else:
- # Skip content values - only keep structure
- pass
- return structure
- elif isinstance(obj, list) and obj:
- # Return empty list structure (no content)
- return []
- else:
- return None
-
-
-def _findIncompleteSectionInRaw(raw_json: str) -> Optional[Dict[str, Any]]:
- """
- Find the incomplete section in raw JSON.
-
- CRITICAL: JSON can be cut off mid-element (e.g., {"text": "20327,20)
- We need to find the last section and check if it's incomplete.
- """
- try:
- # Try to parse documents structure
- if '"documents"' in raw_json:
- # Find last document
- doc_start = raw_json.rfind('"documents"')
- if doc_start >= 0:
- doc_section = raw_json[doc_start:]
- # Try to find sections array
- sections_start = doc_section.find('"sections"')
- if sections_start >= 0:
- sections_section = doc_section[sections_start:]
- # Find sections array start
- array_start = sections_section.find('[')
- if array_start >= 0:
- # Find all complete sections
- section_objects = []
- depth = 0
- section_start = None
-
- for i in range(array_start, len(sections_section)):
- if sections_section[i] == '{':
- if depth == 0:
- section_start = i
- depth += 1
- elif sections_section[i] == '}':
- depth -= 1
- if depth == 0 and section_start is not None:
- # Found complete section
- section_str = sections_section[section_start:i+1]
- try:
- section_obj = json.loads('{' + section_str + '}')
- section_objects.append(section_obj)
- except:
- pass
- section_start = None
-
- # CRITICAL: Check if there's content after the last complete section
- # If JSON ends mid-element, the last section is incomplete
- if section_objects:
- # Find position after last complete section
- last_section_end = sections_section.rfind('}')
- if last_section_end >= 0:
- # Check if there's more content after the last }
- remaining_after_last_section = sections_section[last_section_end+1:].strip()
- # Remove closing brackets/braces that might be there
- remaining_after_last_section = remaining_after_last_section.lstrip('],}')
-
- # If there's still content (like incomplete element), section is incomplete
- if remaining_after_last_section and not remaining_after_last_section.startswith(']'):
- # Last section is incomplete - return it
- return section_objects[-1]
-
- # Also check: if we can't parse the full sections array, last section is incomplete
- try:
- # Try to parse the sections array
- sections_array_str = sections_section[array_start:]
- json.loads(sections_array_str)
- # Parsed successfully - all sections complete
- return None
- except:
- # Cannot parse - last section is incomplete
- return section_objects[-1] if section_objects else None
- except Exception as e:
- logger.debug(f"Error finding incomplete section: {e}")
-
- return None
-
-
-def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> Tuple[Optional[str], Optional[str]]:
- """Extract cut-off element and element before from incomplete section."""
- cut_off_element = None
- element_before_cutoff = None
-
- elements = incomplete_section.get("elements", [])
- if not elements:
- return None, None
-
- # CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number
- # Deliver the cut-off part AS-IS (don't try to "complete" it)
-
- if isinstance(elements, list):
- # Find last element (might be incomplete)
- if elements:
- # Edge case: If cut-off is in first element, just show cut-off element
- if len(elements) == 1:
- # Only one element - might be cut-off
- last_elem = elements[0]
- if isinstance(last_elem, dict):
- # Check if element contains nested content (e.g., code_block with JSON string)
- cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
- if not cut_off_element:
- cut_off_element = json.dumps(last_elem)
- else:
- cut_off_element = str(last_elem)
- else:
- # Multiple elements - last one might be cut-off, get element before
- element_before_cutoff = json.dumps(elements[-2]) if isinstance(elements[-2], dict) else str(elements[-2])
- last_elem = elements[-1]
- if isinstance(last_elem, dict):
- # Check if element contains nested content
- cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
- if not cut_off_element:
- cut_off_element = json.dumps(last_elem)
- else:
- cut_off_element = str(last_elem)
- elif isinstance(elements, dict):
- # Single element - might be cut-off
- cut_off_element = _extractCutOffFromElement(elements, raw_json)
- if not cut_off_element:
- cut_off_element = json.dumps(elements)
-
- # If we couldn't extract from parsed structure, extract from raw JSON
- if not cut_off_element:
- # Extract the last incomplete part from raw JSON
- # Find the last incomplete string/number/array
- # re is already imported at module level
- # Look for incomplete string at the end
- incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL)
- if incomplete_match:
- cut_off_element = incomplete_match.group(1)
- else:
- # Look for incomplete number
- number_match = re.search(r'(\d+\.?\d*)(?:\s*[,}\]]|$)', raw_json[-200:])
- if number_match:
- cut_off_element = number_match.group(1)
-
- return cut_off_element, element_before_cutoff
-
-
-def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optional[str]:
- """
- Extract cut-off point from within an element (e.g., code_block with JSON string, table with incomplete rows).
-
- This helps identify where exactly to continue within nested structures.
- """
- # re is already imported at module level
-
- # Check for code_block with nested JSON
- if "code" in element:
- code_content = element.get("code", "")
- if isinstance(code_content, str) and code_content.strip().startswith("{"):
- # This is JSON inside a code string - find where it was cut off
- # Look for the last complete value in the raw JSON
- # Find the code string in raw JSON
- code_match = re.search(r'"code"\s*:\s*"([^"]*?)(?:"|$)', raw_json[-2000:], re.DOTALL)
- if code_match:
- code_str = code_match.group(1)
- # Try to find the last complete value in the JSON string
- # Look for patterns like: [2, 3, 5, ... 17929, (cut off here)
- array_match = re.search(r'\[([^\]]*?)(?:\]|$)', code_str, re.DOTALL)
- if array_match:
- array_content = array_match.group(1)
- # Find last complete number/item
- # Match: number followed by comma or end
- last_complete = re.findall(r'(\d+)\s*[,]', array_content)
- if last_complete:
- last_num = last_complete[-1]
- # Return context showing where to continue
- return f'{{"code": "{{\\"primes\\": [... up to {last_num}, ]"}}'
-
- # Check for table with incomplete rows
- if "rows" in element:
- rows = element.get("rows", [])
- if isinstance(rows, list) and rows:
- # Find last complete row in raw JSON
- rows_str = str(rows)
- # Try to find where rows were cut off
- last_row_match = re.search(r'\[([^\]]*?)(?:\]|$)', raw_json[-1000:], re.DOTALL)
- if last_row_match:
- return f'{{"rows": [... last complete row shown above, ]}}'
-
- # Check for list items
- if "items" in element:
- items = element.get("items", [])
- if isinstance(items, list) and items:
- # Find last complete item
- last_item_match = re.search(r'"([^"]*?)"\s*(?:,|\])', raw_json[-1000:], re.DOTALL)
- if last_item_match:
- return f'{{"items": [... last item shown above, ]}}'
-
- return None
-
-
-def _extractCutOffElementsFromRaw(raw_json: str, allSections: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
- """
- Extract cut-off element directly from raw JSON when section parsing fails.
-
- This handles ALL cases where JSON is cut off:
- - Mid-element (incomplete element object)
- - Mid-string/number within an element
- - Mid-array within an element (e.g., rows in table, items in list)
- - Mid-nested structure
-
- CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number - deliver as-is.
- """
- cut_off_element = None
- element_before_cutoff = None
-
- try:
- # Find the last "elements" array in raw JSON
- if '"elements"' in raw_json:
- # Find the last occurrence of "elements"
- last_elements_pos = raw_json.rfind('"elements"')
- if last_elements_pos >= 0:
- elements_section = raw_json[last_elements_pos:]
-
- # Find the array start '['
- array_start = elements_section.find('[')
- if array_start >= 0:
- # Use a simpler approach: find all element objects by tracking braces
- # This works even if elements contain nested arrays/objects
- element_strings = []
- depth = 0
- in_string = False
- escape_next = False
- elem_start = None
-
- for i in range(array_start, len(elements_section)):
- char = elements_section[i]
-
- # Track string state (ignore brackets/braces inside strings)
- if escape_next:
- escape_next = False
- continue
- if char == '\\':
- escape_next = True
- continue
- if char == '"' and not escape_next:
- in_string = not in_string
- continue
-
- if not in_string:
- if char == '{':
- if depth == 0:
- elem_start = i
- depth += 1
- elif char == '}':
- depth -= 1
- if depth == 0 and elem_start is not None:
- # Found complete element (all braces closed, even if nested arrays are incomplete)
- elem_str = elements_section[elem_start:i+1]
- element_strings.append(elem_str)
- elem_start = None
-
- # Now analyze what we found
- if element_strings:
- last_elem = element_strings[-1]
- last_complete_pos = elements_section.rfind('}')
-
- # Check if there's content after the last complete element
- if last_complete_pos >= 0:
- remaining = elements_section[last_complete_pos+1:].strip()
- remaining_clean = remaining.lstrip(',').strip().lstrip(']').strip()
-
- # Case 1: Incomplete element after last complete one
- if remaining_clean and not remaining_clean.startswith(']'):
- incomplete_start = last_complete_pos + 1
- while incomplete_start < len(elements_section) and elements_section[incomplete_start] in ' \n\t\r,':
- incomplete_start += 1
-
- if incomplete_start < len(elements_section):
- incomplete_elem_str = elements_section[incomplete_start:].strip()
- incomplete_elem_str = incomplete_elem_str.rstrip(']').rstrip('}').rstrip()
- cut_off_element = incomplete_elem_str
- element_before_cutoff = element_strings[-1]
-
- # Case 2: Last element itself is incomplete (cut off in nested structure like rows, items, etc.)
- else:
- # Check if JSON is incomplete by analyzing structure
- # Count unclosed brackets/braces in elements section (ignoring strings)
- elements_section_braces = 0
- elements_section_brackets = 0
- in_str = False
- esc = False
-
- for char in elements_section:
- if esc:
- esc = False
- continue
- if char == '\\':
- esc = True
- continue
- if char == '"':
- in_str = not in_str
- continue
- if not in_str:
- if char == '{':
- elements_section_braces += 1
- elif char == '}':
- elements_section_braces -= 1
- elif char == '[':
- elements_section_brackets += 1
- elif char == ']':
- elements_section_brackets -= 1
-
- # Also check raw JSON for unclosed structures
- raw_braces = 0
- raw_brackets = 0
- in_str = False
- esc = False
-
- for char in raw_json:
- if esc:
- esc = False
- continue
- if char == '\\':
- esc = True
- continue
- if char == '"':
- in_str = not in_str
- continue
- if not in_str:
- if char == '{':
- raw_braces += 1
- elif char == '}':
- raw_braces -= 1
- elif char == '[':
- raw_brackets += 1
- elif char == ']':
- raw_brackets -= 1
-
- # Check if last element can be parsed
- last_elem_parsable = False
- try:
- json.loads(last_elem)
- last_elem_parsable = True
- except:
- pass
-
- # Determine if last element is incomplete
- is_incomplete = False
-
- # If there are unclosed structures, element is incomplete
- if elements_section_brackets > 0 or elements_section_braces > 0 or raw_brackets > 0 or raw_braces > 0:
- is_incomplete = True
-
- # If element cannot be parsed, it's incomplete
- elif not last_elem_parsable:
- is_incomplete = True
-
- # Check if JSON ends mid-element by finding where element ends in raw JSON
- elif last_elem_parsable:
- # Find where this element ends in the raw JSON
- elem_end_marker = last_elem[-100:] if len(last_elem) > 100 else last_elem
- elem_end_in_raw = raw_json.rfind(elem_end_marker)
-
- if elem_end_in_raw >= 0:
- actual_elem_end = elem_end_in_raw + len(last_elem)
-
- if actual_elem_end < len(raw_json):
- remaining_after_elem = raw_json[actual_elem_end:].strip()
- remaining_clean = remaining_after_elem.lstrip(',').strip()
-
- # If there's unexpected content, element is incomplete
- if remaining_clean and not remaining_clean.startswith(']'):
- is_incomplete = True
-
- if is_incomplete:
- cut_off_element = last_elem
- if len(element_strings) >= 2:
- element_before_cutoff = element_strings[-2]
- elif len(element_strings) == 1:
- element_before_cutoff = last_elem
-
- # Case 3: No complete elements found, but there's an incomplete one
- elif elem_start is not None:
- # There's an incomplete element that hasn't been closed
- incomplete_elem_str = elements_section[elem_start:].strip()
- cut_off_element = incomplete_elem_str
- # No element before (this is the first/only element)
- element_before_cutoff = None
- except Exception as e:
- logger.debug(f"Error extracting cut-off elements from raw JSON: {e}")
-
- return cut_off_element, element_before_cutoff
-
-
def parseJsonWithModel(jsonString: str, modelClass: Type[T]) -> T:
"""
Parse JSON string using Pydantic model with error handling.
diff --git a/tests/functional/test12_json_split_merge.py b/tests/functional/test12_json_split_merge.py
new file mode 100644
index 00000000..b36b93f2
--- /dev/null
+++ b/tests/functional/test12_json_split_merge.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+JSON Split and Merge Test 12 - Tests JSON splitting and merging using workflow tools
+Tests random splitting of JSON files into 3 parts and merging them back using ModularJsonMerger.
+"""
+
+import asyncio
+import json
+import sys
+import os
+import time
+import random
+from typing import Dict, Any, List, Optional, Tuple
+
+# Add the gateway to path (go up 2 levels from tests/functional/)
+_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+if _gateway_path not in sys.path:
+ sys.path.insert(0, _gateway_path)
+
+# Import JSON merger from workflow tools
+from modules.services.serviceAi.subJsonMerger import ModularJsonMerger, JsonMergeLogger
+from modules.shared.jsonContinuation import getContexts
+
+
+class JsonSplitMergeTester12:
+ def __init__(self):
+ self.testResults = {}
+ self.testJsonFiles = []
+ self.logBuffer = []
+ self.logFile = None
+
+ def createTestJsonFiles(self) -> List[Dict[str, Any]]:
+ """Create various test JSON files with different structures."""
+ testFiles = [
+ {
+ "name": "config.json",
+ "data": {
+ "application": "Customer Manager",
+ "version": "1.0.0",
+ "database": {
+ "host": "localhost",
+ "port": 5432,
+ "name": "customers_db"
+ },
+ "api": {
+ "baseUrl": "https://api.example.com",
+ "timeout": 30
+ }
+ }
+ },
+ {
+ "name": "customers.json",
+ "data": {
+ "customers": [
+ {"id": 1, "name": "John Doe", "email": "john@example.com", "phone": "+1234567890", "address": "123 Main St"},
+ {"id": 2, "name": "Jane Smith", "email": "jane@example.com", "phone": "+0987654321", "address": "456 Oak Ave"},
+ {"id": 3, "name": "Bob Johnson", "email": "bob@example.com", "phone": "+1122334455", "address": "789 Pine Rd"},
+ {"id": 4, "name": "Alice Williams", "email": "alice@example.com", "phone": "+5566778899", "address": "321 Elm St"},
+ {"id": 5, "name": "Charlie Brown", "email": "charlie@example.com", "phone": "+9988776655", "address": "654 Maple Dr"}
+ ]
+ }
+ },
+ {
+ "name": "settings.json",
+ "data": {
+ "theme": {
+ "darkMode": True,
+ "fontSize": 14,
+ "language": "en"
+ },
+ "notifications": {
+ "email": True,
+ "sms": False,
+ "push": True
+ },
+ "features": {
+ "enableAnalytics": True,
+ "enableReports": False
+ }
+ }
+ },
+ {
+ "name": "products.json",
+ "data": {
+ "products": [
+ {"id": "P001", "name": "Product A", "price": 29.99, "category": "Electronics", "inStock": True},
+ {"id": "P002", "name": "Product B", "price": 49.99, "category": "Clothing", "inStock": True},
+ {"id": "P003", "name": "Product C", "price": 19.99, "category": "Books", "inStock": False},
+ {"id": "P004", "name": "Product D", "price": 99.99, "category": "Electronics", "inStock": True},
+ {"id": "P005", "name": "Product E", "price": 14.99, "category": "Books", "inStock": True},
+ {"id": "P006", "name": "Product F", "price": 79.99, "category": "Clothing", "inStock": True}
+ ]
+ }
+ },
+ {
+ "name": "document_structure.json",
+ "data": {
+ "metadata": {
+ "title": "Test Document",
+ "author": "Test Author",
+ "date": "2025-01-05"
+ },
+ "documents": [
+ {
+ "id": "doc1",
+ "title": "Document 1",
+ "sections": [
+ {
+ "id": "sec1",
+ "content_type": "heading",
+ "elements": [
+ {"type": "heading", "content": {"text": "Introduction", "level": 1}}
+ ]
+ },
+ {
+ "id": "sec2",
+ "content_type": "paragraph",
+ "elements": [
+ {"type": "paragraph", "content": {"text": "This is a test paragraph."}}
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ },
+ {
+ "name": "large_customers.json",
+ "data": self._createLargeCustomersData()
+ },
+ {
+ "name": "large_products.json",
+ "data": self._createLargeProductsData()
+ },
+ {
+ "name": "large_documents.json",
+ "data": self._createLargeDocumentsData()
+ }
+ ]
+
+ return testFiles
+
+ def _createLargeCustomersData(self) -> Dict[str, Any]:
+ """Create a large customers dataset for budget testing."""
+ customers = []
+ # Create 100 customers with long descriptions
+ for i in range(100):
+ customers.append({
+ "id": i + 1,
+ "name": f"Customer {i + 1}",
+ "email": f"customer{i+1}@example.com",
+ "phone": f"+1{5550000000 + i}",
+ "address": f"{100 + i} Main Street, City {i % 10}, State {i % 5}, ZIP {10000 + i}",
+ "description": f"This is a detailed description for customer {i + 1}. " * 10 +
+ f"They have been a loyal customer since {2000 + (i % 25)}. " +
+ f"Their preferences include various products and services. " * 5,
+ "orders": [
+ {
+ "orderId": f"ORD-{i+1}-{j+1}",
+ "date": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}",
+ "total": round(100.0 + (i * 10) + (j * 5), 2),
+ "items": [
+ {
+ "productId": f"PROD-{k+1}",
+ "quantity": (k % 5) + 1,
+ "price": round(10.0 + k * 2, 2)
+ }
+ for k in range(3)
+ ]
+ }
+ for j in range(5)
+ ],
+ "metadata": {
+ "created": f"2020-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}",
+ "lastLogin": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}",
+ "tags": [f"tag-{i % 10}", f"category-{i % 5}", f"segment-{i % 3}"]
+ }
+ })
+ return {"customers": customers}
+
+ def _createLargeProductsData(self) -> Dict[str, Any]:
+ """Create a large products dataset for budget testing."""
+ products = []
+ # Create 200 products with detailed information
+ categories = ["Electronics", "Clothing", "Books", "Home & Garden", "Sports", "Toys", "Automotive", "Health"]
+ for i in range(200):
+ category = categories[i % len(categories)]
+ products.append({
+ "id": f"PROD-{i+1:04d}",
+ "name": f"Product {i+1} - {category}",
+ "category": category,
+ "price": round(10.0 + (i * 2.5), 2),
+ "cost": round(5.0 + (i * 1.5), 2),
+ "inStock": i % 3 != 0,
+ "stockQuantity": (i % 100) * 10,
+ "description": f"This is a comprehensive product description for Product {i+1}. " * 15 +
+ f"It belongs to the {category} category and offers excellent value. " * 10 +
+ f"Features include: feature-1, feature-2, feature-3, and many more. " * 5,
+ "specifications": {
+ "weight": f"{1.0 + (i % 10)} kg",
+ "dimensions": f"{10 + (i % 20)}x{5 + (i % 15)}x{3 + (i % 10)} cm",
+ "color": ["red", "blue", "green", "black", "white"][i % 5],
+ "material": ["plastic", "metal", "wood", "fabric"][i % 4],
+ "warranty": f"{1 + (i % 5)} years"
+ },
+ "reviews": [
+ {
+ "userId": f"USER-{j+1}",
+ "rating": (j % 5) + 1,
+ "comment": f"Review {j+1} for product {i+1}: " + "This is a detailed review comment. " * 10,
+ "date": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}"
+ }
+ for j in range(3)
+ ],
+ "relatedProducts": [f"PROD-{k+1:04d}" for k in range(max(0, i-2), min(200, i+3)) if k != i]
+ })
+ return {"products": products}
+
+ def _createLargeDocumentsData(self) -> Dict[str, Any]:
+ """Create a large documents dataset for budget testing."""
+ documents = []
+ # Create 50 documents with nested structures
+ for i in range(50):
+ sections = []
+ for j in range(10):
+ elements = []
+ for k in range(5):
+ if k % 2 == 0:
+ elements.append({
+ "type": "heading",
+ "level": (k % 3) + 1,
+ "content": {
+ "text": f"Section {j+1} Heading {k+1} for Document {i+1}",
+ "style": "bold"
+ }
+ })
+ else:
+ elements.append({
+ "type": "paragraph",
+ "content": {
+ "text": f"This is paragraph {k+1} in section {j+1} of document {i+1}. " * 20 +
+ f"It contains detailed information about various topics. " * 15 +
+ f"The content is structured and well-organized. " * 10
+ }
+ })
+
+ sections.append({
+ "id": f"sec-{i+1}-{j+1}",
+ "title": f"Section {j+1}",
+ "content_type": "mixed",
+ "elements": elements,
+ "metadata": {
+ "created": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}",
+ "modified": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}",
+ "author": f"Author-{(i % 10) + 1}",
+ "tags": [f"tag-{j % 10}", f"category-{i % 5}"]
+ }
+ })
+
+ documents.append({
+ "id": f"doc-{i+1:03d}",
+ "title": f"Document {i+1} - Comprehensive Report",
+ "description": f"This is a comprehensive document with detailed information. " * 30 +
+ f"It covers multiple topics and sections. " * 20 +
+ f"The content is extensive and well-structured. " * 15,
+ "sections": sections,
+ "metadata": {
+ "created": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}",
+ "modified": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}",
+ "author": f"Author-{(i % 10) + 1}",
+ "version": f"1.{(i % 10)}",
+ "status": ["draft", "review", "published"][i % 3],
+ "tags": [f"tag-{i % 20}" for _ in range(5)]
+ }
+ })
+
+ return {
+ "metadata": {
+ "title": "Large Document Collection",
+ "description": "A collection of 50 comprehensive documents for testing budget limits",
+ "totalDocuments": 50,
+ "created": "2024-01-01"
+ },
+ "documents": documents
+ }
+
+ def splitJsonRandomly(self, jsonString: str, numParts: int = 3) -> List[str]:
+ """
+ Split JSON string randomly into specified number of parts.
+ Simulates real AI response cuts - can split anywhere, even in the middle of strings/numbers/structures.
+ This is the REAL scenario: AI response gets cut off randomly, not at convenient points.
+ """
+ if numParts < 2:
+ return [jsonString]
+
+ jsonLength = len(jsonString)
+
+ # Generate truly random split points - can be anywhere!
+ # Only ensure minimum part size to avoid empty parts
+ minPartSize = max(10, jsonLength // (numParts * 3)) # Smaller minimum to allow more randomness
+
+ splitPoints = []
+ for _ in range(numParts - 1):
+ # Generate random point - can be anywhere in the string
+ # Only ensure we don't create parts smaller than minimum
+ minPoint = len(splitPoints) * minPartSize if splitPoints else minPartSize
+ maxPoint = jsonLength - (numParts - len(splitPoints) - 1) * minPartSize
+
+ if maxPoint <= minPoint:
+ # If we can't avoid minimum size, just use the boundary
+ splitPoint = minPoint
+ else:
+ # Truly random point - can be in the middle of anything!
+ splitPoint = random.randint(minPoint, maxPoint)
+
+ splitPoints.append(splitPoint)
+
+ splitPoints.sort()
+
+ # Create parts - these can be cut anywhere, even mid-string, mid-number, etc.
+ parts = []
+ start = 0
+ for splitPoint in splitPoints:
+ parts.append(jsonString[start:splitPoint])
+ start = splitPoint
+ parts.append(jsonString[start:]) # Last part
+
+ return parts
+
+ def _log(self, message: str):
+ """Add message to log buffer."""
+ self.logBuffer.append(message)
+ print(message)
+
+
+
+ def normalizeJson(self, jsonString: str) -> Optional[Dict[str, Any]]:
+ """Normalize JSON string by parsing and re-serializing. Returns None if parsing fails."""
+ try:
+ parsed = json.loads(jsonString)
+ return parsed
+ except json.JSONDecodeError:
+ # Try to close incomplete JSON structures
+ try:
+ from modules.shared.jsonUtils import closeJsonStructures, tryParseJson
+ closed = closeJsonStructures(jsonString)
+ parsed, error, _ = tryParseJson(closed)
+ if error is None and parsed is not None:
+ return parsed
+ except Exception:
+ pass
+ # Return None if all parsing attempts fail
+ return None
+
+ def compareJson(self, original: Dict[str, Any], merged: Dict[str, Any]) -> Dict[str, Any]:
+ """Compare original and merged JSON structures."""
+ originalStr = json.dumps(original, sort_keys=True, indent=2)
+ mergedStr = json.dumps(merged, sort_keys=True, indent=2)
+
+ exactMatch = originalStr == mergedStr
+
+ # Deep comparison
+ differences = []
+ self._findDifferences(original, merged, "", differences)
+
+ return {
+ "exactMatch": exactMatch,
+ "differences": differences,
+ "originalSize": len(originalStr),
+ "mergedSize": len(mergedStr),
+ "sizeMatch": len(originalStr) == len(mergedStr)
+ }
+
+ def _findDifferences(self, obj1: Any, obj2: Any, path: str, differences: List[str]):
+ """Recursively find differences between two JSON objects."""
+ if type(obj1) != type(obj2):
+ differences.append(f"{path}: Type mismatch - {type(obj1).__name__} vs {type(obj2).__name__}")
+ return
+
+ if isinstance(obj1, dict):
+ allKeys = set(obj1.keys()) | set(obj2.keys())
+ for key in allKeys:
+ newPath = f"{path}.{key}" if path else key
+ if key not in obj1:
+ differences.append(f"{newPath}: Missing in original")
+ elif key not in obj2:
+ differences.append(f"{newPath}: Missing in merged")
+ else:
+ self._findDifferences(obj1[key], obj2[key], newPath, differences)
+ elif isinstance(obj1, list):
+ if len(obj1) != len(obj2):
+ differences.append(f"{path}: Length mismatch - {len(obj1)} vs {len(obj2)}")
+ else:
+ for i, (item1, item2) in enumerate(zip(obj1, obj2)):
+ newPath = f"{path}[{i}]"
+ self._findDifferences(item1, item2, newPath, differences)
+ else:
+ if obj1 != obj2:
+ differences.append(f"{path}: Value mismatch - {obj1} vs {obj2}")
+
+ async def testJsonSplitMerge(self, jsonFile: Dict[str, Any]) -> Dict[str, Any]:
+ """Test splitting and merging a single JSON file."""
+ fileName = jsonFile["name"]
+ originalData = jsonFile["data"]
+
+ self._log("")
+ self._log("="*80)
+ self._log(f"TESTING JSON SPLIT AND MERGE: {fileName}")
+ self._log("="*80)
+
+ # Convert to JSON string
+ originalJsonString = json.dumps(originalData, indent=2, ensure_ascii=False)
+ originalSize = len(originalJsonString)
+
+ # Log original JSON
+ self._log("")
+ self._log("="*80)
+ self._log("ORIGINAL JSON")
+ self._log("="*80)
+ self._log(f"JSON length: {originalSize} characters")
+ self._log("")
+ self._log("Full JSON content:")
+ self._log("-"*80)
+ jsonLines = originalJsonString.split('\n')
+ if len(jsonLines) > 50:
+ for line in jsonLines[:25]:
+ self._log(line)
+ self._log(f"... ({len(jsonLines) - 50} lines omitted) ...")
+ for line in jsonLines[-25:]:
+ self._log(line)
+ else:
+ for line in jsonLines:
+ self._log(line)
+
+ # Split JSON at random position (simulating AI response cut)
+ self._log("")
+ self._log("="*80)
+ self._log("SPLITTING JSON AT RANDOM POSITION (SIMULATING AI RESPONSE CUT)")
+ self._log("="*80)
+
+ # Find random cut position (not at start or end)
+ import random
+ minCutPos = max(100, originalSize // 10) # At least 10% from start
+ maxCutPos = min(originalSize - 100, originalSize * 9 // 10) # At least 10% from end
+ cutPosition = random.randint(minCutPos, maxCutPos)
+
+ # Get part from start to cut
+ partContent = originalJsonString[:cutPosition]
+
+ self._log("")
+ self._log("="*80)
+ self._log("PART (from start to cut):")
+ self._log("="*80)
+ self._log(f"Cut position: {cutPosition} characters")
+ self._log(f"Part length: {len(partContent)} characters")
+ self._log("")
+ self._log("Part content:")
+ partLines = partContent.split('\n')
+ if len(partLines) > 30:
+ for line in partLines[:15]:
+ self._log(f" {line}")
+ self._log(f" ... ({len(partLines) - 30} lines omitted) ...")
+ for line in partLines[-15:]:
+ self._log(f" {line}")
+ else:
+ for line in partLines:
+ self._log(f" {line}")
+
+ # Generate contexts using getContexts()
+ self._log("")
+ self._log("="*80)
+ self._log("GENERATING CONTINUATION CONTEXTS")
+ self._log("="*80)
+
+ contexts = getContexts(partContent)
+
+ # Log overlap context
+ self._log("")
+ self._log("="*80)
+ self._log("OVERLAP CONTEXT (for merging):")
+ self._log("="*80)
+ overlapLines = contexts.overlapContext.split('\n')
+ if len(overlapLines) > 30:
+ for line in overlapLines[:15]:
+ self._log(f" {line}")
+ self._log(f" ... ({len(overlapLines) - 30} lines omitted) ...")
+ for line in overlapLines[-15:]:
+ self._log(f" {line}")
+ else:
+ for line in overlapLines:
+ self._log(f" {line}")
+
+ # Log hierarchy context
+ self._log("")
+ self._log("="*80)
+ self._log("HIERARCHY CONTEXT (with budget logic):")
+ self._log("="*80)
+ hierarchyLines = contexts.hierarchyContext.split('\n')
+ if len(hierarchyLines) > 30:
+ for line in hierarchyLines[:15]:
+ self._log(f" {line}")
+ self._log(f" ... ({len(hierarchyLines) - 30} lines omitted) ...")
+ for line in hierarchyLines[-15:]:
+ self._log(f" {line}")
+ else:
+ for line in hierarchyLines:
+ self._log(f" {line}")
+
+ # Test completePart as valid JSON
+ self._log("")
+ self._log("="*80)
+ self._log("COMPLETE PART (should be valid JSON):")
+ self._log("="*80)
+ completeLines = contexts.completePart.split('\n')
+ if len(completeLines) > 30:
+ for line in completeLines[:15]:
+ self._log(f" {line}")
+ self._log(f" ... ({len(completeLines) - 30} lines omitted) ...")
+ for line in completeLines[-15:]:
+ self._log(f" {line}")
+ else:
+ for line in completeLines:
+ self._log(f" {line}")
+
+ # Validate completePart as JSON
+ self._log("")
+ self._log("="*80)
+ self._log("VALIDATING COMPLETE PART AS JSON:")
+ self._log("="*80)
+
+ isValidJson = False
+ parsedCompletePart = None
+ jsonError = None
+
+ try:
+ parsedCompletePart = json.loads(contexts.completePart)
+ isValidJson = True
+ self._log(" ✅ completePart is valid JSON")
+ self._log(f" Parsed type: {type(parsedCompletePart).__name__}")
+
+ # Compare with original if possible
+ if isinstance(parsedCompletePart, dict) and isinstance(originalData, dict):
+ comparison = self.compareJson(originalData, parsedCompletePart)
+ self._log(f" Comparison with original:")
+ self._log(f" Exact match: {comparison['exactMatch']}")
+ self._log(f" Size match: {comparison['sizeMatch']}")
+ if comparison['differences']:
+ self._log(f" Differences found: {len(comparison['differences'])}")
+ for diff in comparison['differences'][:10]: # Show first 10 differences
+ self._log(f" - {diff}")
+ if len(comparison['differences']) > 10:
+ self._log(f" ... ({len(comparison['differences']) - 10} more differences)")
+ else:
+ self._log(" No differences found")
+ elif isinstance(parsedCompletePart, list) and isinstance(originalData, list):
+ self._log(f" Both are lists: original={len(originalData)} items, completePart={len(parsedCompletePart)} items")
+ else:
+ self._log(f" Different types: original={type(originalData).__name__}, completePart={type(parsedCompletePart).__name__}")
+
+ except json.JSONDecodeError as e:
+ isValidJson = False
+ jsonError = str(e)
+ self._log(f" ❌ completePart is NOT valid JSON")
+ self._log(f" Error: {jsonError}")
+ self._log(f" Error position: line {e.lineno}, column {e.colno}")
+
+ # Return test results
+ return {
+ "success": isValidJson,
+ "fileName": fileName,
+ "originalSize": originalSize,
+ "cutPosition": cutPosition,
+ "partSize": len(partContent),
+ "overlapContextSize": len(contexts.overlapContext),
+ "hierarchyContextSize": len(contexts.hierarchyContext),
+ "completePartSize": len(contexts.completePart),
+ "isValidJson": isValidJson,
+ "jsonError": jsonError,
+ "parsedCompletePart": parsedCompletePart is not None
+ }
+
+ async def testAllJsonFiles(self) -> Dict[str, Any]:
+ """Test splitting and merging all test JSON files."""
+ print("\n" + "="*80)
+ print("TESTING JSON SPLIT AND MERGE")
+ print("="*80)
+
+ testFiles = self.createTestJsonFiles()
+ results = {}
+
+ for jsonFile in testFiles:
+ try:
+ result = await self.testJsonSplitMerge(jsonFile)
+ results[jsonFile["name"]] = result
+
+ # Small delay between tests
+ await asyncio.sleep(0.5)
+
+ except Exception as e:
+ import traceback
+ print(f"\n❌ Error testing {jsonFile['name']}: {str(e)}")
+ print(traceback.format_exc())
+ results[jsonFile["name"]] = {
+ "success": False,
+ "error": str(e),
+ "traceback": traceback.format_exc()
+ }
+
+ return results
+
+ def _writeLogFile(self):
+ """Write log buffer to file."""
+ logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug")
+ os.makedirs(logDir, exist_ok=True)
+ logFilePath = os.path.join(logDir, "test12_json_split_merge_results.txt")
+
+ with open(logFilePath, 'w', encoding='utf-8') as f:
+ f.write('\n'.join(self.logBuffer))
+
+ self.logFile = logFilePath
+ print(f"\n📝 Detailed log written to: {logFilePath}")
+
+ async def runTest(self):
+ """Run the complete test."""
+ self._log("="*80)
+ self._log("JSON SPLIT AND MERGE TEST 12")
+ self._log("="*80)
+
+ try:
+ # Test all JSON files
+ results = await self.testAllJsonFiles()
+
+ # Write log file
+ self._writeLogFile()
+
+ # Summary
+ print("\n" + "="*80)
+ print("TEST SUMMARY")
+ print("="*80)
+
+ successCount = 0
+
+ for fileName, result in results.items():
+ if result.get("success"):
+ successCount += 1
+ isValidJson = result.get("isValidJson", False)
+ if isValidJson:
+ print(f"✅ {fileName:30s}: Valid JSON - completePart parsed successfully")
+ else:
+ jsonError = result.get("jsonError", "Unknown error")
+ print(f"⚠️ {fileName:30s}: Contexts generated but completePart is not valid JSON - {jsonError}")
+ else:
+ error = result.get("error", "Unknown error")
+ print(f"❌ {fileName:30s}: FAILED - {error}")
+
+ print(f"\nResults: {successCount}/{len(results)} successful")
+
+ self.testResults = {
+ "success": successCount == len(results),
+ "totalFiles": len(results),
+ "successCount": successCount,
+ "results": results
+ }
+
+ return self.testResults
+
+ except Exception as e:
+ import traceback
+ print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}")
+ print(f"Traceback:\n{traceback.format_exc()}")
+ self.testResults = {
+ "success": False,
+ "error": str(e),
+ "traceback": traceback.format_exc()
+ }
+ return self.testResults
+
+
+async def main():
+ """Run JSON split and merge test 12."""
+ tester = JsonSplitMergeTester12()
+ results = await tester.runTest()
+
+ # Print final results as JSON for easy parsing
+ print("\n" + "="*80)
+ print("FINAL RESULTS (JSON)")
+ print("="*80)
+ print(json.dumps(results, indent=2, default=str))
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/tests/test_overlap_context.py b/tests/test_overlap_context.py
deleted file mode 100644
index 1a8b9f7b..00000000
--- a/tests/test_overlap_context.py
+++ /dev/null
@@ -1,216 +0,0 @@
-# Copyright (c) 2025 Patrick Motsch
-# All rights reserved.
-"""
-Test function to verify structure hierarchy and overlap context generation.
-Tests the functions used to generate continuation prompts for incomplete JSON.
-"""
-
-import json
-import os
-from pathlib import Path
-
-
-def testOverlapContext():
- """
- Test function that loads two JSON parts and returns:
- 1. Structure hierarchy result
- 2. Overlap requirement context result
- """
- # Load the JSON file (incomplete/cut JSON)
- basePath = Path(__file__).parent.parent.parent / "local" / "debug" / "prompts"
-
- file1Path = basePath / "20260104-220716-032-chapter_2_section_section_2_response.txt"
-
- # Read JSON (incomplete)
- with open(file1Path, 'r', encoding='utf-8') as f:
- json1Content = f.read().strip()
-
- # Find the break position in json1 (where it was cut)
- # The last line in json1 is incomplete: [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039
- # We need to find where this incomplete array element ends (right after the last number)
- # Find the last number in the file - that's where the content actually ends
- import re
- # Find all numbers at the end and get the position of the last one
- # Look for the pattern: number followed by whitespace/newline or end of string
- matches = list(re.finditer(r'\d+', json1Content))
- if matches:
- lastMatch = matches[-1]
- # Break position is right after the last number (where the closing ] should be)
- breakPosition = lastMatch.end()
- else:
- # Fallback: use end of file
- breakPosition = len(json1Content.rstrip())
-
- print(f"Break position determined: {breakPosition}")
- print(f"Content at break position: '{json1Content[max(0, breakPosition-50):breakPosition+10]}'")
-
- # Import the functions we need to test
- import sys
- sys.path.insert(0, str(Path(__file__).parent.parent))
-
- from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece, buildIncompleteContext
- from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath
-
- # Test 1: Find structure hierarchy
- print("=" * 80)
- print("TEST 1: Structure Hierarchy")
- print("=" * 80)
- print(f"Break position: {breakPosition}")
- print(f"JSON length: {len(json1Content)}")
- print(f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'")
- hierarchy = findStructureHierarchy(json1Content, breakPosition)
- print(f"\nHierarchy levels found: {len(hierarchy) if hierarchy else 0}")
- if not hierarchy:
- print("WARNING: No hierarchy found! This suggests the function isn't working correctly.")
- else:
- print("\nHierarchy details (from root to cut level):")
- for i, level in enumerate(hierarchy):
- levelType = level['type']
- levelKey = level.get('key', 'N/A')
- levelDepth = level['depth']
- levelStart = level['start_pos']
- levelEnd = level['end_pos']
- print(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}")
- # Show a snippet of content at this level
- if levelStart < len(json1Content):
- snippet = json1Content[levelStart:min(levelStart + 50, levelEnd, len(json1Content))]
- print(f" Content: {repr(snippet)}")
-
- # Test 2: Extract cut piece
- print("\n" + "=" * 80)
- print("TEST 2: Extract Cut Piece")
- print("=" * 80)
- cutPiece = extractCutPiece(json1Content, breakPosition)
- print(f"\nCut piece extracted (length: {len(cutPiece)}):")
- if cutPiece:
- print(cutPiece[:500] if len(cutPiece) > 500 else cutPiece)
- else:
- print("WARNING: Cut piece is empty! This suggests the function isn't working correctly.")
- # Try to manually find the cut piece
- # Look backwards from break position for the start of the incomplete array
- i = breakPosition - 1
- while i >= 0 and json1Content[i] not in ['[', ',', '\n']:
- i -= 1
- if i >= 0 and json1Content[i] == '[':
- manualCutPiece = json1Content[i:breakPosition]
- print(f"\nManually found cut piece: {manualCutPiece[:200]}")
-
- # Test 3: Build incomplete context (structure hierarchy with cut point)
- print("\n" + "=" * 80)
- print("TEST 3: Build Incomplete Context (Structure Hierarchy with Cut Point)")
- print("=" * 80)
- print("Expected: Should show complete hierarchy from root to cut point")
- print(" with complete elements before cut and cut piece marked")
- incompleteContext = buildIncompleteContext(json1Content, breakPosition)
- print(f"\nIncomplete context (length: {len(incompleteContext)} chars):")
- print("-" * 80)
- print(incompleteContext)
- print("-" * 80)
-
- # Validate the output
- if incompleteContext:
- # Check if it shows hierarchy (should have multiple levels of indentation)
- lines = incompleteContext.split('\n')
- indentLevels = set()
- for line in lines:
- if line.strip():
- indent = len(line) - len(line.lstrip())
- indentLevels.add(indent)
- print(f"\nValidation: Found {len(indentLevels)} different indent levels (should be > 1 for hierarchy)")
-
- # Check if cut point is marked
- if "<-- CUT POINT" in incompleteContext:
- print("Validation: Cut point marker found ✓")
- else:
- print("Validation: WARNING - Cut point marker NOT found!")
-
- # Check if root structure is shown
- if incompleteContext.strip().startswith('{') or incompleteContext.strip().startswith('['):
- print("Validation: Root structure opening found ✓")
- else:
- print("Validation: WARNING - Root structure opening NOT found!")
- else:
- print("WARNING: Incomplete context is empty!")
-
- # Test 4: Extract overlap context (cut part and full part before same level)
- print("\n" + "=" * 80)
- print("TEST 4: Extract Overlap Context (Cut Part + Full Part Before Same Level)")
- print("=" * 80)
- overlapContext = CodeGenerationPath._extractOverlapContext(json1Content, breakPosition)
- print(f"\nOverlap context:")
- print(overlapContext)
-
- # Return results as dictionary
- results = {
- "hierarchy": hierarchy,
- "cutPiece": cutPiece,
- "incompleteContext": incompleteContext,
- "overlapContext": overlapContext,
- "breakPosition": breakPosition,
- "json1Length": len(json1Content),
- "json1Content": json1Content
- }
-
- return results
-
-
-if __name__ == "__main__":
- print("Testing Overlap Context Generation")
- print("=" * 80)
- results = testOverlapContext()
-
- print("\n" + "=" * 80)
- print("SUMMARY")
- print("=" * 80)
- print(f"\nBreak position: {results['breakPosition']}")
- print(f"JSON1 length: {results['json1Length']}")
- print(f"Hierarchy levels: {len(results['hierarchy']) if results['hierarchy'] else 0}")
- print(f"Cut piece length: {len(results['cutPiece'])}")
- print(f"Incomplete context length: {len(results['incompleteContext'])}")
- print(f"Overlap context length: {len(results['overlapContext'])}")
-
- # Save results to file for inspection
- outputPath = Path(__file__).parent.parent.parent / "local" / "debug" / "test_overlap_results.txt"
- outputPath.parent.mkdir(parents=True, exist_ok=True)
-
- with open(outputPath, 'w', encoding='utf-8') as f:
- f.write("=" * 80 + "\n")
- f.write("OVERLAP CONTEXT TEST RESULTS\n")
- f.write("=" * 80 + "\n\n")
-
- f.write("FIRST JSON (CUT/INCOMPLETE):\n")
- f.write("-" * 80 + "\n")
- f.write(f"Break position: {results['breakPosition']}\n")
- f.write(f"JSON length: {results['json1Length']}\n")
- json1Content = results['json1Content']
- f.write(f"Content around break: '{json1Content[max(0, results['breakPosition']-100):results['breakPosition']+20]}'\n\n")
- f.write("Full JSON1 content:\n")
- f.write(json1Content)
-
- f.write("\n\n" + "=" * 80 + "\n")
- f.write("STRUCTURE HIERARCHY:\n")
- f.write("-" * 80 + "\n")
- if results['hierarchy']:
- f.write(f"Hierarchy levels found: {len(results['hierarchy'])}\n\n")
- f.write("Hierarchy details (from root to cut level):\n")
- for i, level in enumerate(results['hierarchy']):
- levelType = level['type']
- levelKey = level.get('key', 'N/A')
- levelDepth = level['depth']
- levelStart = level['start_pos']
- levelEnd = level['end_pos']
- f.write(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}\n")
- else:
- f.write("No hierarchy found\n")
-
- f.write("\n\n" + "=" * 80 + "\n")
- f.write("INCOMPLETE CONTEXT (Structure Hierarchy with Cut Point):\n")
- f.write("-" * 80 + "\n")
- f.write(results['incompleteContext'])
-
- f.write("\n\n" + "=" * 80 + "\n")
- f.write("OVERLAP CONTEXT (Object containing the cut element):\n")
- f.write("-" * 80 + "\n")
- f.write(results['overlapContext'])
-
- print(f"\n\nFull results saved to: {outputPath}")